mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-13 05:28:49 +00:00
Move PCRE2 to pcre2
This commit is contained in:
parent
1e234f492c
commit
f4f9ed56ee
113 changed files with 157381 additions and 0 deletions
15
pcre2/.gitignore
vendored
Normal file
15
pcre2/.gitignore
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
!config.h.in
|
||||
!configure
|
||||
!doc/
|
||||
.deps
|
||||
.dirstamp
|
||||
.libs/
|
||||
*.la
|
||||
*.lo
|
||||
*.pc
|
||||
libtool
|
||||
pcre2_chartables.c
|
||||
pcre2-config
|
||||
pcre2test
|
||||
pcre2.h
|
||||
stamp-h1
|
313
pcre2/132html
Executable file
313
pcre2/132html
Executable file
|
@ -0,0 +1,313 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to turn PCRE2 man pages into HTML
|
||||
|
||||
|
||||
# Subroutine to handle font changes and other escapes
|
||||
|
||||
sub do_line {
# Convert one line of man-page text into HTML text.
#
# Argument: the raw text of the line.
# Returns:  a modified copy of the text; the argument itself is not changed.
#
# The order of the substitutions matters: angle brackets that are present in
# the input are turned into character entities first, so that the <i> and <b>
# tags inserted afterwards are the only real markup in the result.

my($s) = $_[0];

$s =~ s/</&lt;/g;                          # Deal with < and >
$s =~ s/>/&gt;/g;
$s =~ s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;     # \fI...\fR or \fI...\fP => italic
$s =~ s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;     # \fB...\fR or \fB...\fP => bold
$s =~ s{\\e}{\\}g;                         # \e => a single backslash
$s =~ s/(?<=Copyright )\(c\)/&copy;/g;     # "(c)" only directly after "Copyright "
return $s;
}
|
||||
|
||||
# Subroutine to ensure not in a paragraph
|
||||
|
||||
sub end_para {
# Make sure that no paragraph is open in the output written to TEMP.
#
# If $inpara is set, the closing </P> is written, preceded by </PRE> when
# $inpre is also set. In every case the three state variables $inpara, $inpre
# and $wrotetext are reset to zero afterwards.

if ($inpara)
  {
  print TEMP "</PRE>\n" if ($inpre);
  print TEMP "</P>\n";
  }

$inpara = $inpre = 0;
$wrotetext = 0;
}
|
||||
|
||||
# Subroutine to start a new paragraph
|
||||
|
||||
sub new_para {
# Start a new paragraph in the output written to TEMP. Anything that is still
# open is closed first by end_para(), then <P> is written and $inpara is set.

end_para();
print TEMP "<P>\n";
$inpara = 1;
}
|
||||
|
||||
|
||||
# Main program
|
||||
|
||||
# Conversion state. These are package variables (not lexicals) because the
# subroutines end_para() and new_para() read and update $inpara, $inpre and
# $wrotetext as well.

$innf = 0;        # set between .nf and .fi
$inpara = 0;      # set while a <P> paragraph is open
$inpre = 0;       # set while a <pre> section is open
$wrotetext = 0;   # set once text has been written since the last end_para()
$toc = 0;         # set by the -toc option
$ref = 1;         # number used for the next TOCn/SECn anchor pair

# Consume leading option arguments. Only "-toc" has any effect; any other
# argument that starts with "-" is dropped without comment.

while (@ARGV && $ARGV[0] =~ /^-/)
  {
  $toc = 1 if $ARGV[0] eq "-toc";
  shift;
  }
|
||||
|
||||
# Initial output to STDOUT
|
||||
|
||||
# The first remaining argument is interpolated into the title and the heading
# of the page.

print <<"End";
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End
|
||||
|
||||
print "<ul>\n" if ($toc);

# The body of the page is written to a temporary file while the input is being
# read, because the table of contents entries are printed to STDOUT as the
# headings are encountered and so have to come out before the body. The file
# is anonymous (it has no name that another process could predict or replace)
# and it disappears by itself, even when processing is abandoned with die.

open(TEMP, "+>", undef) || die "Can't create a temporary file: $!\n";

while (<STDIN>)
  {
  # Handle lines beginning with a dot

  if (/^\./)
    {
    # Some of the PCRE2 man pages used to contain instances of .br. However,
    # they should have all been removed because they cause trouble in some
    # (other) automated systems that translate man pages to HTML. Complain if
    # we find .br or .in (another macro that is deprecated).

    if (/^\.br/ || /^\.in/)
      {
      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
      print STDERR "*** $_\n";
      die "*** Processing abandoned\n";
      }

    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

    elsif (/^\.nf/)
      {
      $innf = 1;
      }

    elsif (/^\.fi/)
      {
      $innf = 0;
      }

    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows, unless we are inside
    # a .nf/.fi section. The point being that the <pre> and </pre> that delimit
    # literal sections will do the spacing. Always skip if no previous output.

    elsif (/^\.sp/)
      {
      if ($wrotetext)
        {
        $_ = <STDIN>;
        if ($inpre)
          {
          print TEMP "\n" if (/^[\s.]/);
          }
        else
          {
          print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
          }
        redo;    # Now process the lookahead line we just read
        }
      }

    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
      {
      new_para();
      }

    elsif (/^\.SH\s*("?)(.*)\1/)
      {
      my($heading) = $2;

      # Ignore the NAME section: discard the line that follows the heading

      if ($heading =~ /^NAME\b/)
        {
        <STDIN>;
        next;
        }

      end_para();
      my($title) = do_line($heading);

      # The title is passed as a %s argument and not interpolated into the
      # format, so that a "%" in a heading is output as it stands.

      if ($toc)
        {
        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
          $ref, $ref, $title);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
          $ref, $title);
        $ref++;
        }
      else
        {
        print TEMP "<br><b>\n$title\n</b><br>\n";
        }
      }

    elsif (/^\.SS\s*("?)(.*)\1/)
      {
      my($heading) = $2;
      end_para();
      my($title) = do_line($heading);
      print TEMP "<br><b>\n$title\n</b><br>\n";
      }

    elsif (/^\.B\s*(.*)/)
      {
      my($text) = $1;
      new_para() if (!$inpara);
      $_ = do_line($text);
      s/"(.*?)"/$1/g;
      print TEMP "<b>$_</b>\n";
      $wrotetext = 1;
      }

    elsif (/^\.I\s*(.*)/)
      {
      my($text) = $1;
      new_para() if (!$inpara);
      $_ = do_line($text);
      s/"(.*?)"/$1/g;
      print TEMP "<i>$_</i>\n";
      $wrotetext = 1;
      }

    # A comment that starts "HREF" takes the next line as a name that
    # is turned into a hyperlink, using the text given, which might be
    # in a special font. If it ends in () or (digits) or punctuation, they
    # aren't part of the link.

    elsif (/^\.\\"\s*HREF/)
      {
      $_ = <STDIN>;
      chomp;
      $_ = do_line($_);
      $_ =~ s/\s+$//;
      $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
      print TEMP "<a href=\"$1.html\">$_</a>\n";
      }

    # A comment that starts "HTML" inserts literal HTML

    elsif (/^\.\\"\s*HTML\s*(.*)/)
      {
      print TEMP $1;
      }

    # A comment that starts < inserts that HTML at the end of the
    # *next* input line - so as not to get a newline between them.

    elsif (/^\.\\"\s*(<.*>)/)
      {
      my($markup) = $1;
      $_ = <STDIN>;
      chomp;
      $_ = do_line($_);
      $_ =~ s/\s+$//;
      print TEMP "$_$markup\n";
      }

    # A comment that starts JOIN joins the next two lines together, with one
    # space between them. Then that line is processed. This is used in some
    # displays where two lines are needed for the "man" version. JOINSH works
    # the same, except that it assumes this is a shell command, so removes
    # continuation backslashes.

    elsif (/^\.\\"\s*JOIN(SH)?/)
      {
      my($shell) = defined($1);
      my($one,$two);
      $one = <STDIN>;
      $two = <STDIN>;
      $one =~ s/\s*\\e\s*$// if ($shell);
      chomp($one);
      $two =~ s/^\s+//;
      $_ = "$one $two";
      redo;    # Process the joined lines
      }

    # .EX/.EE are used in the pcre2demo page to bracket the entire program,
    # which is unmodified except for turning backslash into "\e". Here the
    # backslashes are restored and the characters that are special in HTML
    # are replaced by entities ("&" first, so that the ampersands of the
    # other two entities are left alone).

    elsif (/^\.EX\s*$/)
      {
      print TEMP "<PRE>\n";
      while (<STDIN>)
        {
        last if /^\.EE\s*$/;
        s/\\e/\\/g;
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        print TEMP;
        }
      }

    # Ignore anything not recognized

    next;
    }

  # Line does not begin with a dot. Replace blank lines with new paragraphs

  if (/^\s*$/)
    {
    end_para() if ($wrotetext);
    next;
    }

  # Convert fonts changes and output an ordinary line. Ensure that indented
  # lines are marked as literal.

  $_ = do_line($_);
  new_para() if (!$inpara);

  if (/^\s/)
    {
    if (!$inpre)
      {
      print TEMP "<pre>\n";
      $inpre = 1;
      }
    }
  elsif ($inpre)
    {
    print TEMP "</pre>\n";
    $inpre = 0;
    }

  # Add <br> to the end of a non-literal line if we are within .nf/.fi

  $_ .= "<br>\n" if (!$inpre && $innf);

  print TEMP;
  $wrotetext = 1;
  }

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the remainder to the standard output: go back to the start of the
# temporary file and read through what was written to it.

seek(TEMP, 0, 0) || die "Can't rewind the temporary file: $!\n";

print while (<TEMP>);

print <<"End";
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End

close(TEMP);
|
||||
|
||||
# End
|
36
pcre2/AUTHORS
Normal file
36
pcre2/AUTHORS
Normal file
|
@ -0,0 +1,36 @@
|
|||
THE MAIN PCRE2 LIBRARY CODE
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
780
pcre2/CMakeLists.txt
Normal file
780
pcre2/CMakeLists.txt
Normal file
|
@ -0,0 +1,780 @@
|
|||
# CMakeLists.txt
|
||||
#
|
||||
#
|
||||
# This file enables PCRE2 to be built with the CMake configuration and build
|
||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||
# Converted to support PCRE2 from the original PCRE file, August 2014.
|
||||
#
|
||||
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
|
||||
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
|
||||
# 2007-09-19 Adjusted by PH to retain previous default settings
|
||||
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
|
||||
# (b) Ensure pcretest and pcregrep link with the local library,
|
||||
# not a previously-installed one.
|
||||
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
|
||||
# PCRE_SUPPORT_LIBBZ2.
|
||||
# 2008-01-20 Brought up to date to include several new features by Christian
|
||||
# Ehrlicher.
|
||||
# 2008-01-22 Sheri added options for backward compatibility of library names
|
||||
# when building with minGW:
|
||||
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
|
||||
# be built without "lib" as prefix. (The libraries will be named
|
||||
# pcre.dll, pcreposix.dll and pcrecpp.dll).
|
||||
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
|
||||
# be built with suffix of "-0.dll". (The libraries will be named
|
||||
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
|
||||
# built by default with Configure and Make.
|
||||
# 2008-01-23 PH removed the automatic build of pcredemo.
|
||||
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
|
||||
# 2008-07-03 PH updated for revised UCP property support (change of files)
|
||||
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
|
||||
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
|
||||
# is included within another project.
|
||||
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
|
||||
# add options to stop the building of pcregrep and the tests, and
|
||||
# to disable the final configuration report.
|
||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||
# are set by specifying a release type.
|
||||
# 2010-01-02 PH added test for stdint.h
|
||||
# 2010-03-02 PH added test for inttypes.h
|
||||
# 2011-08-01 PH added PCREGREP_BUFSIZE
|
||||
# 2011-08-22 PH added PCRE_SUPPORT_JIT
|
||||
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
|
||||
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
|
||||
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
|
||||
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
|
||||
# the source dir by the user prior to building
|
||||
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
|
||||
# of DEBUG location only (likely only matters in MSVC)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
|
||||
# RunGrepTest (used for UNIX and Msys)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
|
||||
# RunTest.bat in Win32 (for effortless testing with "make test")
|
||||
# 2011-10-04 Sheri Increased minimum required cmake version
|
||||
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
|
||||
# 2012-01-10 Zoltan Herczeg added libpcre16 support
|
||||
# 2012-01-13 Stephen Kelly added out of source build support
|
||||
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
|
||||
# of the configure.ac file
|
||||
# 2012-02-26 PH added support for libedit
|
||||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||
# 2012-09-08 ChPe added PCRE32 support
|
||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||
# so it has been removed.
|
||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
||||
# 2015-10-16 PH added support for never-backslash-C
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
# Increased minimum to 2.8.0 to support newer add_test features. Set policy
|
||||
# CMP0026 to avoid warnings for the use of LOCATION in GET_TARGET_PROPERTY.
|
||||
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||
CMAKE_POLICY(SET CMP0026 OLD)
|
||||
|
||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
|
||||
|
||||
# external packages
|
||||
FIND_PACKAGE( BZip2 )
|
||||
FIND_PACKAGE( ZLIB )
|
||||
FIND_PACKAGE( Readline )
|
||||
FIND_PACKAGE( Editline )
|
||||
|
||||
# Configuration checks
|
||||
|
||||
INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckFunctionExists)
|
||||
INCLUDE(CheckTypeSize)
|
||||
|
||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
||||
CHECK_INCLUDE_FILE(inttypes.h HAVE_INTTYPES_H)
|
||||
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
|
||||
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
||||
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
||||
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
|
||||
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
|
||||
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
# Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||
# the order we use here.
|
||||
|
||||
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
|
||||
"Build shared libraries instead of static ones.")
|
||||
|
||||
OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
|
||||
|
||||
OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
|
||||
|
||||
OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
|
||||
|
||||
OPTION(PCRE2_DEBUG "Include debugging code" OFF)
|
||||
|
||||
SET(PCRE2_EBCDIC OFF CACHE BOOL
|
||||
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
|
||||
|
||||
SET(PCRE2_EBCDIC_NL25 OFF CACHE BOOL
|
||||
"Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
|
||||
|
||||
SET(PCRE2_LINK_SIZE "2" CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
|
||||
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
||||
|
||||
SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING
|
||||
"Buffer size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_NEWLINE "LF" CACHE STRING
|
||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
||||
|
||||
SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
|
||||
"If ON, then don't use stack recursion when matching. See HEAP_MATCH_RECURSE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
|
||||
"Enable support for Just-in-time compiling.")
|
||||
|
||||
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
|
||||
"Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
||||
SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
|
||||
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||
|
||||
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||
|
||||
SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL
|
||||
"If ON, backslash-C (upper case C) is locked out.")
|
||||
|
||||
SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||
"Enable Valgrind support.")
|
||||
|
||||
OPTION(PCRE2_SHOW_REPORT "Show the final configuration report" ON)
|
||||
OPTION(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON)
|
||||
OPTION(PCRE2_BUILD_TESTS "Build the tests" ON)
|
||||
|
||||
IF (MINGW)
|
||||
OPTION(NON_STANDARD_LIB_PREFIX
|
||||
"ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF)
|
||||
|
||||
OPTION(NON_STANDARD_LIB_SUFFIX
|
||||
"ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF)
|
||||
ENDIF(MINGW)
|
||||
|
||||
IF(MSVC)
|
||||
OPTION(INSTALL_MSVC_PDB
|
||||
"ON=Install .pdb files built by MSVC, if generated"
|
||||
OFF)
|
||||
ENDIF(MSVC)
|
||||
|
||||
# bzip2 lib
|
||||
IF(BZIP2_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON)
|
||||
ENDIF(BZIP2_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBBZ2)
|
||||
INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBBZ2)
|
||||
|
||||
# zlib
|
||||
IF(ZLIB_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON)
|
||||
ENDIF(ZLIB_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBZ)
|
||||
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBZ)
|
||||
|
||||
# editline lib
|
||||
IF(EDITLINE_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF)
|
||||
ENDIF(EDITLINE_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBEDIT)
|
||||
INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBEDIT)
|
||||
|
||||
# readline lib
|
||||
IF(READLINE_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON)
|
||||
ENDIF(READLINE_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
|
||||
# Prepare build configuration
|
||||
|
||||
IF(NOT BUILD_SHARED_LIBS)
|
||||
SET(PCRE2_STATIC 1)
|
||||
ENDIF(NOT BUILD_SHARED_LIBS)
|
||||
|
||||
IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
|
||||
MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled")
|
||||
ENDIF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
SET(SUPPORT_PCRE2_8 1)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_8)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
SET(SUPPORT_PCRE2_16 1)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
SET(SUPPORT_PCRE2_32 1)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
|
||||
MESSAGE(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program")
|
||||
SET(PCRE2_BUILD_PCRE2GREP OFF)
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
|
||||
MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
|
||||
ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
SET(BSR_ANYCRLF 1)
|
||||
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
|
||||
IF(PCRE2_NEVER_BACKSLASH_C)
|
||||
SET(NEVER_BACKSLASH_C 1)
|
||||
ENDIF(PCRE2_NEVER_BACKSLASH_C)
|
||||
|
||||
IF(PCRE2_SUPPORT_UNICODE)
|
||||
SET(SUPPORT_UNICODE 1)
|
||||
ENDIF(PCRE2_SUPPORT_UNICODE)
|
||||
|
||||
IF(PCRE2_SUPPORT_JIT)
|
||||
SET(SUPPORT_JIT 1)
|
||||
ENDIF(PCRE2_SUPPORT_JIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_PCRE2GREP_JIT)
|
||||
SET(SUPPORT_PCRE2GREP_JIT 1)
|
||||
ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_VALGRIND)
|
||||
SET(SUPPORT_VALGRIND 1)
|
||||
ENDIF(PCRE2_SUPPORT_VALGRIND)
|
||||
|
||||
# This next one used to reference ${READLINE_LIBRARY})
|
||||
# but I was advised to add the NCURSES test as well, along with
|
||||
# some modifications to cmake/FindReadline.cmake which should
|
||||
# make it possible to override the default if necessary. PH
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
SET(SUPPORT_LIBREADLINE 1)
|
||||
SET(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
ENDIF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
|
||||
# libedit is a plug-compatible alternative to libreadline
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBEDIT)
|
||||
SET(SUPPORT_LIBEDIT 1)
|
||||
SET(PCRE2TEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
ENDIF(PCRE2_SUPPORT_LIBEDIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBZ)
|
||||
SET(SUPPORT_LIBZ 1)
|
||||
SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES})
|
||||
ENDIF(PCRE2_SUPPORT_LIBZ)
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBBZ2)
|
||||
SET(SUPPORT_LIBBZ2 1)
|
||||
SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES})
|
||||
ENDIF(PCRE2_SUPPORT_LIBBZ2)
|
||||
|
||||
SET(NEWLINE_DEFAULT "")
|
||||
|
||||
IF(PCRE2_NEWLINE STREQUAL "CR")
|
||||
SET(NEWLINE_DEFAULT "1")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "CR")
|
||||
IF(PCRE2_NEWLINE STREQUAL "LF")
|
||||
SET(NEWLINE_DEFAULT "2")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "LF")
|
||||
IF(PCRE2_NEWLINE STREQUAL "CRLF")
|
||||
SET(NEWLINE_DEFAULT "3")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "CRLF")
|
||||
IF(PCRE2_NEWLINE STREQUAL "ANY")
|
||||
SET(NEWLINE_DEFAULT "4")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "ANY")
|
||||
IF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
|
||||
SET(NEWLINE_DEFAULT "5")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
|
||||
|
||||
IF(NEWLINE_DEFAULT STREQUAL "")
|
||||
MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
|
||||
ENDIF(NEWLINE_DEFAULT STREQUAL "")
|
||||
|
||||
IF(PCRE2_EBCDIC)
|
||||
SET(EBCDIC 1)
|
||||
ENDIF(PCRE2_EBCDIC)
|
||||
|
||||
IF(PCRE2_EBCDIC_NL25)
|
||||
SET(EBCDIC 1)
|
||||
SET(EBCDIC_NL25 1)
|
||||
ENDIF(PCRE2_EBCDIC_NL25)
|
||||
|
||||
IF(PCRE2_HEAP_MATCH_RECURSE)
|
||||
SET(HEAP_MATCH_RECURSE 1)
|
||||
ENDIF(PCRE2_HEAP_MATCH_RECURSE)
|
||||
|
||||
# Output files
|
||||
|
||||
CONFIGURE_FILE(config-cmake.h.in
|
||||
${PROJECT_BINARY_DIR}/config.h
|
||||
@ONLY)
|
||||
|
||||
# Parse version numbers and date out of configure.ac
|
||||
|
||||
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
|
||||
configure_lines
|
||||
LIMIT_COUNT 50 # Read only the first 50 lines of the file
|
||||
)
|
||||
|
||||
set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
|
||||
foreach(configure_line ${configure_lines})
|
||||
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||
if (NOT ${_substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||
if (CMAKE_MATCH_1)
|
||||
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endforeach()
|
||||
|
||||
CONFIGURE_FILE(src/pcre2.h.in
|
||||
${PROJECT_BINARY_DIR}/pcre2.h
|
||||
@ONLY)
|
||||
|
||||
# What about pcre2-config and libpcre2.pc?
|
||||
|
||||
# Character table generation
|
||||
|
||||
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||
IF(PCRE2_REBUILD_CHARTABLES)
|
||||
ADD_EXECUTABLE(dftables src/dftables.c)
|
||||
ADD_CUSTOM_COMMAND(
|
||||
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
||||
DEPENDS dftables
|
||||
COMMAND dftables
|
||||
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
)
|
||||
ELSE(PCRE2_REBUILD_CHARTABLES)
|
||||
CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist
|
||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
COPYONLY)
|
||||
ENDIF(PCRE2_REBUILD_CHARTABLES)
|
||||
|
||||
# Source code
|
||||
|
||||
SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
|
||||
|
||||
SET(PCRE2_SOURCES
|
||||
src/pcre2_auto_possess.c
|
||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
src/pcre2_compile.c
|
||||
src/pcre2_config.c
|
||||
src/pcre2_context.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
src/pcre2_match_data.c
|
||||
src/pcre2_newline.c
|
||||
src/pcre2_ord2utf.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_serialize.c
|
||||
src/pcre2_string_utils.c
|
||||
src/pcre2_study.c
|
||||
src/pcre2_substitute.c
|
||||
src/pcre2_substring.c
|
||||
src/pcre2_tables.c
|
||||
src/pcre2_ucd.c
|
||||
src/pcre2_valid_utf.c
|
||||
src/pcre2_xclass.c
|
||||
)
|
||||
|
||||
SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
|
||||
SET(PCRE2POSIX_SOURCES src/pcre2posix.c)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
|
||||
PRE-LINK
|
||||
COMMAND windres ARGS pcre2.rc pcre2.o
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT Using pcre2 coff info in mingw build)
|
||||
SET(PCRE2_SOURCES
|
||||
${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o
|
||||
)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
|
||||
PRE-LINK
|
||||
COMMAND windres ARGS pcre2posix.rc pcre2posix.o
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT Using pcre2posix coff info in mingw build)
|
||||
SET(PCRE2POSIX_SOURCES
|
||||
${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o
|
||||
)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
|
||||
IF(MSVC AND NOT PCRE2_STATIC)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
SET(PCRE2_SOURCES
|
||||
${PCRE2_SOURCES} pcre2.rc)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
SET(PCRE2POSIX_SOURCES
|
||||
${PCRE2POSIX_SOURCES} pcre2posix.rc)
|
||||
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
ENDIF(MSVC AND NOT PCRE2_STATIC)
|
||||
|
||||
# Build setup
|
||||
|
||||
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
|
||||
|
||||
IF(MSVC)
|
||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
|
||||
ENDIF(MSVC)
|
||||
|
||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
# needed to make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
IF(WIN32)
|
||||
SET(CMAKE_DEBUG_POSTFIX "d")
|
||||
ENDIF(WIN32)
|
||||
|
||||
SET(targets)
|
||||
|
||||
# 8-bit library
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-8
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2-8)
|
||||
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||
SET_PROPERTY(TARGET pcre2posix
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2posix)
|
||||
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES PREFIX "")
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES SUFFIX "-0.dll")
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_8)
|
||||
|
||||
# 16-bit library
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-16
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
|
||||
SET(targets ${targets} pcre2-16)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES PREFIX "")
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES SUFFIX "-0.dll")
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||
|
||||
# 32-bit library
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-32
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
||||
SET(targets ${targets} pcre2-32)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES PREFIX "")
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES SUFFIX "-0.dll")
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||
|
||||
# Executables
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2GREP)
|
||||
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
|
||||
SET_PROPERTY(TARGET pcre2grep
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2grep)
|
||||
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||
|
||||
# Testing
|
||||
|
||||
IF(PCRE2_BUILD_TESTS)
|
||||
ENABLE_TESTING()
|
||||
|
||||
SET(PCRE2TEST_SOURCES src/pcre2test.c)
|
||||
|
||||
ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES})
|
||||
SET(targets ${targets} pcre2test)
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
LIST(APPEND PCRE2TEST_LIBS pcre2posix pcre2-8)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_8)
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
LIST(APPEND PCRE2TEST_LIBS pcre2-16)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
LIST(APPEND PCRE2TEST_LIBS pcre2-32)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||
TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS})
|
||||
|
||||
IF(PCRE2_SUPPORT_JIT)
|
||||
ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c)
|
||||
SET(targets ${targets} pcre2_jit_test)
|
||||
SET(PCRE2_JIT_TEST_LIBS )
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-8)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_8)
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-16)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-32)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||
TARGET_LINK_LIBRARIES(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
|
||||
ENDIF(PCRE2_SUPPORT_JIT)
|
||||
|
||||
# exes in Debug location tested by the RunTest shell script
|
||||
# via "make test"
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2GREP)
|
||||
GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||
|
||||
GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)
|
||||
|
||||
# =================================================
|
||||
# Write out a CTest configuration file
|
||||
#
|
||||
FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
|
||||
"# This is a generated file.
|
||||
MESSAGE(\"When testing is complete, review test output in the
|
||||
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
|
||||
MESSAGE(\" \")
|
||||
")
|
||||
|
||||
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.sh
|
||||
"#! /bin/sh
|
||||
# This is a generated file.
|
||||
. ${PROJECT_SOURCE_DIR}/RunTest
|
||||
if test \"$?\" != \"0\"; then exit 1; fi
|
||||
# End
|
||||
")
|
||||
|
||||
IF(UNIX)
|
||||
ADD_TEST(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh)
|
||||
ENDIF(UNIX)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2GREP)
|
||||
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
|
||||
"#! /bin/sh
|
||||
# This is a generated file.
|
||||
. ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||
if test \"$?\" != \"0\"; then exit 1; fi
|
||||
# End
|
||||
")
|
||||
|
||||
IF(UNIX)
|
||||
ADD_TEST(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
|
||||
ENDIF(UNIX)
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||
|
||||
IF(WIN32)
|
||||
# Provide environment for executing the bat file version of RunTest
|
||||
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
|
||||
FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
|
||||
FILE(TO_NATIVE_PATH ${PCRE2TEST_EXE} winexe)
|
||||
|
||||
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.bat
|
||||
"\@REM This is a generated file.
|
||||
\@echo off
|
||||
setlocal
|
||||
SET srcdir=\"${winsrc}\"
|
||||
SET pcre2test=\"${winexe}\"
|
||||
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
|
||||
call %srcdir%\\RunTest.Bat
|
||||
if errorlevel 1 exit /b 1
|
||||
echo RunTest.bat tests successfully completed
|
||||
")
|
||||
|
||||
ADD_TEST(NAME pcre2_test_bat
|
||||
COMMAND pcre2_test.bat)
|
||||
SET_TESTS_PROPERTIES(pcre2_test_bat PROPERTIES
|
||||
PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
|
||||
|
||||
IF("$ENV{OSTYPE}" STREQUAL "msys")
|
||||
# Both the sh and bat file versions of RunTest are run if make test is used
|
||||
# in msys
|
||||
ADD_TEST(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh)
|
||||
IF(PCRE2_BUILD_PCRE2GREP)
|
||||
ADD_TEST(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||
ENDIF("$ENV{OSTYPE}" STREQUAL "msys")
|
||||
ENDIF(WIN32)
|
||||
|
||||
# Changed to accommodate testing whichever location was just built
|
||||
|
||||
IF(PCRE2_SUPPORT_JIT)
|
||||
ADD_TEST(pcre2_jit_test pcre2_jit_test)
|
||||
ENDIF(PCRE2_SUPPORT_JIT)
|
||||
|
||||
ENDIF(PCRE2_BUILD_TESTS)
|
||||
|
||||
# Installation
|
||||
|
||||
SET(CMAKE_INSTALL_ALWAYS 1)
|
||||
|
||||
INSTALL(TARGETS ${targets}
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
|
||||
INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)
|
||||
|
||||
FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
|
||||
FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
|
||||
FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
|
||||
|
||||
# Install every globbed section-3 man page exactly once: ${man3} is passed
|
||||
# to INSTALL(FILES ...) below exactly as FILE(GLOB ...) produced it.
|
||||
# A loop that rebuilt the list here as "${man3} plus the current page" left
|
||||
# the last page listed twice and computed a basename that was never used,
|
||||
# so it has been dropped.
|
||||
|
||||
INSTALL(FILES ${man1} DESTINATION man/man1)
|
||||
INSTALL(FILES ${man3} DESTINATION man/man3)
|
||||
INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html)
|
||||
|
||||
IF(MSVC AND INSTALL_MSVC_PDB)
|
||||
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2posix.pdb
|
||||
DESTINATION bin
|
||||
CONFIGURATIONS RelWithDebInfo)
|
||||
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2posixd.pdb
|
||||
DESTINATION bin
|
||||
CONFIGURATIONS Debug)
|
||||
ENDIF(MSVC AND INSTALL_MSVC_PDB)
|
||||
|
||||
# Help, only for nice output
|
||||
IF(BUILD_SHARED_LIBS)
|
||||
SET(BUILD_STATIC_LIBS OFF)
|
||||
ELSE(BUILD_SHARED_LIBS)
|
||||
SET(BUILD_STATIC_LIBS ON)
|
||||
ENDIF(BUILD_SHARED_LIBS)
|
||||
|
||||
IF(PCRE2_SHOW_REPORT)
|
||||
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
|
||||
IF (CMAKE_C_FLAGS)
|
||||
SET(cfsp " ")
|
||||
ENDIF(CMAKE_C_FLAGS)
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "PCRE2 configuration summary:")
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
||||
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
||||
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS " Build 8 bit PCRE2 library ....... : ${PCRE2_BUILD_PCRE2_8}")
|
||||
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}")
|
||||
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}")
|
||||
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
|
||||
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
|
||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||
MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}")
|
||||
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
||||
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
|
||||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
||||
MESSAGE(STATUS " Use heap recursion .............. : ${PCRE2_HEAP_MATCH_RECURSE}")
|
||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
|
||||
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE2_MATCH_LIMIT_RECURSION}")
|
||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
|
||||
MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
|
||||
MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}")
|
||||
MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
|
||||
MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}")
|
||||
MESSAGE(STATUS " and pcre2grep)")
|
||||
IF(ZLIB_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}")
|
||||
ELSE(ZLIB_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2grep with libz ........ : Library not found" )
|
||||
ENDIF(ZLIB_FOUND)
|
||||
IF(BZIP2_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : ${PCRE2_SUPPORT_LIBBZ2}")
|
||||
ELSE(BZIP2_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : Library not found" )
|
||||
ENDIF(BZIP2_FOUND)
|
||||
IF(EDITLINE_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2test with libeditline . : ${PCRE2_SUPPORT_LIBEDIT}")
|
||||
ELSE(EDITLINE_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2test with libeditline . : Library not found" )
|
||||
ENDIF(EDITLINE_FOUND)
|
||||
IF(READLINE_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2test with libreadline . : ${PCRE2_SUPPORT_LIBREADLINE}")
|
||||
ELSE(READLINE_FOUND)
|
||||
MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" )
|
||||
ENDIF(READLINE_FOUND)
|
||||
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
||||
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
|
||||
IF(MSVC)
|
||||
MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
|
||||
ENDIF(MSVC)
|
||||
|
||||
MESSAGE(STATUS "")
|
||||
ENDIF(PCRE2_SHOW_REPORT)
|
||||
|
||||
# end CMakeLists.txt
|
5
pcre2/COPYING
Normal file
5
pcre2/COPYING
Normal file
|
@ -0,0 +1,5 @@
|
|||
PCRE2 LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE2 distribution for licensing details.
|
||||
|
||||
End
|
758
pcre2/ChangeLog
Normal file
758
pcre2/ChangeLog
Normal file
|
@ -0,0 +1,758 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
||||
1. Improve matching speed of patterns starting with + or * in JIT.
|
||||
|
||||
2. Use memchr() to find the first character in an unanchored match in 8-bit
|
||||
mode in the interpreter. This gives a significant speed improvement.
|
||||
|
||||
3. Removed a redundant copy of the opcode_possessify table in the
|
||||
pcre2_auto_possessify.c source.
|
||||
|
||||
4. Fix typos in dftables.c for z/OS.
|
||||
|
||||
5. Change 36 for 10.20 broke the handling of [[:>:]] and [[:<:]] in that
|
||||
processing them could involve a buffer overflow if the following character was
|
||||
an opening parenthesis.
|
||||
|
||||
6. Change 36 for 10.20 also introduced a bug in processing this pattern:
|
||||
/((?x)(*:0))#(?'/. Specifically: if a setting of (?x) was followed by a (*MARK)
|
||||
setting (which (*:0) is), then (?x) did not get unset at the end of its group
|
||||
during the scan for named groups, and hence the external # was incorrectly
|
||||
treated as a comment and the invalid (?' at the end of the pattern was not
|
||||
diagnosed. This caused a buffer overflow during the real compile. This bug was
|
||||
discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its
|
||||
own source module to avoid a circular dependency between src/pcre2_compile.c
|
||||
and src/pcre2_study.c.
|
||||
|
||||
8. A callout with a string argument containing an opening square bracket, for
|
||||
example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer
|
||||
overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
9. The handling of callouts during the pre-pass for named group identification
|
||||
has been tightened up.
|
||||
|
||||
10. The quantifier {1} can be ignored, whether greedy, non-greedy, or
|
||||
possessive. This is a very minor optimization.
|
||||
|
||||
11. A possessively repeated conditional group that could match an empty string,
|
||||
for example, /(?(R))*+/, was incorrectly compiled.
|
||||
|
||||
12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian
|
||||
Persch).
|
||||
|
||||
13. An empty comment (?#) in a pattern was incorrectly processed and could
|
||||
provoke a buffer overflow. This bug was discovered by Karl Skomski with the
|
||||
LLVM fuzzer.
|
||||
|
||||
14. Fix infinite recursion in the JIT compiler when certain patterns such as
|
||||
/(?:|a|){100}x/ are analysed.
|
||||
|
||||
15. Some patterns with character classes involving [: and \\ were incorrectly
|
||||
compiled and could cause reading from uninitialized memory or an incorrect
|
||||
error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]/. The
|
||||
first of these bugs was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
16. Pathological patterns containing many nested occurrences of [: caused
|
||||
pcre2_compile() to run for a very long time. This bug was found by the LLVM
|
||||
fuzzer.
|
||||
|
||||
17. A missing closing parenthesis for a callout with a string argument was not
|
||||
being diagnosed, possibly leading to a buffer overflow. This bug was found by
|
||||
the LLVM fuzzer.
|
||||
|
||||
18. A conditional group with only one branch has an implicit empty alternative
|
||||
branch and must therefore be treated as potentially matching an empty string.
|
||||
|
||||
19. If (?R was followed by - or +, incorrect behaviour happened instead of a
|
||||
diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
20. Another bug that was introduced by change 36 for 10.20: conditional groups
|
||||
whose condition was an assertion preceded by an explicit callout with a string
|
||||
argument might be incorrectly processed, especially if the string contained \Q.
|
||||
This bug was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
21. Compiling PCRE2 with the sanitize options of clang showed up a number of
|
||||
very pedantic coding infelicities and a buffer overflow while checking a UTF-8
|
||||
string if the final multi-byte UTF-8 character was truncated.
|
||||
|
||||
22. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
|
||||
class, where both values are literal letters in the same case, omit the
|
||||
non-letter EBCDIC code points within the range.
|
||||
|
||||
23. Finding the minimum matching length of complex patterns with back
|
||||
references and/or recursions can take a long time. There is now a cut-off that
|
||||
gives up trying to find a minimum length when things get too complex.
|
||||
|
||||
24. An optimization has been added that speeds up finding the minimum matching
|
||||
length for patterns containing repeated capturing groups or recursions.
|
||||
|
||||
25. If a pattern contained a back reference to a group whose number was
|
||||
duplicated as a result of appearing in a (?|...) group, the computation of the
|
||||
minimum matching length gave a wrong result, which could cause incorrect "no
|
||||
match" errors. For such patterns, a minimum matching length cannot at present
|
||||
be computed.
|
||||
|
||||
26. Added a check for integer overflow in conditions (?(<digits>) and
|
||||
(?(R<digits>). This omission was discovered by Karl Skomski with the LLVM
|
||||
fuzzer.
|
||||
|
||||
27. Fixed an issue when \p{Any} inside an xclass did not read the current
|
||||
character.
|
||||
|
||||
28. If pcre2grep was given the -q option with -c or -l, or when handling a
|
||||
binary file, it incorrectly wrote output to stdout.
|
||||
|
||||
29. The JIT compiler did not restore the control verb head in case of *THEN
|
||||
control verbs. This issue was found by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
30. The way recursive references such as (?3) are compiled has been re-written
|
||||
because the old way was the cause of many issues. Now, conversion of the group
|
||||
number into a pattern offset does not happen until the pattern has been
|
||||
completely compiled. This does mean that detection of all infinitely looping
|
||||
recursions is postponed till match time. In the past, some easy ones were
|
||||
detected at compile time. This re-writing was done in response to yet another
|
||||
bug found by the LLVM fuzzer.
|
||||
|
||||
31. A test for a back reference to a non-existent group was missing for items
|
||||
such as \987. This caused incorrect code to be compiled. This issue was found
|
||||
by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
32. Error messages for syntax errors following \g and \k were giving inaccurate
|
||||
offsets in the pattern.
|
||||
|
||||
33. Improve the performance of starting single character repetitions in JIT.
|
||||
|
||||
34. (*LIMIT_MATCH=) now gives an error instead of setting the value to 0.
|
||||
|
||||
35. Error messages for syntax errors in *LIMIT_MATCH and *LIMIT_RECURSION now
|
||||
give the right offset instead of zero.
|
||||
|
||||
36. The JIT compiler should not check repeats after a {0,1} repeat byte code.
|
||||
This issue was found by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
37. The JIT compiler should restore the control chain for empty possessive
|
||||
repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
38. A bug which was introduced by the single character repetition optimization
|
||||
was fixed.
|
||||
|
||||
39. Match limit check added to recursion. This issue was found by Karl Skomski
|
||||
with a custom LLVM fuzzer.
|
||||
|
||||
40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look
|
||||
only at the part of the subject that is relevant when the starting offset is
|
||||
non-zero.
|
||||
|
||||
41. Improve first character match in JIT with SSE2 on x86.
|
||||
|
||||
42. Fix two assertion fails in JIT. These issues were found by Karl Skomski
|
||||
with a custom LLVM fuzzer.
|
||||
|
||||
43. Correct the setting of CMAKE_C_FLAGS in CMakeLists.txt (patch from Roy Ivy
|
||||
III).
|
||||
|
||||
44. Fix bug in RunTest.bat for new test 14, and adjust the script for the added
|
||||
test (there are now 20 in total).
|
||||
|
||||
45. Fixed a corner case of range optimization in JIT.
|
||||
|
||||
46. Add the ${*MARK} facility to pcre2_substitute().
|
||||
|
||||
47. Modifier lists in pcre2test were splitting at spaces without the required
|
||||
commas.
|
||||
|
||||
48. Implemented PCRE2_ALT_VERBNAMES.
|
||||
|
||||
49. Fixed two issues in JIT. These were found by Karl Skomski with a custom
|
||||
LLVM fuzzer.
|
||||
|
||||
50. The pcre2test program has been extended by adding the #newline_default
|
||||
command. This has made it possible to run the standard tests when PCRE2 is
|
||||
compiled with either CR or CRLF as the default newline convention. As part of
|
||||
this work, the new command was added to several test files and the testing
|
||||
scripts were modified. The pcre2grep tests can now also be run when there is no
|
||||
LF in the default newline convention.
|
||||
|
||||
51. The RunTest script has been modified so that, when JIT is used and valgrind
|
||||
is specified, a valgrind suppressions file is set up to ignore "Invalid read of
|
||||
size 16" errors because these are false positives when the hardware supports
|
||||
the SSE2 instruction set.
|
||||
|
||||
52. It is now possible to have comment lines amid the subject strings in
|
||||
pcre2test (and perltest.sh) input.
|
||||
|
||||
53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit().
|
||||
|
||||
54. Add the null_context modifier to pcre2test so that calling pcre2_compile()
|
||||
and the matching functions with NULL contexts can be tested.
|
||||
|
||||
55. Implemented PCRE2_SUBSTITUTE_EXTENDED.
|
||||
|
||||
56. In a character class such as [\W\p{Any}] where both a negative-type escape
|
||||
("not a word character") and a property escape were present, the property
|
||||
escape was being ignored.
|
||||
|
||||
57. Fixed integer overflow for patterns whose minimum matching length is very,
|
||||
very large.
|
||||
|
||||
58. Implemented --never-backslash-C.
|
||||
|
||||
59. Change 55 above introduced a bug by which certain patterns provoked the
|
||||
erroneous error "\ at end of pattern".
|
||||
|
||||
60. The special sequences [[:<:]] and [[:>:]] gave rise to incorrect compiling
|
||||
errors or other strange effects if compiled in UCP mode. Found with libFuzzer
|
||||
and AddressSanitizer.
|
||||
|
||||
61. Whitespace at the end of a pcre2test pattern line caused a spurious error
|
||||
message if there were only single-character modifiers. It should be ignored.
|
||||
|
||||
62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results
|
||||
or segmentation errors for some patterns. Found with libFuzzer and
|
||||
AddressSanitizer.
|
||||
|
||||
63. Very long names in (*MARK) or (*THEN) etc. items could provoke a buffer
|
||||
overflow.
|
||||
|
||||
64. Improve error message for overly-complicated patterns.
|
||||
|
||||
65. Implemented an optional replication feature for patterns in pcre2test, to
|
||||
make it easier to test long repetitive patterns. The tests for 63 above are
|
||||
converted to use the new feature.
|
||||
|
||||
66. In the POSIX wrapper, if regerror() was given too small a buffer, it could
|
||||
misbehave.
|
||||
|
||||
67. In pcre2_substitute() in UTF mode, the UTF validity check on the
|
||||
replacement string was happening before the length setting when the replacement
|
||||
string was zero-terminated.
|
||||
|
||||
68. In pcre2_substitute() in UTF mode, PCRE2_NO_UTF_CHECK can be set for the
|
||||
second and subsequent calls to pcre2_match().
|
||||
|
||||
69. There was no check for integer overflow for a replacement group number in
|
||||
pcre2_substitute(). An added check for a number greater than the largest group
|
||||
number in the pattern means this is not now needed.
|
||||
|
||||
70. The PCRE2-specific VERSION condition didn't work correctly if only one
|
||||
digit was given after the decimal point, or if more than two digits were given.
|
||||
It now works with one or two digits, and gives a compile time error if more are
|
||||
given.
|
||||
|
||||
71. In pcre2_substitute() there was the possibility of reading one code unit
|
||||
beyond the end of the replacement string.
|
||||
|
||||
72. The code for checking a subject's UTF-32 validity for a pattern with a
|
||||
lookbehind involved an out-of-bounds pointer, which could potentially cause
|
||||
trouble in some environments.
|
||||
|
||||
73. The maximum lookbehind length was incorrectly calculated for patterns such
|
||||
as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
|
||||
|
||||
74. Give an error if a lookbehind assertion is longer than 65535 code units.
|
||||
|
||||
75. Give an error in pcre2_substitute() if a match ends before it starts (as a
|
||||
result of the use of \K).
|
||||
|
||||
76. Check the length of subpattern names and the names in (*MARK:xx) etc.
|
||||
dynamically to avoid the possibility of integer overflow.
|
||||
|
||||
77. Implement pcre2_set_max_pattern_length() so that programs can restrict the
|
||||
size of patterns that they are prepared to handle.
|
||||
|
||||
78. (*NO_AUTO_POSSESS) was not working.
|
||||
|
||||
79. Adding group information caching improves the speed of compiling when
|
||||
checking whether a group has a fixed length and/or could match an empty string,
|
||||
especially when recursion or subroutine calls are involved. However, this
|
||||
cannot be used when (?| is present in the pattern because the same number may
|
||||
be used for groups of different sizes. To catch runaway patterns in this
|
||||
situation, counts have been introduced to the functions that scan for empty
|
||||
branches or compute fixed lengths.
|
||||
|
||||
80. Allow for the possibility of the size of the nest_save structure not being
|
||||
a factor of the size of the compiling workspace (it currently is).
|
||||
|
||||
81. Check for integer overflow in minimum length calculation and cap it at
|
||||
65535.
|
||||
|
||||
82. Small optimizations in code for finding the minimum matching length.
|
||||
|
||||
83. Lock out configuring for EBCDIC with non-8-bit libraries.
|
||||
|
||||
84. Test for error code <= 0 in regerror().
|
||||
|
||||
85. Check for too many replacements (more than INT_MAX) in pcre2_substitute().
|
||||
|
||||
86. Avoid the possibility of computing with an out-of-bounds pointer (though
|
||||
not dereferencing it) while handling lookbehind assertions.
|
||||
|
||||
87. Failure to get memory for the match data in regcomp() is now given as a
|
||||
regcomp() error instead of waiting for regexec() to pick it up.
|
||||
|
||||
88. In pcre2_substitute(), ensure that CRLF is not split when it is a valid
|
||||
newline sequence.
|
||||
|
||||
89. Paranoid check in regcomp() for bad error code from pcre2_compile().
|
||||
|
||||
90. Run test 8 (internal offsets and code sizes) for link sizes 3 and 4 as well
|
||||
as for link size 2.
|
||||
|
||||
91. Document that JIT has a limit on pattern size, and give more information
|
||||
about JIT compile failures in pcre2test.
|
||||
|
||||
92. Implement PCRE2_INFO_HASBACKSLASHC.
|
||||
|
||||
93. Re-arrange valgrind support code in pcre2test to avoid spurious reports
|
||||
with JIT (possibly caused by SSE2?).
|
||||
|
||||
94. Support offset_limit in JIT.
|
||||
|
||||
95. A sequence such as [[:punct:]b], that is, a POSIX character class followed
|
||||
by a single ASCII character in a class item, was incorrectly compiled in UCP
|
||||
mode. The POSIX class got lost, but only if the single character followed it.
|
||||
|
||||
96. [:punct:] in UCP mode was matching some characters in the range 128-255
|
||||
that should not have been matched.
|
||||
|
||||
97. If [:^ascii:] or [:^xdigit:] are present in a non-negated class, all
|
||||
characters with code points greater than 255 are in the class. When a Unicode
|
||||
property was also in the class (if PCRE2_UCP is set, escapes such as \w are
|
||||
turned into Unicode properties), wide characters were not correctly handled,
|
||||
and could fail to match.
|
||||
|
||||
98. In pcre2test, make the "startoffset" modifier a synonym of "offset",
|
||||
because it sets the "startoffset" parameter for pcre2_match().
|
||||
|
||||
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
|
||||
an item and its qualifier (for example, A(?#comment)?B) pcre2_compile()
|
||||
misbehaved. This bug was found by the LLVM fuzzer.
|
||||
|
||||
100. The error for an invalid UTF pattern string always gave the code unit
|
||||
offset as zero instead of where the invalidity was found.
|
||||
|
||||
101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not
|
||||
working correctly in UCP mode.
|
||||
|
||||
102. Similar to 99 above, if an isolated \E was present between an item and its
|
||||
qualifier when PCRE2_AUTO_CALLOUT was set, pcre2_compile() misbehaved. This bug
|
||||
was found by the LLVM fuzzer.
|
||||
|
||||
103. The POSIX wrapper function regexec() crashed if the option REG_STARTEND
|
||||
was set when the pmatch argument was NULL. It now returns REG_INVARG.
|
||||
|
||||
104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep.
|
||||
|
||||
105. An empty \Q\E sequence between an item and its qualifier caused
|
||||
pcre2_compile() to misbehave when auto callouts were enabled. This bug
|
||||
was found by the LLVM fuzzer.
|
||||
|
||||
106. If both PCRE2_ALT_VERBNAMES and PCRE2_EXTENDED were set, and a (*MARK) or
|
||||
other verb "name" ended with whitespace immediately before the closing
|
||||
parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when
|
||||
both those options were set.
|
||||
|
||||
107. In a number of places pcre2_compile() was not handling NULL characters
|
||||
correctly, and pcre2test with the "bincode" modifier was not always correctly
|
||||
displaying fields containing NULLS:
|
||||
|
||||
(a) Within /x extended #-comments
|
||||
(b) Within the "name" part of (*MARK) and other *verbs
|
||||
(c) Within the text argument of a callout
|
||||
|
||||
108. If a pattern that was compiled with PCRE2_EXTENDED started with white
|
||||
space or a #-type comment that was followed by (?-x), which turns off
|
||||
PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again,
|
||||
pcre2_compile() assumed that (?-x) applied to the whole pattern and
|
||||
consequently mis-compiled it. This bug was found by the LLVM fuzzer. The fix
|
||||
for this bug means that a setting of any of the (?imsxU) options at the start
|
||||
of a pattern is no longer transferred to the options that are returned by
|
||||
PCRE2_INFO_ALLOPTIONS. In fact, this was an anachronism that should have
|
||||
changed when the effects of those options were all moved to compile time.
|
||||
|
||||
109. An escaped closing parenthesis in the "name" part of a (*verb) when
|
||||
PCRE2_ALT_VERBNAMES was set caused pcre2_compile() to malfunction. This bug
|
||||
was found by the LLVM fuzzer.
|
||||
|
||||
110. Implemented PCRE2_SUBSTITUTE_UNSET_EMPTY, and updated pcre2test to make it
|
||||
possible to test it.
|
||||
|
||||
111. "Harden" pcre2test against ridiculously large values in modifiers and
|
||||
command line arguments.
|
||||
|
||||
112. Implemented PCRE2_SUBSTITUTE_UNKNOWN_UNSET and
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
|
||||
|
||||
113. Fix printing of *MARK names that contain binary zeroes in pcre2test.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments have been added.
|
||||
|
||||
2. Assertion code generator in JIT has been optimized.
|
||||
|
||||
3. The invalid pattern (?(?C) has a missing assertion condition at the end. The
|
||||
pcre2_compile() function read past the end of the input before diagnosing an
|
||||
error. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
4. Implemented pcre2_callout_enumerate().
|
||||
|
||||
5. Fix JIT compilation of conditional blocks whose assertion is converted to
|
||||
(*FAIL). E.g: /(?(?!))/.
|
||||
|
||||
6. The pattern /(?(?!)^)/ caused references to random memory. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly
|
||||
when this assertion was used as a condition, for example (?(?!)a|b). In
|
||||
pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect
|
||||
error about an unsupported item.
|
||||
|
||||
8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
|
||||
possessification code could take exponential time to complete. A recursion
|
||||
depth limit of 1000 has been imposed to limit the resources used by this
|
||||
optimization. This infelicity was discovered by the LLVM fuzzer.
|
||||
|
||||
9. For a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
|
||||
such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored
|
||||
because \S ensures they are all in the class. The code for doing this was
|
||||
interacting badly with the code for computing the amount of space needed to
|
||||
compile the pattern, leading to a buffer overflow. This bug was discovered by
|
||||
the LLVM fuzzer.
|
||||
|
||||
10. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside
|
||||
other kinds of group caused stack overflow at compile time. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment
|
||||
between a subroutine call and its quantifier was incorrectly compiled, leading
|
||||
to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
12. The illegal pattern /(?(?<E>.*!.*)?)/ was not being diagnosed as missing an
|
||||
assertion after (?(. The code was failing to check the character after (?(?<
|
||||
for the ! or = that would indicate a lookbehind assertion. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with
|
||||
a fixed maximum following a group that contains a subroutine reference was
|
||||
incorrectly compiled and could trigger buffer overflow. This bug was discovered
|
||||
by the LLVM fuzzer.
|
||||
|
||||
14. Negative relative recursive references such as (?-7) to non-existent
|
||||
subpatterns were not being diagnosed and could lead to unpredictable behaviour.
|
||||
This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
15. The bug fixed in 14 was due to an integer variable that was unsigned when
|
||||
it should have been signed. Some other "int" variables, having been checked,
|
||||
have either been changed to uint32_t or commented as "must be signed".
|
||||
|
||||
16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
|
||||
caused a stack overflow instead of the diagnosis of a non-fixed length
|
||||
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
|
||||
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
|
||||
|
||||
18. There was a similar problem to 17 in pcre2test for global matches, though
|
||||
the code there did catch the loop.
|
||||
|
||||
19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
|
||||
and a subsequent item in the pattern caused a non-match, backtracking over the
|
||||
repeated \X did not stop, but carried on past the start of the subject, causing
|
||||
reference to random memory and/or a segfault. There were also some other cases
|
||||
where backtracking after \C could crash. This set of bugs was discovered by the
|
||||
LLVM fuzzer.
|
||||
|
||||
20. The function for finding the minimum length of a matching string could take
|
||||
a very long time if mutual recursion was present many times in a pattern, for
|
||||
example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has
|
||||
been implemented. This infelicity was discovered by the LLVM fuzzer.
|
||||
|
||||
21. Implemented PCRE2_NEVER_BACKSLASH_C.
|
||||
|
||||
22. The feature for string replication in pcre2test could read from freed
|
||||
memory if the replication required a buffer to be extended, and it was not
|
||||
working properly in 16-bit and 32-bit modes. This issue was discovered by a
|
||||
fuzzer: see http://lcamtuf.coredump.cx/afl/.
|
||||
|
||||
23. Added the PCRE2_ALT_CIRCUMFLEX option.
|
||||
|
||||
24. Adjust the treatment of \8 and \9 to be the same as the current Perl
|
||||
behaviour.
|
||||
|
||||
25. Static linking against the PCRE2 library using the pkg-config module was
|
||||
failing on missing pthread symbols.
|
||||
|
||||
26. If a group that contained a recursive back reference also contained a
|
||||
forward reference subroutine call followed by a non-forward-reference
|
||||
subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
|
||||
compile correct code, leading to undefined behaviour or an internally detected
|
||||
error. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
27. Quantification of certain items (e.g. atomic back references) could cause
|
||||
incorrect code to be compiled when recursive forward references were involved.
|
||||
For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
28. A repeated conditional group whose condition was a reference by name caused
|
||||
a buffer overflow if there was more than one group with the given name. This
|
||||
bug was discovered by the LLVM fuzzer.
|
||||
|
||||
29. A recursive back reference by name within a group that had the same name as
|
||||
another group caused a buffer overflow. For example: /(?J)(?'d'(?'d'\g{d}))/.
|
||||
This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
30. A forward reference by name to a group whose number is the same as the
|
||||
current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused a
|
||||
buffer overflow at compile time. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
31. Fix -fsanitize=undefined warnings for left shifts of 1 by 31 (it treats 1
|
||||
as an int; fixed by writing it as 1u).
|
||||
|
||||
32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives
|
||||
a warning for "fileno" unless -std=gnu99 is used.
|
||||
|
||||
33. A lookbehind assertion within a set of mutually recursive subpatterns could
|
||||
provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
34. Give an error for an empty subpattern name such as (?'').
|
||||
|
||||
35. Make pcre2test give an error if a pattern that follows #forbid_utf contains
|
||||
\P, \p, or \X.
|
||||
|
||||
36. The way named subpatterns are handled has been refactored. There is now a
|
||||
pre-pass over the regex which does nothing other than identify named
|
||||
subpatterns and count the total captures. This means that information about
|
||||
named patterns is known before the rest of the compile. In particular, it means
|
||||
that forward references can be checked as they are encountered. Previously, the
|
||||
code for handling forward references was contorted and led to several errors in
|
||||
computing the memory requirements for some patterns, leading to buffer
|
||||
overflows.
|
||||
|
||||
37. There was no check for integer overflow in subroutine calls such as (?123).
|
||||
|
||||
38. The table entry for \l in EBCDIC environments was incorrect, leading to its
|
||||
being treated as a literal 'l' instead of causing an error.
|
||||
|
||||
39. If a non-capturing group containing a conditional group that could match
|
||||
an empty string was repeated, it was not identified as matching an empty string
|
||||
itself. For example: /^(?:(?(1)x|)+)+$()/.
|
||||
|
||||
40. In an EBCDIC environment, pcretest was mishandling the escape sequences
|
||||
\a and \e in test subject lines.
|
||||
|
||||
41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
|
||||
instead of the EBCDIC value.
|
||||
|
||||
42. The handling of \c in an EBCDIC environment has been revised so that it is
|
||||
now compatible with the specification in Perl's perlebcdic page.
|
||||
|
||||
43. Single character repetition in JIT has been improved. 20-30% speedup
|
||||
was achieved on certain patterns.
|
||||
|
||||
44. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in
|
||||
ASCII/Unicode. This has now been added to the list of characters that are
|
||||
recognized as white space in EBCDIC.
|
||||
|
||||
45. When PCRE2 was compiled without Unicode support, the use of \p and \P gave
|
||||
an error (correctly) when used outside a class, but did not give an error
|
||||
within a class.
|
||||
|
||||
46. \h within a class was incorrectly compiled in EBCDIC environments.
|
||||
|
||||
47. JIT should return with error when the compiled pattern requires
|
||||
more stack space than the maximum.
|
||||
|
||||
48. Fixed a memory leak in pcre2grep when a locale is set.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. When a pattern is compiled, it remembers the highest back reference so that
|
||||
when matching, if the ovector is too small, extra memory can be obtained to
|
||||
use instead. A conditional subpattern whose condition is a check on a capture
|
||||
having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is
|
||||
another kind of back reference, but it was not setting the highest
|
||||
backreference number. This mattered only if pcre2_match() was called with an
|
||||
ovector that was too small to hold the capture, and there was no other kind of
|
||||
back reference (a situation which is probably quite rare). The effect of the
|
||||
bug was that the condition was always treated as FALSE when the capture could
|
||||
not be consulted, leading to incorrect behaviour by pcre2_match(). This bug
|
||||
has been fixed.
|
||||
|
||||
2. Functions for serialization and deserialization of sets of compiled patterns
|
||||
have been added.
|
||||
|
||||
3. The value that is returned by PCRE2_INFO_SIZE has been corrected to remove
|
||||
excess code units at the end of the data block that may occasionally occur if
|
||||
the code for calculating the size over-estimates. This change stops the
|
||||
serialization code copying uninitialized data, to which valgrind objects. The
|
||||
documentation of PCRE2_INFO_SIZE was incorrect in stating that the size did not
|
||||
include the general overhead. This has been corrected.
|
||||
|
||||
4. All code units in every slot in the table of group names are now set, again
|
||||
in order to avoid accessing uninitialized data when serializing.
|
||||
|
||||
5. The (*NO_JIT) feature is implemented.
|
||||
|
||||
6. If a bug that caused pcre2_compile() to use more memory than allocated was
|
||||
triggered when using valgrind, the code in (3) above passed a stupidly large
|
||||
value to valgrind. This caused a crash instead of an "internal error" return.
|
||||
|
||||
7. A reference to a duplicated named group (either a back reference or a test
|
||||
for being set in a conditional) that occurred in a part of the pattern where
|
||||
PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern
|
||||
to be incorrectly calculated, leading to overwriting.
|
||||
|
||||
8. A mutually recursive set of back references such as (\2)(\1) caused a
|
||||
segfault at compile time (while trying to find the minimum matching length).
|
||||
The infinite loop is now broken (with the minimum length unset, that is, zero).
|
||||
|
||||
9. If an assertion that was used as a condition was quantified with a minimum
|
||||
of zero, matching went wrong. In particular, if the whole group had unlimited
|
||||
repetition and could match an empty string, a segfault was likely. The pattern
|
||||
(?(?=0)?)+ is an example that caused this. Perl allows assertions to be
|
||||
quantified, but not if they are being used as conditions, so the above pattern
|
||||
is faulted by Perl. PCRE2 has now been changed so that it also rejects such
|
||||
patterns.
|
||||
|
||||
10. The error message for an invalid quantifier has been changed from "nothing
|
||||
to repeat" to "quantifier does not follow a repeatable item".
|
||||
|
||||
11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but
|
||||
scanning the compiled pattern in subsequent auto-possessification can get out
|
||||
of step and lead to an unknown opcode. Previously this could have caused an
|
||||
infinite loop. Now it generates an "internal error" error. This is a tidyup,
|
||||
not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an
|
||||
undefined outcome.
|
||||
|
||||
12. A UTF pattern containing a "not" match of a non-ASCII character and a
|
||||
subroutine reference could loop at compile time. Example: /[^\xff]((?1))/.
|
||||
|
||||
13. The locale test (RunTest 3) has been upgraded. It now checks that a locale
|
||||
that is found in the output of "locale -a" can actually be set by pcre2test
|
||||
before it is accepted. Previously, in an environment where a locale was listed
|
||||
but would not set (an example does exist), the test would "pass" without
|
||||
actually doing anything. Also the fr_CA locale has been added to the list of
|
||||
locales that can be used.
|
||||
|
||||
14. Fixed a bug in pcre2_substitute(). If a replacement string ended in a
|
||||
capturing group number without parentheses, the last character was incorrectly
|
||||
literally included at the end of the replacement string.
|
||||
|
||||
15. A possessive capturing group such as (a)*+ with a minimum repeat of zero
|
||||
failed to allow the zero-repeat case if pcre2_match() was called with an
|
||||
ovector too small to capture the group.
|
||||
|
||||
16. Improved error message in pcre2test when setting the stack size (-S) fails.
|
||||
|
||||
17. Fixed two bugs in CMakeLists.txt: (1) Some lines had got lost in the
|
||||
transfer from PCRE1, meaning that CMake configuration failed if "build tests"
|
||||
was selected. (2) The file src/pcre2_serialize.c had not been added to the list
|
||||
of PCRE2 sources, which caused a failure to build pcre2test.
|
||||
|
||||
18. Fixed typo in pcre2_serialize.c (DECL instead of DEFN) that causes problems
|
||||
only on Windows.
|
||||
|
||||
19. Use binary input when reading back saved serialized patterns in pcre2test.
|
||||
|
||||
20. Added RunTest.bat for running the tests under Windows.
|
||||
|
||||
21. "make distclean" was not removing config.h, a file that may be created for
|
||||
use with CMake.
|
||||
|
||||
22. A pattern such as "((?2){0,1999}())?", which has a group containing a
|
||||
forward reference repeated a large (but limited) number of times within a
|
||||
repeated outer group that has a zero minimum quantifier, caused incorrect code
|
||||
to be compiled, leading to the error "internal error: previously-checked
|
||||
referenced subpattern not found" when an incorrect memory address was read.
|
||||
This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's
|
||||
FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.)
|
||||
|
||||
23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine
|
||||
call within a group that also contained a recursive back reference caused
|
||||
incorrect code to be compiled. This bug was reported as "heap overflow",
|
||||
discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015:
|
||||
CVE-2015-2326 was given to this.)
|
||||
|
||||
24. Computing the size of the JIT read-only data in advance has been a source
|
||||
of various issues, and new ones still appear, unfortunately. To fix
|
||||
existing and future issues, size computation is eliminated from the code,
|
||||
and replaced by on-demand memory allocation.
|
||||
|
||||
25. A pattern such as /(?i)[A-`]/, where characters in the other case are
|
||||
adjacent to the end of the range, and the range contained characters with more
|
||||
than one other case, caused incorrect behaviour when compiled in UTF mode. In
|
||||
that example, the range a-j was left out of the class.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36.
|
||||
|
||||
The code of the library was heavily revised as part of the new API
|
||||
implementation. Details of each and every modification were not individually
|
||||
logged. In addition to the API changes, the following changes were made. They
|
||||
are either new functionality, or bug fixes and other noticeable changes of
|
||||
behaviour that were implemented after the code had been forked.
|
||||
|
||||
1. Including Unicode support at build time is now enabled by default, but it
|
||||
can optionally be disabled. It is not enabled by default at run time (no
|
||||
change).
|
||||
|
||||
2. The test program, now called pcre2test, was re-specified and almost
|
||||
completely re-written. Its input is not compatible with input for pcretest.
|
||||
|
||||
3. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
|
||||
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
|
||||
matched by that pattern.
|
||||
|
||||
4. For the benefit of those who use PCRE2 via some other application, that is,
|
||||
not writing the function calls themselves, it is possible to check the PCRE2
|
||||
version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a
|
||||
string such as "yesno".
|
||||
|
||||
5. There are case-equivalent Unicode characters whose encodings use different
|
||||
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
|
||||
theoretically possible for this to happen in UTF-16 too.) If a backreference to
|
||||
a group containing one of these characters was greedily repeated, and during
|
||||
the match a backtrack occurred, the subject might be backtracked by the wrong
|
||||
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
|
||||
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
|
||||
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
|
||||
Incorrect backtracking meant that group 2 captured only the last two bytes.
|
||||
This bug has been fixed; the new code is slower, but it is used only when the
|
||||
strings matched by the repetition are not all the same length.
|
||||
|
||||
6. A pattern such as /()a/ was not setting the "first character must be 'a'"
|
||||
information. This applied to any pattern with a group that matched no
|
||||
characters, for example: /(?:(?=.)|(?<!x))a/.
|
||||
|
||||
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
|
||||
those parentheses to be closed with whatever has been captured so far. However,
|
||||
it was failing to mark any other groups between the highest capture so far and
|
||||
the current group as "unset". Thus, the ovector for those groups contained
|
||||
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
|
||||
matched against "abcd".
|
||||
|
||||
8. The pcre2_substitute() function has been implemented.
|
||||
|
||||
9. If an assertion used as a condition was quantified with a minimum of zero
|
||||
(an odd thing to do, but it happened), SIGSEGV or other misbehaviour could
|
||||
occur.
|
||||
|
||||
10. The PCRE2_NO_DOTSTAR_ANCHOR option has been implemented.
|
||||
|
||||
****
|
67
pcre2/CheckMan
Executable file
67
pcre2/CheckMan
Executable file
|
@ -0,0 +1,67 @@
|
|||
#! /usr/bin/perl
|
||||
|
||||
# A script to scan PCRE2's man pages to check for typos in the control
|
||||
# sequences. I use only a small set of the available repertoire, so it is
|
||||
# straightforward to check that nothing else has slipped in by mistake. This
|
||||
# script should be called in the doc directory.
|
||||
|
||||
# Exit status for the whole run: it stays 0 while every file is clean and
# becomes 1 as soon as any problem has been reported.

$yield = 0;

# Each remaining command-line argument names one man page source to check.

while (scalar(@ARGV) > 0)
  {
  $line = 0;
  $file = shift @ARGV;

  # Use the three-argument form of open with a lexical handle, so that the name
  # is always taken literally as a file to read, whatever characters it holds.

  open(my $in, "<", $file) || die "Failed to open $file\n";

  while (<$in>)
    {
    $line++;

    # The messages below are written with print rather than printf because
    # they contain the file name, which must not be treated as a format.

    if (/^\s*$/)
      {
      # Empty or whitespace-only lines are reported.
      print "Empty line $line of $file\n";
      $yield = 1;
      }

    elsif (/^\./)
      {
      # A line that starts with a dot is a control line. The accepted forms
      # are: a lone dot; .B, .TH, .SH and .SS with an argument; .TP with an
      # optional number; .SM, .br, .rs, .sp, .nf, .fi, .P and .PP on their
      # own; and .\" comment lines that are empty or carry HREF, an HTML
      # anchor, a closing </a>, JOINSH or JOIN. Anything else is reported.

      if (!/^\.\s*$|
            ^\.B\s+\S|
            ^\.TH\s\S|
            ^\.SH\s\S|
            ^\.SS\s\S|
            ^\.TP(?:\s?\d+)?\s*$|
            ^\.SM\s*$|
            ^\.br\s*$|
            ^\.rs\s*$|
            ^\.sp\s*$|
            ^\.nf\s*$|
            ^\.fi\s*$|
            ^\.P\s*$|
            ^\.PP\s*$|
            ^\.\\"(?:\ HREF)?\s*$|
            ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
            ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
            ^\.\\"\s<\/a>\s*$|
            ^\.\\"\sJOINSH\s*$|
            ^\.\\"\sJOIN\s*$/x
         )
        {
        print "Bad control line $line of $file\n";
        $yield = 1;
        }
      }

    else
      {
      # In ordinary text a backslash must be followed by "e", or by "f" and
      # then one of I, B or P. Every other use of a backslash is reported.

      if (/\\[^ef]|\\f[^IBP]/)
        {
        print "Bad backslash in line $line of $file\n";
        $yield = 1;
        }
      }
    }

  close($in);
  }

exit $yield;
|
||||
# End
|
113
pcre2/CleanTxt
Executable file
113
pcre2/CleanTxt
Executable file
|
@ -0,0 +1,113 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to take the output of nroff -man and remove all the backspacing and
|
||||
# the page footers and the screen commands etc so that it is more usefully
|
||||
# readable online. In fact, in the latest nroff, intermediate footers don't
|
||||
# seem to be generated any more.
|
||||
|
||||
# State carried from one input line to the next.

$blankcount = 0;      # Blank lines seen but not yet output
$lastwascut = 0;      # Set when the previous action was to cut out a chunk
$firstheader = 1;     # Set until the first header line has been output

# Input on STDIN; output to STDOUT.

while (<STDIN>)
  {
  # Clean the line: first drop screen controls of the form "ESC [ number m",
  # then drop every "character followed by backspace" pair.

  s/\x1b\[\d+m//g;
  s/.\x8//g;

  # A header line carries the same PCREname(section) text at both ends. Only
  # the first header seen is output. For every header, the line that follows
  # it is read and thrown away.

  if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
    {
    if ($firstheader)
      {
      print;
      $lastprinted = $_;
      $firstheader = 0;
      $lastwascut = 0;
      }
    my $dropped = <STDIN>;
    next;
    }

  # Blank lines are only counted here. What happens to them is decided when
  # the next non-blank line arrives.

  if (/^\s*$/)
    {
    $lastwascut = 0;
    $blankcount++;
    next;
    }

  # From here on the current line is non-blank. If a chunk has just been cut
  # out (page footer) and no blank lines are pending, compare the leading
  # white space of this line with that of the last printed line, and put back
  # one blank line when they differ.

  if ($lastwascut && $blankcount < 1 && defined($lastprinted))
    {
    my ($previndent) = $lastprinted =~ /^(\s*)/;
    my ($thisindent) = /^(\s*)/;
    $blankcount++ if $previndent ne $thisindent;
    }

  # Fewer than 3 blank lines are pending: this is ordinary text. Once
  # something has been printed, a line that starts in the first column is
  # given exactly two blank lines before it, unless it is a "Last updated" or
  # "Copyright" line. Output the blanks and the line, and remember the line.

  if ($blankcount < 3)
    {
    if (defined($lastprinted) && /^\S/ && !(/^Last updated/ || /^Copyright/))
      {
      $blankcount = 2;
      }
    print "\n" x $blankcount;
    print;
    $lastprinted = $_;
    $lastwascut = 0;
    $blankcount = 0;
    next;
    }

  # At least 3 blank lines precede this line. Read the next 3 lines, cleaning
  # them in the same way as the current line. A line that cannot be read
  # because the input has run out is treated as an empty string.

  foreach my $slot (0 .. 2)
    {
    my $ahead = <STDIN>;
    $ahead = "" unless defined $ahead;
    $ahead =~ s/\x1b\[\d+m//g;
    $ahead =~ s/.\x8//g;
    $next[$slot] = $ahead;
    }

  # If any of the 3 following lines is not blank, nothing is cut: output the
  # saved blanks, the current line, and the 3 following lines (after one more
  # pass of backspace removal over each). The current line is what gets
  # remembered as the last printed line.

  if (grep { !/^\s*$/ } @next[0 .. 2])
    {
    print "\n" x $blankcount;
    print;
    foreach my $ahead (@next[0 .. 2])
      {
      $ahead =~ s/.\x8//g;
      print $ahead;
      }
    $lastprinted = $_;
    $lastwascut = 0;
    $blankcount = 0;
    }

  # Otherwise this is a chunk of the form <3 blanks><non-blank><3 blanks>.
  # Drop the current line and the 3 blank lines after it, take 3 off the
  # pending blank count, and remember that a cut has just been done.

  else
    {
    $blankcount -= 3;
    $lastwascut = 1;
    }
  }
|
||||
|
||||
# End
|
35
pcre2/Detrail
Executable file
35
pcre2/Detrail
Executable file
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# This is a script for removing trailing whitespace from lines in files that
|
||||
# are listed on the command line.
|
||||
|
||||
# This subroutine does the work for one file.
|
||||
|
||||
# Remove trailing white space from every newline-terminated line of one file.
#
# Argument:  the name of the file to process.
#
# The whole file is read into memory. It is rewritten in place only if at
# least one line was changed; otherwise it is left untouched. The subroutine
# dies if the file cannot be opened for reading, cannot be opened for writing,
# or cannot be closed cleanly after being rewritten.

sub detrail {
my($file) = $_[0];
my($changed) = 0;

# The three-argument form of open takes the name literally, so leading or
# trailing spaces and characters such as ">" or "|" in it are not interpreted.

open(my $in, "<", $file) || die "Can't open $file for input";
my @lines = <$in>;
close($in);

# The substitution returns true only when it altered the line, so it serves as
# both the test and the fix.

foreach (@lines)
  {
  $changed = 1 if s/\s+\n$/\n/;
  }

if ($changed)
  {
  open(my $out, ">", $file) || die "Can't open $file for output";
  print $out @lines;

  # Buffered write errors only show up when the handle is closed, and by this
  # point the original contents have already been truncated.

  close($out) || die "Can't finish writing $file";
  }
}
|
||||
|
||||
# This is the main program
|
||||
|
||||
# Output field separator: nothing is inserted between the items of a printed
# list.

$, = "";

# Process each file named on the command line, in the order given.

foreach my $name (@ARGV)
  {
  detrail($name);
  }
|
||||
|
||||
# End
|
604
pcre2/HACKING
Normal file
604
pcre2/HACKING
Normal file
|
@ -0,0 +1,604 @@
|
|||
Technical Notes about PCRE2
|
||||
---------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE2 internals. PCRE2 is a library based on the original PCRE library,
|
||||
but with a revised (and incompatible) API. To avoid confusion, the original
|
||||
library is referred to as PCRE1 below. For information about testing PCRE2, see
|
||||
the pcre2test documentation and the comment at the head of the RunTest file.
|
||||
|
||||
PCRE1 releases were up to 8.3x when PCRE2 was developed. The 8.xx series will
|
||||
continue for bugfixes if necessary. PCRE2 releases started at 10.00 to avoid
|
||||
confusion with PCRE1.
|
||||
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
|
||||
restricted in what they could do by comparison with Perl. The interesting part
|
||||
about the algorithm was that the amount of space required to hold the compiled
|
||||
form of an expression was known in advance. The code to apply an expression did
|
||||
not operate by backtracking, as the original Henry Spencer code and current
|
||||
PCRE2 and Perl code do, but instead checked all possibilities simultaneously
|
||||
by keeping a list of current states and checking all of them as it advanced
|
||||
through the subject string. In the terminology of Jeffrey Friedl's book, it was
|
||||
a "DFA algorithm", though it was not a traditional Finite State Machine (FSM).
|
||||
When the pattern was all used up, all remaining states were possible matches,
|
||||
and the one matching the longest subset of the subject string was chosen. This
|
||||
did not necessarily maximize the individual wild portions of the pattern, as is
|
||||
expected in Unix and Perl-style regular expressions.
|
||||
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that formed the original PCRE1 library (which are
|
||||
unrelated to those mentioned above), I tried at first to invent an algorithm
|
||||
that used an amount of store bounded by a multiple of the number of characters
|
||||
in the pattern, to save on compiling time. However, because of the greater
|
||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||
first pass through the pattern is helpful for other reasons.
|
||||
|
||||
|
||||
Support for 16-bit and 32-bit data strings
|
||||
-------------------------------------------
|
||||
|
||||
The library can be compiled in any combination of 8-bit, 16-bit or 32-bit
|
||||
modes, creating up to three different libraries. In the description that
|
||||
follows, the word "short" is used for a 16-bit data quantity, and the phrase
|
||||
"code unit" is used for a quantity that is a byte in 8-bit mode, a short in
|
||||
16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are
|
||||
given in generic form, without the _8, _16, or _32 suffix.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
--------------------------------------------
|
||||
|
||||
Up to and including release 6.7, PCRE1 worked by running a very degenerate
|
||||
first pass to calculate a maximum memory requirement, and then a second pass to
|
||||
do the real compile - which might use a bit less than the predicted amount of
|
||||
memory. The idea was that this would turn out faster than the Henry Spencer
|
||||
code because the first pass is degenerate and the second pass can just store
|
||||
stuff straight into memory, which it knows is big enough.
|
||||
|
||||
|
||||
Computing the memory requirement: how it is
|
||||
-------------------------------------------
|
||||
|
||||
By the time I was working on a potential 6.8 release, the degenerate first pass
|
||||
had become very complicated and hard to maintain. Indeed one of the early
|
||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
||||
I had a flash of inspiration as to how I could run the real compile function in
|
||||
a "fake" mode that enables it to compute how much memory it would need, while
|
||||
actually only ever using a few hundred bytes of working memory, and without too
|
||||
many tests of the mode that might slow it down. So I refactored the compiling
|
||||
functions to work this way. This got rid of about 600 lines of source. It
|
||||
should make future maintenance and development easier. As this was such a major
|
||||
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
||||
major changes were also present in the 7.0 release).
|
||||
|
||||
A side effect of this work was that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there was a downside: compiling ran
|
||||
more slowly than before (30% or more, depending on the pattern) because it now
|
||||
did a full analysis of the pattern. My hope was that this would not be a big
|
||||
issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by the compile function, which uses recursive function calls
|
||||
for nested parenthesized groups. This is a safety feature for environments with
|
||||
small stacks where the patterns are provided by users.
|
||||
|
||||
History repeated itself for release 10.20. A number of bugs relating to named
|
||||
subpatterns had been discovered by fuzzers. Most of these were related to the
|
||||
handling of forward references when it was not known if the named pattern was
|
||||
unique. (References to non-unique names use a different opcode and more
|
||||
memory.) The use of duplicate group numbers (the (?| facility) also caused
|
||||
issues.
|
||||
|
||||
To get around these problems I adopted a new approach by adding a third pass,
|
||||
really a "pre-pass", over the pattern, which does nothing other than identify
|
||||
all the named subpatterns and their corresponding group numbers. This means
|
||||
that the actual compile (both pre-pass and real compile) have full knowledge of
|
||||
group names and numbers throughout. Several dozen lines of messy code were
|
||||
eliminated, though the new pre-pass is not short (skipping over [] classes is
|
||||
complicated).
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre2_match(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. This is not surprising, since it is intended to be
|
||||
as compatible with Perl as possible. This is the function most users of PCRE2
|
||||
will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
|
||||
support, and studying a compiled pattern with JIT is successful, the JIT code
|
||||
is run instead of the normal pcre2_match() code, but the result is the same.
|
||||
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
There is also a supplementary matching function called pcre2_dfa_match(). This
|
||||
implements a DFA matching algorithm that searches simultaneously for all
|
||||
possible matches that start at one point in the subject string. (Going back to
|
||||
my roots: see Historical Note 1 above.) This function interprets the same
|
||||
compiled pattern data as pcre2_match(); however, not all the facilities are
|
||||
available, and those that are do not always work in quite the same way. See the
|
||||
user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
|
||||
some others) may change in the middle of patterns. Their processing is handled
|
||||
entirely at compile time by generating different opcodes for the different
|
||||
settings. The runtime functions do not need to keep track of an options state.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of unsigned code units (bytes in
|
||||
8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing
|
||||
items of variable length. The first code unit in an item contains an opcode,
|
||||
and the length of the item is either implicit in the opcode or contained in the
|
||||
data that follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
|
||||
only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
|
||||
and the 16-bit library can be compiled to use 4-byte values, though this
|
||||
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is
|
||||
necessary only when patterns whose compiled length is greater than 64K code
|
||||
units are going to be processed. When a LINK_SIZE value uses more than one code
|
||||
unit, the most significant unit is first.
|
||||
|
||||
In this description, we assume the "normal" compilation options. Data values
|
||||
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
|
||||
(most significant byte first), or one code unit in 16-bit and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one unit long
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single code unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM start of match (subject + offset): \G
|
||||
OP_SET_SOM set start of match (\K)
|
||||
OP_CIRC ^ (start of data)
|
||||
OP_CIRCM ^ multiline mode (start of data or after newline)
|
||||
OP_NOT_WORD_BOUNDARY \B
|
||||
OP_WORD_BOUNDARY \b
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match. The assertion (?!) is
|
||||
always optimized to OP_FAIL.
|
||||
|
||||
|
||||
Backtracking control verbs with optional data
|
||||
---------------------------------------------
|
||||
|
||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||
OP_MARK is followed by the mark name, preceded by a length in one code unit,
|
||||
and followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with
|
||||
arguments, the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used,
|
||||
with the name following in the same format as OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
||||
the character may be more than one code unit long. In UTF-32 mode, characters
|
||||
are always exactly one code unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?), when applied to a single character, use the
|
||||
following opcodes, which come in caseful and caseless versions:
|
||||
|
||||
Caseful Caseless
|
||||
OP_STAR OP_STARI
|
||||
OP_MINSTAR OP_MINSTARI
|
||||
OP_POSSTAR OP_POSSTARI
|
||||
OP_PLUS OP_PLUSI
|
||||
OP_MINPLUS OP_MINPLUSI
|
||||
OP_POSPLUS OP_POSPLUSI
|
||||
OP_QUERY OP_QUERYI
|
||||
OP_MINQUERY OP_MINQUERYI
|
||||
OP_POSQUERY OP_POSQUERYI
|
||||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII or
|
||||
UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the
|
||||
length is variable. Those with "MIN" in their names are the minimizing
|
||||
versions. Those with "POS" in their names are possessive versions. Other kinds
|
||||
of repeat make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
OP_MINUPTO OP_MINUPTOI
|
||||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a count and then the repeated character. The count
|
||||
is two bytes long in 8-bit mode (most significant byte first), or one code unit
|
||||
in 16-bit and 32-bit modes.
|
||||
|
||||
OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum
|
||||
and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or
|
||||
OP_MINUPTO or OP_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored
|
||||
in the next code unit. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPOSSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEPOSPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEPOSQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEPOSUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two code units that encode the desired property as a type
|
||||
and a value. The types are a set of #defines of the form PT_xxx, and the values
|
||||
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
|
||||
The value is relevant only for PT_GC (General Category), PT_PC (Particular
|
||||
Category), and PT_SC (Script).
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]).
|
||||
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and
|
||||
16-bit and 32-bit modes, subject characters with values greater than 255 can be
|
||||
handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they
|
||||
do.
|
||||
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
|
||||
code points are less than 256, followed by a list of pairs (for a range) and/or
|
||||
single characters and/or properties. In caseless mode, both cases are
|
||||
explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
|
||||
opcode and its data. This is followed by a code unit containing flag bits:
|
||||
XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a
|
||||
bit map is present. There follows the bit map, if XCL_MAP is set, and then a
|
||||
sequence of items coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 255, it is split into two ranges, with characters less than 256 being
|
||||
indicated in the bit map, and the rest with XCL_RANGE.
|
||||
|
||||
When XCL_NOT is set, the bit map, if present, contains bits for characters that
|
||||
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
|
||||
specifies characters and properties that are not allowed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number when the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference to such a group by name
|
||||
generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
|
||||
(not the byte offset) in the group name table of the first entry for the
|
||||
required name, followed by the number of groups with the same name. The
|
||||
matching code can then search for the first one that is set.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of these:
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last three are single-code-unit items, with no data. The others are
|
||||
followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
myself, can be round, square, curly, or pointy. Hence this usage rather than
|
||||
"parentheses".]
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A
|
||||
bracket opcode is followed by a LINK_SIZE value which gives the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset
|
||||
to the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number is a count that immediately follows the offset.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively (see below for possessive repetitions). All three are followed by
|
||||
a LINK_SIZE value giving (as a positive number) the offset back to the matching
|
||||
bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
single-unit opcodes that tell the matcher that skipping the following
|
||||
subpattern entirely is a valid match. In the case of the first two, not
|
||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||
because it may be called as a subroutine from elsewhere in the pattern.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
|
||||
has the same number.
|
||||
|
||||
When a repeated subpattern has an unbounded upper limit, it is checked to see
|
||||
whether it could match an empty string. If this is the case, the opcode in the
|
||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
When a repeated group (capturing or non-capturing) is marked as possessive by
|
||||
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
|
||||
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
|
||||
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||
within the atomic group; the latter when there are. The distinction is needed
|
||||
for when there is a backtrack to before the group - any captures within the
|
||||
group must be reset, so it is necessary to retain backtracking points inside
|
||||
the group, even after it is complete, in order to do this. When there are no
|
||||
captures in an atomic group, all the backtracking can be discarded when it is
|
||||
complete. This is more efficient, and also uses less stack.
|
||||
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||
pointer in the subject string. In ASCII or UTF-32 mode, the count is also the
|
||||
number of code units, but in UTF-8/16 mode each character may occupy more than
|
||||
one code unit. A separate count is present in each alternative of a lookbehind
|
||||
assertion, allowing them to have different (but fixed) lengths.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat.
|
||||
|
||||
If the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name. This allows the
|
||||
matcher to check if any group with the given name is set.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the
|
||||
whole pattern") or OP_DNRREF (with data as for OP_DNCREF).
|
||||
|
||||
For a DEFINE condition, OP_FALSE is used (with no associated data). During
|
||||
compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when
|
||||
the conditional group is complete, there can be a check to ensure that it
|
||||
contains only one top-level branch. Once this has happened, the opcode is
|
||||
changed to OP_FALSE, so the matcher never sees OP_DEFINE.
|
||||
|
||||
There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which
|
||||
tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
|
||||
or OP_FALSE.
|
||||
|
||||
If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
|
||||
must start with an assertion, whose opcode normally immediately follows OP_COND
|
||||
or OP_SCOND. However, if automatic callouts are enabled, a callout is inserted
|
||||
immediately before the assertion. It is also possible to insert a manual
|
||||
callout at this point. Only assertion conditions may have callouts preceding
|
||||
the condition.
|
||||
|
||||
A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
|
||||
parts of the pattern, so this is another opcode that may appear as a condition.
|
||||
It is treated the same as OP_FALSE.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current pattern, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. OP_RECURSE is also used for
|
||||
"subroutine" calls, even though they are not strictly a recursion. Repeated
|
||||
recursions are automatically wrapped inside OP_ONCE brackets, because otherwise
|
||||
some patterns broke them. A non-repeated recursion is not wrapped in OP_ONCE
|
||||
brackets, but it is nevertheless still treated as an atomic group.
|
||||
|
||||
|
||||
Callout
|
||||
-------
|
||||
|
||||
A callout can nowadays have either a numerical argument or a string argument.
|
||||
These use OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are
|
||||
followed by two LINK_SIZE values giving the offset in the pattern string to the
|
||||
start of the following item, and another count giving the length of this item.
|
||||
These values make it possible for pcre2test to output useful tracing
|
||||
information using callouts.
|
||||
|
||||
In the case of a numeric callout, after these two values there is a single code
|
||||
unit containing the callout number, in the range 0-255, with 255 being used for
|
||||
callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT
|
||||
option. Thus, this opcode item is of fixed length:
|
||||
|
||||
[OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER]
|
||||
|
||||
For callouts with string arguments, OP_CALLOUT_STR has three more data items:
|
||||
a LINK_SIZE value giving the complete length of the entire opcode item, a
|
||||
LINK_SIZE item containing the offset within the pattern string to the start of
|
||||
the string argument, and the string itself, preceded by its starting delimiter
|
||||
and followed by a binary zero. When a callout function is called, a pointer to
|
||||
the actual string is passed, but the delimiter can be accessed as string[-1] if
|
||||
the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is
|
||||
compiled as the following bytes (decimal numbers represent binary values):
|
||||
|
||||
[OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0]
|
||||
-------- ------- -------- -------
|
||||
| | | |
|
||||
------- LINK_SIZE items ------
|
||||
|
||||
Opcode table checking
|
||||
---------------------
|
||||
|
||||
The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
|
||||
not a real opcode, but is used to check that tables indexed by opcode are the
|
||||
correct length, in order to catch updating errors.
|
||||
|
||||
Philip Hazel
|
||||
June 2015
|
370
pcre2/INSTALL
Normal file
370
pcre2/INSTALL
Normal file
|
@ -0,0 +1,370 @@
|
|||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. This file is offered as-is,
|
||||
without warranty of any kind.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell command `./configure && make && make install'
|
||||
should configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the `README' file for
|
||||
instructions specific to this package. Some packages provide this
|
||||
`INSTALL' file but do not implement all of the features documented
|
||||
below. The lack of an optional feature in a given package is not
|
||||
necessarily a bug. More recommendations for GNU packages can be found
|
||||
in *note Makefile Conventions: (standards)Makefile Conventions.
|
||||
|
||||
The `configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a `Makefile' in each directory of the package.
|
||||
It may also create one or more `.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script `config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file `config.log' containing compiler output (useful mainly for
|
||||
debugging `configure').
|
||||
|
||||
It can also use an optional file (typically called `config.cache'
|
||||
and enabled with `--cache-file=config.cache' or simply `-C') that saves
|
||||
the results of its tests to speed up reconfiguring. Caching is
|
||||
disabled by default to prevent problems with accidental use of stale
|
||||
cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how `configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the `README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point `config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file `configure.ac' (or `configure.in') is used to create
|
||||
`configure' by a program called `autoconf'. You need `configure.ac' if
|
||||
you want to change it or regenerate `configure' using a newer version
|
||||
of `autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. `cd' to the directory containing the package's source code and type
|
||||
`./configure' to configure the package for your system.
|
||||
|
||||
Running `configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type `make' to compile the package.
|
||||
|
||||
3. Optionally, type `make check' to run any self-tests that come with
|
||||
the package, generally using the just-built uninstalled binaries.
|
||||
|
||||
4. Type `make install' to install the programs and any data files and
|
||||
documentation. When installing into a prefix owned by root, it is
|
||||
recommended that the package be configured and built as a regular
|
||||
user, and only the `make install' phase executed with root
|
||||
privileges.
|
||||
|
||||
5. Optionally, type `make installcheck' to repeat any self-tests, but
|
||||
this time using the binaries in their final installed location.
|
||||
This target does not install anything. Running this target as a
|
||||
regular user, particularly if the prior `make install' required
|
||||
root privileges, verifies that the installation completed
|
||||
correctly.
|
||||
|
||||
6. You can remove the program binaries and object files from the
|
||||
source code directory by typing `make clean'. To also remove the
|
||||
files that `configure' created (so you can compile the package for
|
||||
a different kind of computer), type `make distclean'. There is
|
||||
also a `make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
7. Often, you can also type `make uninstall' to remove the installed
|
||||
files again. In practice, not all packages have tested that
|
||||
uninstallation works correctly, even though it is required by the
|
||||
GNU Coding Standards.
|
||||
|
||||
8. Some packages, particularly those that use Automake, provide `make
|
||||
distcheck', which can be used by developers to test that all other
|
||||
targets like `make install' and `make uninstall' work correctly.
|
||||
This target is generally not run by end users.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that
|
||||
the `configure' script does not know about. Run `./configure --help'
|
||||
for details on some of the pertinent environment variables.
|
||||
|
||||
You can give `configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here
|
||||
is an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the `configure' script. `configure' automatically checks for the
|
||||
source code in the directory that `configure' is in and in `..'. This
|
||||
is known as a "VPATH" build.
|
||||
|
||||
With a non-GNU `make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use `make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
On MacOS X 10.5 and later systems, you can create libraries and
|
||||
executables that work on multiple system types--known as "fat" or
|
||||
"universal" binaries--by specifying multiple `-arch' options to the
|
||||
compiler but only a single `-arch' option to the preprocessor. Like
|
||||
this:
|
||||
|
||||
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CPP="gcc -E" CXXCPP="g++ -E"
|
||||
|
||||
This is not guaranteed to produce working output in all cases; you
|
||||
may have to build one architecture at a time and combine the results
|
||||
using the `lipo' tool if you have problems.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, `make install' installs the package's commands under
|
||||
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||
can specify an installation prefix other than `/usr/local' by giving
|
||||
`configure' the option `--prefix=PREFIX', where PREFIX must be an
|
||||
absolute file name.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like `--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run `configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them. In general, the
|
||||
default for these options is expressed in terms of `${prefix}', so that
|
||||
specifying just `--prefix' will affect all of the other directory
|
||||
specifications that were not explicitly provided.
|
||||
|
||||
The most portable way to affect installation locations is to pass the
|
||||
correct locations to `configure'; however, many packages provide one or
|
||||
both of the following shortcuts of passing variable assignments to the
|
||||
`make install' command line to change installation locations without
|
||||
having to reconfigure or recompile.
|
||||
|
||||
The first method involves providing an override variable for each
|
||||
affected directory. For example, `make install
|
||||
prefix=/alternate/directory' will choose an alternate location for all
|
||||
directory configuration variables that were expressed in terms of
|
||||
`${prefix}'. Any directories that were specified during `configure',
|
||||
but not in terms of `${prefix}', must each be overridden at install
|
||||
time for the entire installation to be relocated. The approach of
|
||||
makefile variable overrides for each directory variable is required by
|
||||
the GNU Coding Standards, and ideally causes no recompilation.
|
||||
However, some platforms have known limitations with the semantics of
|
||||
shared libraries that end up requiring recompilation when using this
|
||||
method, particularly noticeable in packages that use GNU Libtool.
|
||||
|
||||
The second method involves providing the `DESTDIR' variable. For
|
||||
example, `make install DESTDIR=/alternate/directory' will prepend
|
||||
`/alternate/directory' before all installation names. The approach of
|
||||
`DESTDIR' overrides is not required by the GNU Coding Standards, and
|
||||
does not work on platforms that have drive letters. On the other hand,
|
||||
it does better at avoiding recompilation issues, and works well even
|
||||
when some directory options were not specified in terms of `${prefix}'
|
||||
at `configure' time.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving `configure' the
|
||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||
|
||||
Some packages pay attention to `--enable-FEATURE' options to
|
||||
`configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||
is something like `gnu-as' or `x' (for the X Window System). The
|
||||
`README' should mention any `--enable-' and `--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, `configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the `configure' options `--x-includes=DIR' and
|
||||
`--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Some packages offer the ability to configure how verbose the
|
||||
execution of `make' will be. For these packages, running `./configure
|
||||
--enable-silent-rules' sets the default to minimal output, which can be
|
||||
overridden with `make V=1'; while running `./configure
|
||||
--disable-silent-rules' sets the default to verbose, which can be
|
||||
overridden with `make V=0'.
|
||||
|
||||
Particular systems
|
||||
==================
|
||||
|
||||
On HP-UX, the default C compiler is not ANSI C compatible. If GNU
|
||||
CC is not installed, it is recommended to use the following options in
|
||||
order to use an ANSI C compiler:
|
||||
|
||||
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
|
||||
|
||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||
|
||||
HP-UX `make' updates targets which have the same time stamps as
|
||||
their prerequisites, which makes it generally unusable when shipped
|
||||
generated files such as `configure' are involved. Use GNU `make'
|
||||
instead.
|
||||
|
||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
|
||||
a workaround. If GNU CC is not installed, it is therefore recommended
|
||||
to try
|
||||
|
||||
./configure CC="cc"
|
||||
|
||||
and if that doesn't work, try
|
||||
|
||||
./configure CC="cc -nodtk"
|
||||
|
||||
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
|
||||
directory contains several dysfunctional programs; working variants of
|
||||
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
|
||||
in your `PATH', put it _after_ `/usr/bin'.
|
||||
|
||||
On Haiku, software installed for all users goes in `/boot/common',
|
||||
not `/usr/local'. It is recommended to use the following options:
|
||||
|
||||
./configure --prefix=/boot/common
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features `configure' cannot figure out
|
||||
automatically, but needs to determine by the type of machine the package
|
||||
will run on. Usually, assuming the package is built to be run on the
|
||||
_same_ architectures, `configure' can figure that out, but if it prints
|
||||
a message saying it cannot guess the machine type, give it the
|
||||
`--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as `sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS
|
||||
KERNEL-OS
|
||||
|
||||
See the file `config.sub' for the possible values of each field. If
|
||||
`config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option `--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with `--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for `configure' scripts to share,
|
||||
you can create a site shell script called `config.site' that gives
|
||||
default values for variables like `CC', `cache_file', and `prefix'.
|
||||
`configure' looks for `PREFIX/share/config.site' if it exists, then
|
||||
`PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
`CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all `configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to `configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the `configure' command line, using `VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified `gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||
an Autoconf limitation. Until the limitation is lifted, you can use
|
||||
this workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
`configure' Invocation
|
||||
======================
|
||||
|
||||
`configure' recognizes the following options to control how it
|
||||
operates.
|
||||
|
||||
`--help'
|
||||
`-h'
|
||||
Print a summary of all of the options to `configure', and exit.
|
||||
|
||||
`--help=short'
|
||||
`--help=recursive'
|
||||
Print a summary of the options unique to this package's
|
||||
`configure', and exit. The `short' variant lists options used
|
||||
only in the top level, while the `recursive' variant lists options
|
||||
also present in any nested packages.
|
||||
|
||||
`--version'
|
||||
`-V'
|
||||
Print the version of Autoconf used to generate the `configure'
|
||||
script, and exit.
|
||||
|
||||
`--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally `config.cache'. FILE defaults to `/dev/null' to
|
||||
disable caching.
|
||||
|
||||
`--config-cache'
|
||||
`-C'
|
||||
Alias for `--cache-file=config.cache'.
|
||||
|
||||
`--quiet'
|
||||
`--silent'
|
||||
`-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to `/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
`--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
`configure' can determine that directory automatically.
|
||||
|
||||
`--prefix=DIR'
|
||||
Use DIR as the installation prefix. *note Installation Names::
|
||||
for more details, including other options available for fine-tuning
|
||||
the installation locations.
|
||||
|
||||
`--no-create'
|
||||
`-n'
|
||||
Run the configure checks, but stop before creating any output
|
||||
files.
|
||||
|
||||
`configure' also accepts some other, not widely useful, options. Run
|
||||
`configure --help' for more details.
|
83
pcre2/LICENCE
Normal file
83
pcre2/LICENCE
Normal file
|
@ -0,0 +1,83 @@
|
|||
PCRE2 LICENCE
|
||||
-------------
|
||||
|
||||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE2, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself. The data
|
||||
in the testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a just-in-time compiler that can be used to
|
||||
optimize pattern matching. This is an optional feature that can be omitted when
|
||||
the library is built.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
End
|
796
pcre2/Makefile.am
Normal file
796
pcre2/Makefile.am
Normal file
|
@ -0,0 +1,796 @@
|
|||
## Process this file with automake to produce Makefile.in.
|
||||
|
||||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
|
||||
|
||||
## Specify the documentation files that are distributed.
|
||||
|
||||
dist_doc_DATA = \
|
||||
AUTHORS \
|
||||
COPYING \
|
||||
ChangeLog \
|
||||
LICENCE \
|
||||
NEWS \
|
||||
README \
|
||||
doc/pcre2.txt \
|
||||
doc/pcre2-config.txt \
|
||||
doc/pcre2grep.txt \
|
||||
doc/pcre2test.txt
|
||||
|
||||
dist_html_DATA = \
|
||||
doc/html/NON-AUTOTOOLS-BUILD.txt \
|
||||
doc/html/README.txt \
|
||||
doc/html/index.html \
|
||||
doc/html/pcre2-config.html \
|
||||
doc/html/pcre2.html \
|
||||
doc/html/pcre2_callout_enumerate.html \
|
||||
doc/html/pcre2_code_free.html \
|
||||
doc/html/pcre2_compile.html \
|
||||
doc/html/pcre2_compile_context_copy.html \
|
||||
doc/html/pcre2_compile_context_create.html \
|
||||
doc/html/pcre2_compile_context_free.html \
|
||||
doc/html/pcre2_config.html \
|
||||
doc/html/pcre2_dfa_match.html \
|
||||
doc/html/pcre2_general_context_copy.html \
|
||||
doc/html/pcre2_general_context_create.html \
|
||||
doc/html/pcre2_general_context_free.html \
|
||||
doc/html/pcre2_get_error_message.html \
|
||||
doc/html/pcre2_get_mark.html \
|
||||
doc/html/pcre2_get_ovector_count.html \
|
||||
doc/html/pcre2_get_ovector_pointer.html \
|
||||
doc/html/pcre2_get_startchar.html \
|
||||
doc/html/pcre2_jit_compile.html \
|
||||
doc/html/pcre2_jit_free_unused_memory.html \
|
||||
doc/html/pcre2_jit_match.html \
|
||||
doc/html/pcre2_jit_stack_assign.html \
|
||||
doc/html/pcre2_jit_stack_create.html \
|
||||
doc/html/pcre2_jit_stack_free.html \
|
||||
doc/html/pcre2_maketables.html \
|
||||
doc/html/pcre2_match.html \
|
||||
doc/html/pcre2_match_context_copy.html \
|
||||
doc/html/pcre2_match_context_create.html \
|
||||
doc/html/pcre2_match_context_free.html \
|
||||
doc/html/pcre2_match_data_create.html \
|
||||
doc/html/pcre2_match_data_create_from_pattern.html \
|
||||
doc/html/pcre2_match_data_free.html \
|
||||
doc/html/pcre2_pattern_info.html \
|
||||
doc/html/pcre2_serialize_decode.html \
|
||||
doc/html/pcre2_serialize_encode.html \
|
||||
doc/html/pcre2_serialize_free.html \
|
||||
doc/html/pcre2_serialize_get_number_of_codes.html \
|
||||
doc/html/pcre2_set_bsr.html \
|
||||
doc/html/pcre2_set_callout.html \
|
||||
doc/html/pcre2_set_character_tables.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_offset_limit.html \
|
||||
doc/html/pcre2_set_newline.html \
|
||||
doc/html/pcre2_set_parens_nest_limit.html \
|
||||
doc/html/pcre2_set_recursion_limit.html \
|
||||
doc/html/pcre2_set_recursion_memory_management.html \
|
||||
doc/html/pcre2_substitute.html \
|
||||
doc/html/pcre2_substring_copy_byname.html \
|
||||
doc/html/pcre2_substring_copy_bynumber.html \
|
||||
doc/html/pcre2_substring_free.html \
|
||||
doc/html/pcre2_substring_get_byname.html \
|
||||
doc/html/pcre2_substring_get_bynumber.html \
|
||||
doc/html/pcre2_substring_length_byname.html \
|
||||
doc/html/pcre2_substring_length_bynumber.html \
|
||||
doc/html/pcre2_substring_list_free.html \
|
||||
doc/html/pcre2_substring_list_get.html \
|
||||
doc/html/pcre2_substring_nametable_scan.html \
|
||||
doc/html/pcre2_substring_number_from_name.html \
|
||||
doc/html/pcre2api.html \
|
||||
doc/html/pcre2build.html \
|
||||
doc/html/pcre2callout.html \
|
||||
doc/html/pcre2compat.html \
|
||||
doc/html/pcre2demo.html \
|
||||
doc/html/pcre2grep.html \
|
||||
doc/html/pcre2jit.html \
|
||||
doc/html/pcre2limits.html \
|
||||
doc/html/pcre2matching.html \
|
||||
doc/html/pcre2partial.html \
|
||||
doc/html/pcre2pattern.html \
|
||||
doc/html/pcre2perform.html \
|
||||
doc/html/pcre2posix.html \
|
||||
doc/html/pcre2sample.html \
|
||||
doc/html/pcre2serialize.html \
|
||||
doc/html/pcre2stack.html \
|
||||
doc/html/pcre2syntax.html \
|
||||
doc/html/pcre2test.html \
|
||||
doc/html/pcre2unicode.html
|
||||
|
||||
dist_man_MANS = \
|
||||
doc/pcre2-config.1 \
|
||||
doc/pcre2.3 \
|
||||
doc/pcre2_callout_enumerate.3 \
|
||||
doc/pcre2_code_free.3 \
|
||||
doc/pcre2_compile.3 \
|
||||
doc/pcre2_compile_context_copy.3 \
|
||||
doc/pcre2_compile_context_create.3 \
|
||||
doc/pcre2_compile_context_free.3 \
|
||||
doc/pcre2_config.3 \
|
||||
doc/pcre2_dfa_match.3 \
|
||||
doc/pcre2_general_context_copy.3 \
|
||||
doc/pcre2_general_context_create.3 \
|
||||
doc/pcre2_general_context_free.3 \
|
||||
doc/pcre2_get_error_message.3 \
|
||||
doc/pcre2_get_mark.3 \
|
||||
doc/pcre2_get_ovector_count.3 \
|
||||
doc/pcre2_get_ovector_pointer.3 \
|
||||
doc/pcre2_get_startchar.3 \
|
||||
doc/pcre2_jit_compile.3 \
|
||||
doc/pcre2_jit_free_unused_memory.3 \
|
||||
doc/pcre2_jit_match.3 \
|
||||
doc/pcre2_jit_stack_assign.3 \
|
||||
doc/pcre2_jit_stack_create.3 \
|
||||
doc/pcre2_jit_stack_free.3 \
|
||||
doc/pcre2_maketables.3 \
|
||||
doc/pcre2_match.3 \
|
||||
doc/pcre2_match_context_copy.3 \
|
||||
doc/pcre2_match_context_create.3 \
|
||||
doc/pcre2_match_context_free.3 \
|
||||
doc/pcre2_match_data_create.3 \
|
||||
doc/pcre2_match_data_create_from_pattern.3 \
|
||||
doc/pcre2_match_data_free.3 \
|
||||
doc/pcre2_pattern_info.3 \
|
||||
doc/pcre2_serialize_decode.3 \
|
||||
doc/pcre2_serialize_encode.3 \
|
||||
doc/pcre2_serialize_free.3 \
|
||||
doc/pcre2_serialize_get_number_of_codes.3 \
|
||||
doc/pcre2_set_bsr.3 \
|
||||
doc/pcre2_set_callout.3 \
|
||||
doc/pcre2_set_character_tables.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_offset_limit.3 \
|
||||
doc/pcre2_set_newline.3 \
|
||||
doc/pcre2_set_parens_nest_limit.3 \
|
||||
doc/pcre2_set_recursion_limit.3 \
|
||||
doc/pcre2_set_recursion_memory_management.3 \
|
||||
doc/pcre2_substitute.3 \
|
||||
doc/pcre2_substring_copy_byname.3 \
|
||||
doc/pcre2_substring_copy_bynumber.3 \
|
||||
doc/pcre2_substring_free.3 \
|
||||
doc/pcre2_substring_get_byname.3 \
|
||||
doc/pcre2_substring_get_bynumber.3 \
|
||||
doc/pcre2_substring_length_byname.3 \
|
||||
doc/pcre2_substring_length_bynumber.3 \
|
||||
doc/pcre2_substring_list_free.3 \
|
||||
doc/pcre2_substring_list_get.3 \
|
||||
doc/pcre2_substring_nametable_scan.3 \
|
||||
doc/pcre2_substring_number_from_name.3 \
|
||||
doc/pcre2api.3 \
|
||||
doc/pcre2build.3 \
|
||||
doc/pcre2callout.3 \
|
||||
doc/pcre2compat.3 \
|
||||
doc/pcre2demo.3 \
|
||||
doc/pcre2grep.1 \
|
||||
doc/pcre2jit.3 \
|
||||
doc/pcre2limits.3 \
|
||||
doc/pcre2matching.3 \
|
||||
doc/pcre2partial.3 \
|
||||
doc/pcre2pattern.3 \
|
||||
doc/pcre2perform.3 \
|
||||
doc/pcre2posix.3 \
|
||||
doc/pcre2sample.3 \
|
||||
doc/pcre2serialize.3 \
|
||||
doc/pcre2stack.3 \
|
||||
doc/pcre2syntax.3 \
|
||||
doc/pcre2test.1 \
|
||||
doc/pcre2unicode.3
|
||||
|
||||
# The Libtool libraries to install. We'll add to this later.
|
||||
|
||||
lib_LTLIBRARIES =
|
||||
|
||||
# Unit tests you want to run when people type 'make check'.
|
||||
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
|
||||
|
||||
TESTS =
|
||||
check_SCRIPTS =
|
||||
dist_noinst_SCRIPTS =
|
||||
|
||||
# Some of the binaries we make are to be installed, and others are
|
||||
# (non-user-visible) helper programs needed to build the libraries.
|
||||
|
||||
bin_PROGRAMS =
|
||||
noinst_PROGRAMS =
|
||||
|
||||
# Additional files to delete on 'make clean', 'make distclean',
|
||||
# and 'make maintainer-clean'.
|
||||
|
||||
CLEANFILES =
|
||||
DISTCLEANFILES = src/config.h.in~ config.h
|
||||
MAINTAINERCLEANFILES =
|
||||
|
||||
# Additional files to bundle with the distribution, over and above what
|
||||
# the Autotools include by default.
|
||||
|
||||
EXTRA_DIST =
|
||||
|
||||
# These files contain additional m4 macros that are used by autoconf.
|
||||
|
||||
EXTRA_DIST += \
|
||||
m4/ax_pthread.m4 m4/pcre2_visibility.m4
|
||||
|
||||
# These files contain maintenance information
|
||||
|
||||
EXTRA_DIST += \
|
||||
NON-AUTOTOOLS-BUILD \
|
||||
HACKING
|
||||
|
||||
# These files are used in the preparation of a release
|
||||
|
||||
EXTRA_DIST += \
|
||||
PrepareRelease \
|
||||
CheckMan \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
132html \
|
||||
doc/index.html.src
|
||||
|
||||
# These files are usable versions of pcre2.h and config.h that are distributed
|
||||
# for the benefit of people who are building PCRE2 manually, without the
|
||||
# Autotools support.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/pcre2.h.generic \
|
||||
src/config.h.generic
|
||||
|
||||
# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE
|
||||
# version number. Therefore, we can create the generic version just by copying.
|
||||
|
||||
src/pcre2.h.generic: src/pcre2.h.in configure.ac
|
||||
rm -f $@
|
||||
cp -p src/pcre2.h $@
|
||||
|
||||
# It is more complicated for config.h.generic. We need the version that results
|
||||
# from a default configuration so as to get all the default values for PCRE
|
||||
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
|
||||
# doing a configure in a temporary directory. However, some trickery is needed,
|
||||
# because the source directory may already be configured. If you just try
|
||||
# running configure in a new directory, it complains. For this reason, we move
|
||||
# config.status out of the way while doing the default configuration. The
|
||||
# resulting config.h is munged by perl to put #ifdefs round any #defines for
|
||||
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
|
||||
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
|
||||
# sure that PCRE2_EXP_DEFN is unset (in case it has visibility settings).
|
||||
|
||||
src/config.h.generic: configure.ac
|
||||
rm -rf $@ _generic
|
||||
mkdir _generic
|
||||
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
|
||||
cd _generic && $(abs_top_srcdir)/configure || :
|
||||
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
|
||||
test -f _generic/src/config.h
|
||||
perl -n \
|
||||
-e 'BEGIN{$$blank=0;}' \
|
||||
-e 'if(/PCRE2_EXP_DEFN/){print"/* #undef PCRE2_EXP_DEFN */\n";$$blank=0;next;}' \
|
||||
-e 'if(/to make a symbol visible/){next;}' \
|
||||
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
||||
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
|
||||
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
|
||||
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
|
||||
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
|
||||
_generic/src/config.h >$@
|
||||
rm -rf _generic
|
||||
|
||||
MAINTAINERCLEANFILES += src/pcre2.h.generic src/config.h.generic
|
||||
|
||||
# These are the header files we'll install. We do not distribute pcre2.h
|
||||
# because it is generated from pcre2.h.in.
|
||||
|
||||
nodist_include_HEADERS = src/pcre2.h
|
||||
include_HEADERS = src/pcre2posix.h
|
||||
|
||||
# This is the "config" script.
|
||||
|
||||
bin_SCRIPTS = pcre2-config
|
||||
|
||||
## ---------------------------------------------------------------
|
||||
## The dftables program is used to rebuild character tables before compiling
|
||||
## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
|
||||
## program. The default (when --enable-rebuild-chartables is not specified) is
|
||||
## to copy a distributed set of tables that are defined for ASCII code. In this
|
||||
## case, dftables is not needed.
|
||||
|
||||
if WITH_REBUILD_CHARTABLES
|
||||
noinst_PROGRAMS += dftables
|
||||
dftables_SOURCES = src/dftables.c
|
||||
src/pcre2_chartables.c: dftables$(EXEEXT)
|
||||
rm -f $@
|
||||
./dftables$(EXEEXT) $@
|
||||
else
|
||||
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
|
||||
rm -f $@
|
||||
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
|
||||
endif # WITH_REBUILD_CHARTABLES
|
||||
|
||||
BUILT_SOURCES = src/pcre2_chartables.c
|
||||
NODIST_SOURCES = src/pcre2_chartables.c
|
||||
|
||||
## Define the list of common sources, then arrange to build whichever of the
|
||||
## 8-, 16-, or 32-bit libraries are configured.
|
||||
|
||||
COMMON_SOURCES = \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
src/pcre2_maketables.c \
|
||||
src/pcre2_match.c \
|
||||
src/pcre2_match_data.c \
|
||||
src/pcre2_newline.c \
|
||||
src/pcre2_ord2utf.c \
|
||||
src/pcre2_pattern_info.c \
|
||||
src/pcre2_serialize.c \
|
||||
src/pcre2_string_utils.c \
|
||||
src/pcre2_study.c \
|
||||
src/pcre2_substitute.c \
|
||||
src/pcre2_substring.c \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_xclass.c
|
||||
|
||||
if WITH_PCRE2_8
|
||||
lib_LTLIBRARIES += libpcre2-8.la
|
||||
libpcre2_8_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
nodist_libpcre2_8_la_SOURCES = \
|
||||
$(NODIST_SOURCES)
|
||||
libpcre2_8_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=8 \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
libpcre2_8_la_LIBADD =
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
if WITH_PCRE2_16
|
||||
lib_LTLIBRARIES += libpcre2-16.la
|
||||
libpcre2_16_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
nodist_libpcre2_16_la_SOURCES = \
|
||||
$(NODIST_SOURCES)
|
||||
libpcre2_16_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=16 \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
libpcre2_16_la_LIBADD =
|
||||
endif # WITH_PCRE2_16
|
||||
|
||||
if WITH_PCRE2_32
|
||||
lib_LTLIBRARIES += libpcre2-32.la
|
||||
libpcre2_32_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
nodist_libpcre2_32_la_SOURCES = \
|
||||
$(NODIST_SOURCES)
|
||||
libpcre2_32_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=32 \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
libpcre2_32_la_LIBADD =
|
||||
endif # WITH_PCRE2_32
|
||||
|
||||
# The pcre2_chartables.c.dist file is the default version of
|
||||
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified.
|
||||
|
||||
EXTRA_DIST += src/pcre2_chartables.c.dist
|
||||
CLEANFILES += src/pcre2_chartables.c
|
||||
|
||||
# The JIT compiler lives in a separate directory, but its files are #included
|
||||
# when pcre2_jit_compile.c is processed, so they must be distributed.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/sljit/sljitConfig.h \
|
||||
src/sljit/sljitConfigInternal.h \
|
||||
src/sljit/sljitExecAllocator.c \
|
||||
src/sljit/sljitLir.c \
|
||||
src/sljit/sljitLir.h \
|
||||
src/sljit/sljitNativeARM_32.c \
|
||||
src/sljit/sljitNativeARM_64.c \
|
||||
src/sljit/sljitNativeARM_T2_32.c \
|
||||
src/sljit/sljitNativeMIPS_32.c \
|
||||
src/sljit/sljitNativeMIPS_64.c \
|
||||
src/sljit/sljitNativeMIPS_common.c \
|
||||
src/sljit/sljitNativePPC_32.c \
|
||||
src/sljit/sljitNativePPC_64.c \
|
||||
src/sljit/sljitNativePPC_common.c \
|
||||
src/sljit/sljitNativeSPARC_32.c \
|
||||
src/sljit/sljitNativeSPARC_common.c \
|
||||
src/sljit/sljitNativeTILEGX-encoder.c \
|
||||
src/sljit/sljitNativeTILEGX_64.c \
|
||||
src/sljit/sljitNativeX86_32.c \
|
||||
src/sljit/sljitNativeX86_64.c \
|
||||
src/sljit/sljitNativeX86_common.c \
|
||||
src/sljit/sljitUtils.c
|
||||
|
||||
# Some of the JIT sources are also in separate files that are #included.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/pcre2_jit_match.c \
|
||||
src/pcre2_jit_misc.c
|
||||
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||
endif # WITH_PCRE2_32
|
||||
|
||||
if WITH_VALGRIND
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
libpcre2_16_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
libpcre2_32_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_PCRE2_32
|
||||
endif # WITH_VALGRIND
|
||||
|
||||
if WITH_GCOV
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
libpcre2_16_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
libpcre2_32_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_PCRE2_32
|
||||
endif # WITH_GCOV
|
||||
|
||||
## A version of the 8-bit library that has a POSIX API.
|
||||
|
||||
if WITH_PCRE2_8
|
||||
lib_LTLIBRARIES += libpcre2-posix.la
|
||||
libpcre2_posix_la_SOURCES = src/pcre2posix.c
|
||||
libpcre2_posix_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=8 \
|
||||
$(VISIBILITY_CFLAGS) $(AM_CFLAGS)
|
||||
libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||
libpcre2_posix_la_LIBADD = libpcre2-8.la
|
||||
if WITH_GCOV
|
||||
libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_GCOV
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
## Build pcre2grep if the 8-bit library is enabled
|
||||
|
||||
if WITH_PCRE2_8
|
||||
bin_PROGRAMS += pcre2grep
|
||||
pcre2grep_SOURCES = src/pcre2grep.c
|
||||
pcre2grep_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2grep_LDADD = $(LIBZ) $(LIBBZ2)
|
||||
pcre2grep_LDADD += libpcre2-8.la
|
||||
if WITH_GCOV
|
||||
pcre2grep_CFLAGS += $(GCOV_CFLAGS)
|
||||
pcre2grep_LDADD += $(GCOV_LIBS)
|
||||
endif # WITH_GCOV
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
## -------- Testing ----------
|
||||
|
||||
## If JIT support is enabled, arrange for the JIT test program to run.
|
||||
|
||||
if WITH_JIT
|
||||
TESTS += pcre2_jit_test
|
||||
noinst_PROGRAMS += pcre2_jit_test
|
||||
pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
|
||||
pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2_jit_test_LDADD =
|
||||
if WITH_PCRE2_8
|
||||
pcre2_jit_test_LDADD += libpcre2-8.la
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
pcre2_jit_test_LDADD += libpcre2-16.la
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
pcre2_jit_test_LDADD += libpcre2-32.la
|
||||
endif # WITH_PCRE2_32
|
||||
if WITH_GCOV
|
||||
pcre2_jit_test_CFLAGS += $(GCOV_CFLAGS)
|
||||
pcre2_jit_test_LDADD += $(GCOV_LIBS)
|
||||
endif # WITH_GCOV
|
||||
endif # WITH_JIT
|
||||
|
||||
# Build the general pcre2test program. The file src/pcre2_printint.c is
|
||||
# #included by pcre2test as many times as needed, at different code unit
|
||||
# widths.
|
||||
|
||||
bin_PROGRAMS += pcre2test
|
||||
EXTRA_DIST += src/pcre2_printint.c
|
||||
pcre2test_SOURCES = src/pcre2test.c
|
||||
pcre2test_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2test_LDADD = $(LIBREADLINE)
|
||||
|
||||
if WITH_PCRE2_8
|
||||
pcre2test_LDADD += libpcre2-8.la libpcre2-posix.la
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
if WITH_PCRE2_16
|
||||
pcre2test_LDADD += libpcre2-16.la
|
||||
endif # WITH_PCRE2_16
|
||||
|
||||
if WITH_PCRE2_32
|
||||
pcre2test_LDADD += libpcre2-32.la
|
||||
endif # WITH_PCRE2_32
|
||||
|
||||
if WITH_VALGRIND
|
||||
pcre2test_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_VALGRIND
|
||||
|
||||
if WITH_GCOV
|
||||
pcre2test_CFLAGS += $(GCOV_CFLAGS)
|
||||
pcre2test_LDADD += $(GCOV_LIBS)
|
||||
endif # WITH_GCOV
|
||||
|
||||
## The main library tests. Each test is a binary plus a script that runs that
|
||||
## binary in various ways. We install these test binaries in case folks find it
|
||||
## helpful.
|
||||
|
||||
TESTS += RunTest
|
||||
dist_noinst_SCRIPTS += RunTest
|
||||
|
||||
EXTRA_DIST += RunTest.bat
|
||||
|
||||
## When the 8-bit library is configured, pcre2grep will have been built.
|
||||
|
||||
if WITH_PCRE2_8
|
||||
TESTS += RunGrepTest
|
||||
dist_noinst_SCRIPTS += RunGrepTest
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
## Distribute all the test data files
## The testoutput8-W-N files form a complete 3x3 set (W = 8, 16, 32 and
## N = 2, 3, 4); every one of the nine must be listed or it is left out of
## the distribution tarball.
|
||||
|
||||
EXTRA_DIST += \
|
||||
testdata/grepbinary \
|
||||
testdata/grepfilelist \
|
||||
testdata/grepinput \
|
||||
testdata/grepinput3 \
|
||||
testdata/grepinput8 \
|
||||
testdata/grepinputv \
|
||||
testdata/grepinputx \
|
||||
testdata/greplist \
|
||||
testdata/grepoutput \
|
||||
testdata/grepoutput8 \
|
||||
testdata/grepoutputN \
|
||||
testdata/greppatN4 \
|
||||
testdata/testinput1 \
|
||||
testdata/testinput2 \
|
||||
testdata/testinput3 \
|
||||
testdata/testinput4 \
|
||||
testdata/testinput5 \
|
||||
testdata/testinput6 \
|
||||
testdata/testinput7 \
|
||||
testdata/testinput8 \
|
||||
testdata/testinput9 \
|
||||
testdata/testinput10 \
|
||||
testdata/testinput11 \
|
||||
testdata/testinput12 \
|
||||
testdata/testinput13 \
|
||||
testdata/testinput14 \
|
||||
testdata/testinput15 \
|
||||
testdata/testinput16 \
|
||||
testdata/testinput17 \
|
||||
testdata/testinput18 \
|
||||
testdata/testinput19 \
|
||||
testdata/testinput20 \
|
||||
testdata/testinput21 \
|
||||
testdata/testinput22 \
|
||||
testdata/testinput23 \
|
||||
testdata/testinputEBC \
|
||||
testdata/testoutput1 \
|
||||
testdata/testoutput2 \
|
||||
testdata/testoutput3 \
|
||||
testdata/testoutput3A \
|
||||
testdata/testoutput3B \
|
||||
testdata/testoutput4 \
|
||||
testdata/testoutput5 \
|
||||
testdata/testoutput6 \
|
||||
testdata/testoutput7 \
|
||||
testdata/testoutput8-16-2 \
|
||||
testdata/testoutput8-16-3 \
|
||||
testdata/testoutput8-16-4 \
|
||||
testdata/testoutput8-32-2 \
|
||||
testdata/testoutput8-32-3 \
|
||||
testdata/testoutput8-32-4 \
|
||||
testdata/testoutput8-8-2 \
|
||||
testdata/testoutput8-8-3 \
|
||||
testdata/testoutput8-8-4 \
|
||||
testdata/testoutput9 \
|
||||
testdata/testoutput10 \
|
||||
testdata/testoutput11-16 \
|
||||
testdata/testoutput11-32 \
|
||||
testdata/testoutput12-16 \
|
||||
testdata/testoutput12-32 \
|
||||
testdata/testoutput13 \
|
||||
testdata/testoutput14-16 \
|
||||
testdata/testoutput14-32 \
|
||||
testdata/testoutput14-8 \
|
||||
testdata/testoutput15 \
|
||||
testdata/testoutput16 \
|
||||
testdata/testoutput17 \
|
||||
testdata/testoutput18 \
|
||||
testdata/testoutput19 \
|
||||
testdata/testoutput20 \
|
||||
testdata/testoutput21 \
|
||||
testdata/testoutput22-16 \
|
||||
testdata/testoutput22-32 \
|
||||
testdata/testoutput22-8 \
|
||||
testdata/testoutput23 \
|
||||
testdata/testoutputEBC \
|
||||
testdata/valgrind-jit.supp \
|
||||
testdata/wintestinput3 \
|
||||
testdata/wintestoutput3 \
|
||||
perltest.sh
|
||||
|
||||
# RunTest and RunGrepTest should clean up after themselves, but just in case
|
||||
# they don't, add their working files to CLEANFILES.
|
||||
|
||||
CLEANFILES += \
|
||||
testSinput \
|
||||
test3input \
|
||||
test3output \
|
||||
test3outputA \
|
||||
test3outputB \
|
||||
testtry \
|
||||
teststdout \
|
||||
teststderr \
|
||||
teststderrgrep \
|
||||
testtemp1grep \
|
||||
testtemp2grep \
|
||||
testtrygrep \
|
||||
testNinputgrep
|
||||
|
||||
## ------------ End of testing -------------
|
||||
|
||||
|
||||
# PCRE2 demonstration program. Not built automatically. The point is that the
|
||||
# users should build it themselves. So just distribute the source.
|
||||
|
||||
EXTRA_DIST += src/pcre2demo.c
|
||||
|
||||
|
||||
# We have .pc files for pkg-config users.
|
||||
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA =
|
||||
|
||||
if WITH_PCRE2_8
|
||||
pkgconfig_DATA += libpcre2-8.pc libpcre2-posix.pc
|
||||
endif
|
||||
|
||||
if WITH_PCRE2_16
|
||||
pkgconfig_DATA += libpcre2-16.pc
|
||||
endif
|
||||
|
||||
if WITH_PCRE2_32
|
||||
pkgconfig_DATA += libpcre2-32.pc
|
||||
endif
|
||||
|
||||
|
||||
# gcov/lcov code coverage reporting
|
||||
#
|
||||
# Coverage reporting targets:
|
||||
#
|
||||
# coverage: Create a coverage report from 'make check'
|
||||
# coverage-baseline: Capture baseline coverage information
|
||||
# coverage-reset: This zeros the coverage counters only
|
||||
# coverage-report: This creates the coverage report only
|
||||
# coverage-clean-report: This removes the generated coverage report
|
||||
# without cleaning the coverage data itself
|
||||
# coverage-clean-data: This removes the captured coverage data without
|
||||
# removing the coverage files created at compile time (*.gcno)
|
||||
# coverage-clean: This cleans all coverage data including the generated
|
||||
# coverage report.
|
||||
|
||||
if WITH_GCOV
|
||||
COVERAGE_TEST_NAME = $(PACKAGE)
|
||||
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
|
||||
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
|
||||
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
|
||||
COVERAGE_LCOV_EXTRA_FLAGS =
|
||||
COVERAGE_GENHTML_EXTRA_FLAGS =
|
||||
|
||||
coverage_quiet = $(coverage_quiet_$(V))
|
||||
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
|
||||
coverage_quiet_0 = --quiet
|
||||
|
||||
coverage-check: all
|
||||
-$(MAKE) $(AM_MAKEFLAGS) -k check
|
||||
|
||||
coverage-baseline:
|
||||
$(LCOV) $(coverage_quiet) \
|
||||
--directory $(top_builddir) \
|
||||
--output-file "$(COVERAGE_OUTPUT_FILE)" \
|
||||
--capture \
|
||||
--initial
|
||||
|
||||
coverage-report:
|
||||
$(LCOV) $(coverage_quiet) \
|
||||
--directory $(top_builddir) \
|
||||
--capture \
|
||||
--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
|
||||
--test-name "$(COVERAGE_TEST_NAME)" \
|
||||
--no-checksum \
|
||||
--compat-libtool \
|
||||
$(COVERAGE_LCOV_EXTRA_FLAGS)
|
||||
$(LCOV) $(coverage_quiet) \
|
||||
--directory $(top_builddir) \
|
||||
--output-file "$(COVERAGE_OUTPUT_FILE)" \
|
||||
--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
|
||||
"/tmp/*" \
|
||||
"/usr/include/*" \
|
||||
"$(includedir)/*"
|
||||
-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
|
||||
LANG=C $(GENHTML) $(coverage_quiet) \
|
||||
--prefix $(top_builddir) \
|
||||
--output-directory "$(COVERAGE_OUTPUT_DIR)" \
|
||||
--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
|
||||
--show-details "$(COVERAGE_OUTPUT_FILE)" \
|
||||
--legend \
|
||||
$(COVERAGE_GENHTML_EXTRA_FLAGS)
|
||||
@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
|
||||
|
||||
coverage-reset:
|
||||
-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
|
||||
|
||||
coverage-clean-report:
|
||||
-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
|
||||
-rm -rf "$(COVERAGE_OUTPUT_DIR)"
|
||||
|
||||
coverage-clean-data:
|
||||
-find $(top_builddir) -name "*.gcda" -delete
|
||||
|
||||
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
|
||||
-find $(top_builddir) -name "*.gcno" -delete
|
||||
|
||||
coverage-distclean: coverage-clean
|
||||
|
||||
coverage: coverage-reset coverage-baseline coverage-check coverage-report
|
||||
clean-local: coverage-clean
|
||||
distclean-local: coverage-distclean
|
||||
|
||||
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
|
||||
|
||||
# Without coverage support, still arrange for 'make distclean' to get rid of
|
||||
# any coverage files that may have been left from a different configuration.
|
||||
|
||||
else
|
||||
|
||||
coverage:
|
||||
@echo "Configuring with --enable-coverage is required to generate code coverage report."
|
||||
|
||||
DISTCLEANFILES += src/*.gcda src/*.gcno
|
||||
|
||||
distclean-local:
|
||||
rm -rf $(PACKAGE)-$(VERSION)-coverage*
|
||||
|
||||
endif # WITH_GCOV
|
||||
|
||||
## CMake support
|
||||
|
||||
EXTRA_DIST += \
|
||||
cmake/COPYING-CMAKE-SCRIPTS \
|
||||
cmake/FindPackageHandleStandardArgs.cmake \
|
||||
cmake/FindReadline.cmake \
|
||||
cmake/FindEditline.cmake \
|
||||
CMakeLists.txt \
|
||||
config-cmake.h.in
|
||||
|
||||
## end Makefile.am
|
3136
pcre2/Makefile.in
Normal file
3136
pcre2/Makefile.in
Normal file
File diff suppressed because it is too large
Load diff
88
pcre2/NEWS
Normal file
88
pcre2/NEWS
Normal file
|
@ -0,0 +1,88 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
||||
1. Many bugs have been fixed. A large number of them were provoked only by very
|
||||
strange pattern input, and were discovered by fuzzers. Some others were
|
||||
discovered by code auditing. See ChangeLog for details.
|
||||
|
||||
2. The Unicode tables have been updated to Unicode version 8.0.0.
|
||||
|
||||
3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
|
||||
class, where both values are literal letters in the same case, omit the
|
||||
non-letter EBCDIC code points within the range.
|
||||
|
||||
4. There have been a number of enhancements to the pcre2_substitute() function,
|
||||
giving more flexibility to replacement facilities. It is now also possible to
|
||||
cause the function to return the needed buffer size if the one given is too
|
||||
small.
|
||||
|
||||
5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such
|
||||
as (*THEN:name) to be processed for backslashes and to take note of
|
||||
PCRE2_EXTENDED.
|
||||
|
||||
6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a
|
||||
pattern uses \C, and --never-backslash-C makes it possible to compile a version
|
||||
of PCRE2 in which the use of \C is always forbidden.
|
||||
|
||||
7. A limit to the length of pattern that can be handled can now be set by
|
||||
calling pcre2_set_max_pattern_length().
|
||||
|
||||
8. When matching an unanchored pattern, a match can be required to begin within
|
||||
a given number of code units after the start of the subject by calling
|
||||
pcre2_set_offset_limit().
|
||||
|
||||
9. The pcre2test program has been extended to test new facilities, and it can
|
||||
now run the tests when LF on its own is not a valid newline sequence.
|
||||
|
||||
10. The RunTest script has also been updated to enable more tests to be run.
|
||||
|
||||
11. There have been some minor performance enhancements.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments and the pcre2_callout_enumerate() function
|
||||
have been implemented.
|
||||
|
||||
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
|
||||
|
||||
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
|
||||
subject in multiline mode.
|
||||
|
||||
4. The way named subpatterns are handled has been refactored. The previous
|
||||
approach had several bugs.
|
||||
|
||||
5. The handling of \c in EBCDIC environments has been changed to conform to the
|
||||
perlebcdic document. This is an incompatible change.
|
||||
|
||||
6. Bugs have been mended, many of them discovered by fuzzers.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. Serialization and de-serialization functions have been added to the API,
|
||||
making it possible to save and restore sets of compiled patterns, though
|
||||
restoration must be done in the same environment that was used for compilation.
|
||||
|
||||
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
|
||||
creator to specify that JIT is not to be used.
|
||||
|
||||
3. A number of bugs have been fixed. In particular, bugs that caused building
|
||||
on Windows using CMake to fail have been mended.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36. New programs are recommended to use the
|
||||
new library. Programs that use the original (PCRE1) API will need changing
|
||||
before linking with the new library.
|
||||
|
||||
****
|
392
pcre2/NON-AUTOTOOLS-BUILD
Normal file
392
pcre2/NON-AUTOTOOLS-BUILD
Normal file
|
@ -0,0 +1,392 @@
|
|||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
This document has been converted from the PCRE1 document. I have removed a
|
||||
number of sections about building in various environments, as they applied only
|
||||
to PCRE1 and are probably out of date.
|
||||
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE2 C library
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Calling conventions in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE2 on Windows with CMake
|
||||
Testing with RunTest.bat
|
||||
Building PCRE2 on native z/OS and z/VM
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
The basic PCRE2 library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library.
|
||||
|
||||
The PCRE2 distribution includes a "configure" file for use by the
|
||||
configure/make (autotools) build system, as found in many Unix-like
|
||||
environments. The README file contains information about the options for
|
||||
"configure".
|
||||
|
||||
There is also support for CMake, which some users prefer, especially in Windows
|
||||
environments, though it can also be run in Unix-like environments. See the
|
||||
section entitled "Building PCRE2 on Windows with CMake" below.
|
||||
|
||||
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
|
||||
under the names src/config.h.generic and src/pcre2.h.generic. These are
|
||||
provided for those who build PCRE2 without using "configure" or CMake. If you
|
||||
use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
|
||||
|
||||
The following are generic instructions for building the PCRE2 C library "by
|
||||
hand". If you are going to use CMake, this section does not apply to you; you
|
||||
can skip ahead to the CMake section.
|
||||
|
||||
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
|
||||
macro settings that it contains to whatever is appropriate for your
|
||||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators.
|
||||
|
||||
When you compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
||||
An alternative approach is not to edit src/config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need to the
|
||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in src/config.h are used has changed between releases. (In the
|
||||
configure/make world, this is handled automatically.) When upgrading to a
|
||||
new release, you are strongly advised to review src/config.h.generic
|
||||
before re-using what you had previously.
|
||||
|
||||
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file src/pcre2_chartables.c.dist as
|
||||
src/pcre2_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
||||
if you have set up src/config.h), and then run it with the single
|
||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on a
|
||||
system that uses EBCDIC code.
|
||||
|
||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) For an 8-bit library, compile the following source files from the src
|
||||
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
|
||||
set -DHAVE_CONFIG_H if you have set up src/config.h with your
|
||||
configuration, or else use other -D settings to change the configuration
|
||||
as required.
|
||||
|
||||
pcre2_auto_possess.c
|
||||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_find_bracket.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
pcre2_match.c
|
||||
pcre2_match_data.c
|
||||
pcre2_newline.c
|
||||
pcre2_ord2utf.c
|
||||
pcre2_pattern_info.c
|
||||
pcre2_serialize.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
pcre2_substitute.c
|
||||
pcre2_substring.c
|
||||
pcre2_tables.c
|
||||
pcre2_ucd.c
|
||||
pcre2_valid_utf.c
|
||||
pcre2_xclass.c
|
||||
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE2 header files are first
|
||||
sought in the src directory under the current directory. Otherwise you run
|
||||
the risk of picking up a previously-installed file from somewhere else.
|
||||
|
||||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory, where
|
||||
there should be 22 files, all of whose names begin with "sljit". It also
|
||||
#includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should
|
||||
not compile these yourself.
|
||||
|
||||
(5) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
|
||||
If your system has static and shared libraries, you may have to do this
|
||||
once for each type.
|
||||
|
||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the src/pcre2posix.h file and then
|
||||
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||
library.
|
||||
|
||||
(8) The pcre2test program can be linked with any combination of the 8-bit,
|
||||
16-bit and 32-bit libraries (depending on what you selected in
|
||||
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
|
||||
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
|
||||
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
|
||||
needs the pcre2posix wrapper library.
|
||||
|
||||
(9) Run pcre2test on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. There are
|
||||
comments about what each test does in the section entitled "Testing PCRE2"
|
||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||
32-bit libraries, you need to run pcre2test with the -16 option to do
|
||||
16-bit tests and with the -32 option to do 32-bit tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options are selected.
|
||||
For example, test 4 is for Unicode support, and will not run if you have
|
||||
built PCRE2 without it. See the comments at the start of each testinput
|
||||
file. If you have a suitable Unix-like shell, the RunTest script will run
|
||||
the appropriate tests for you. The command "RunTest list" will output a
|
||||
list of all the tests.
|
||||
|
||||
Note that the supplied files are in Unix format, with just LF characters
|
||||
as line terminators. You may need to edit them to change this if your
|
||||
system uses a different convention.
|
||||
|
||||
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
|
||||
by running pcre2test with the -jit option. This is done automatically by
|
||||
the RunTest script. You might also like to build and run the freestanding
|
||||
JIT test program, src/pcre2_jit_test.c.
|
||||
|
||||
(11) If you want to use the pcre2grep command, compile and link
|
||||
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
||||
need the pcre2posix library).
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
The default processor stack size of 1Mb in some Windows environments is too
|
||||
small for matching patterns that need much recursion. In particular, test 2 may
|
||||
fail because of this. Normally, running out of stack causes a crash, but there
|
||||
have been cases where the test program has just died silently. See your linker
|
||||
documentation for how to increase stack size if you experience problems. If you
|
||||
are using CMake (see "BUILDING PCRE2 ON WINDOWS WITH CMAKE" below) and the gcc
|
||||
compiler, you can increase the stack size for pcre2test and pcre2grep by
|
||||
setting the CMAKE_EXE_LINKER_FLAGS variable to "-Wl,--stack,8388608" (for
|
||||
example). The Linux default of 8Mb is a reasonable choice for the stack, though
|
||||
even that can be too small for some pattern/subject combinations.
|
||||
|
||||
PCRE2 has a compile configuration option to disable the use of stack for
|
||||
recursion so that heap is used instead. However, pattern matching is
|
||||
significantly slower when this is done. There is more about stack usage in the
|
||||
"pcre2stack" documentation.
|
||||
|
||||
|
||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
If you want to statically link a program against a PCRE2 library in the form of
|
||||
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
|
||||
|
||||
|
||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
It is possible to compile programs to use different calling conventions using
|
||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||
easier to change the calling convention for the exported functions in the
|
||||
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
|
||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||
not set, it defaults to empty; the default calling convention is then used
|
||||
(which is what is wanted most of the time).
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
|
||||
|
||||
There are two ways of building PCRE2 using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
support for building using CMake, which some users find a more straightforward
|
||||
way of building PCRE2 under Windows.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
On both MinGW and Cygwin, PCRE2 should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre2-8 and libpcre2-posix. These
|
||||
are separate libraries: when you link with libpcre2-posix you must also link
|
||||
with libpcre2-8, which contains the basic functions.
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE2 to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE2 are in UNIX format, with LF
|
||||
characters as line terminators. Unless your PCRE2 library uses a default
|
||||
newline option that includes LF as a valid newline, it may be necessary to
|
||||
change the line terminators in the test files to get some of the tests to work.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH CMAKE
|
||||
|
||||
CMake is an alternative configuration facility that can be used instead of
|
||||
"configure". CMake creates project files (make files, solution files, etc.)
|
||||
tailored to numerous development environments, including Visual Studio,
|
||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
cache can be deleted by selecting "File > Delete Cache".
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start CMake from the Windows Start menu, as this can lead to errors.
|
||||
|
||||
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
10. Hit "Generate".
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
||||
If configured with CMake, building the test project ("make test" or building
|
||||
RUN_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
|
||||
on your configuration options, possibly other test programs) in the build
|
||||
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
|
||||
exe paths.
|
||||
|
||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||
of the source directory: Open command shell window. Chdir to the location
|
||||
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
|
||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||
|
||||
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||
|
||||
Otherwise:
|
||||
|
||||
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
|
||||
have been created.
|
||||
|
||||
2. Edit RunTest.bat to identify the full or relative location of
|
||||
the pcre2 source (in which the testdata folder resides), e.g.:
|
||||
|
||||
set srcdir=C:\pcre2\pcre2-10.00
|
||||
|
||||
3. In a Windows command environment, chdir to the location of your bat and
|
||||
exe programs.
|
||||
|
||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||
results, and discrepancies will be identified in the console output.
|
||||
|
||||
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
|
||||
|
||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||
applications can be supported through UNIX System Services, and in such an
|
||||
environment PCRE2 can be built in the same way as in other systems. However, in
|
||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see this web site:
|
||||
|
||||
http://www.zaconsultants.net
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
You may also download PCRE1 from WWW.CBTTAPE.ORG, file 882. Everything, source
|
||||
and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 16 July 2015
|
235
pcre2/PrepareRelease
Executable file
235
pcre2/PrepareRelease
Executable file
|
@ -0,0 +1,235 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Script to prepare the files for building a PCRE2 release. It does some
|
||||
# processing of the documentation, detrails files, and creates pcre2.h.generic
|
||||
# and config.h.generic (for use by builders who can't run ./configure).
|
||||
|
||||
# You must run this script before running "make dist". If its first argument
|
||||
# is "doc", it stops after preparing the documentation. There are no other
|
||||
# arguments. The script makes use of the following files:
|
||||
|
||||
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
|
||||
# "knows" the relevant troff constructs that are used in the PCRE2
|
||||
# man pages.
|
||||
|
||||
# CheckMan A Perl script that checks man pages for typos in the mark up.
|
||||
|
||||
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
|
||||
# removing backspaces and other redundant text so as to produce
|
||||
# a readable .txt file.
|
||||
|
||||
# Detrail A Perl script that removes trailing spaces from files.
|
||||
|
||||
# doc/index.html.src
|
||||
# A file that is copied as index.html into the doc/html directory
|
||||
# when the HTML documentation is built. It works like this so that
|
||||
# doc/html can be deleted and re-created from scratch.
|
||||
|
||||
# README & NON-AUTOTOOLS-BUILD
|
||||
# These files are copied into the doc/html directory, with .txt
|
||||
# extensions so that they can be hyperlinked from the HTML
|
||||
# documentation, because some people just go to the HTML without
|
||||
# looking for text files.
|
||||
|
||||
|
||||
# First, sort out the documentation. Remove pcre2demo.3 first because it won't
|
||||
# pass the markup check (it is created below, using markup that none of the
|
||||
# other pages use).
|
||||
|
||||
# Work inside doc/. Stop at once if it cannot be entered: the rm commands and
# output redirections that follow must never run in the wrong directory.
cd doc || exit 1
echo Processing documentation

/bin/rm -f pcre2demo.3

# Check the remaining man pages; give up if the markup check fails.

perl ../CheckMan *.1 *.3 || exit 1
|
||||
|
||||
# Make Text form of the documentation. It needs some mangling to make it
|
||||
# tidy for online reading. Concatenate all the .3 stuff, but omit the
|
||||
# individual function pages.
|
||||
|
||||
# Start pcre2.txt afresh with its explanatory banner. The banner is literal
# text with nothing for the shell to expand, so the delimiter is quoted.
cat >pcre2.txt <<'End'
-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE2 man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcre2demo program. There are separate text files for the pcre2grep and
pcre2test commands.
-----------------------------------------------------------------------------


End
|
||||
|
||||
# Append the cleaned-up text of each section 3 overview page to pcre2.txt.
# Each page is followed by a rule, and every page except pcre2sample is also
# followed by two spacer lines.
echo "Making pcre2.txt"

pages="pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit
  pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform
  pcre2posix pcre2sample pcre2serialize pcre2stack pcre2syntax
  pcre2unicode"
rule="------------------------------------------------------------------------------"

for file in $pages ; do
  raw="$file.rawtxt"
  echo " Processing $file.3"
  nroff -c -man "$file.3" >"$raw"
  perl ../CleanTxt <"$raw" >>pcre2.txt
  /bin/rm "$raw"
  echo "$rule" >>pcre2.txt
  case "$file" in
    pcre2sample)
      ;;
    *)
      echo " " >>pcre2.txt
      echo " " >>pcre2.txt
      ;;
  esac
done
|
||||
|
||||
# The three commands: each section 1 page is formatted with nroff, cleaned
# up, and written to a .txt file of its own.

for file in pcre2test pcre2grep pcre2-config ; do
  raw="$file.rawtxt"
  echo "Making $file.txt"
  nroff -c -man "$file.1" >"$raw"
  perl ../CleanTxt <"$raw" >"$file.txt"
  /bin/rm "$raw"
done
|
||||
|
||||
|
||||
# Make pcre2demo.3 from the pcre2demo.c source file. The Perl program opens
# and writes pcre2demo.3 itself, so its standard output is not redirected to
# that file as well. The here-document delimiter is quoted so that the shell
# passes the Perl text through untouched.

echo "Making pcre2demo.3"
perl <<"END"
open(my $in, "<", "../src/pcre2demo.c")
  or die "Failed to open src/pcre2demo.c: $!\n";
open(my $out, ">", "pcre2demo.3")
  or die "Failed to open pcre2demo.3: $!\n";

# Define the EX and EE macros that bracket the listing, then start it.
print $out ".\\\" Start example.\n" .
           ".de EX\n" .
           ". nr mE \\\\n(.f\n" .
           ". nf\n" .
           ". nh\n" .
           ". ft CW\n" .
           "..\n" .
           ".\n" .
           ".\n" .
           ".\\\" End example.\n" .
           ".de EE\n" .
           ". ft \\\\n(mE\n" .
           ". fi\n" .
           ". hy \\\\n(HY\n" .
           "..\n" .
           ".\n" .
           ".EX\n" ;

# Copy the C source, writing every backslash as \e.
while (<$in>)
  {
  s/\\/\\e/g;
  print $out $_;
  }
print $out ".EE\n";
close($in);

# Buffered write errors only surface when the handle is closed.
close($out) or die "Failed to write pcre2demo.3: $!\n";
END
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# Make HTML form of the documentation. The html directory is emptied and then
# refilled, starting with the index page and .txt copies of README and
# NON-AUTOTOOLS-BUILD.

echo "Making HTML documentation"
/bin/rm html/*
cp index.html.src html/index.html
cp ../README html/README.txt
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt

# The section 1 (command) pages always get a table of contents. Stop if a
# conversion fails, in the same way as for the section 3 pages.

for file in *.1 ; do
  base=`basename $file .1`
  echo " Making $base.html"
  perl ../132html -toc $base <$file >html/$base.html
  if [ $? != 0 ] ; then exit 1; fi
done
|
||||
|
||||
# Section 3 pages. No table of contents is generated for the function summary
# pages (those with an underscore in their names), nor for the small pages
# that have only one section.

for file in *.3 ; do
  base=`basename "$file" .3`
  case "$base" in
    *_*)
      toc="" ;;
    pcre2sample | pcre2stack | pcre2compat | pcre2limits | pcre2unicode)
      toc="" ;;
    *)
      toc=-toc ;;
  esac
  echo " Making $base.html"
  # $toc is deliberately unquoted: when empty it must vanish from the command.
  perl ../132html $toc $base <$file >html/$base.html || exit 1
done
|
||||
|
||||
# The documentation is finished. Go back up from doc/, and finish here when
# the first argument is "doc" (documentation only).

cd ..
echo Documentation done
case "$1" in
  doc) exit ;;
esac
|
||||
|
||||
# These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing.
#
# src/pcre2_serialize.c and src/pcre2_substitute.c are part of the library
# source (NON-AUTOTOOLS-BUILD lists them among the files to compile), so they
# are detrailed along with the other library sources.

files="\
  Makefile.am \
  configure.ac \
  README \
  LICENCE \
  COPYING \
  AUTHORS \
  NEWS \
  NON-AUTOTOOLS-BUILD \
  INSTALL \
  132html \
  CleanTxt \
  Detrail \
  ChangeLog \
  CMakeLists.txt \
  RunGrepTest \
  RunTest \
  pcre2-config.in \
  perltest.sh \
  libpcre2-8.pc.in \
  libpcre2-16.pc.in \
  libpcre2-32.pc.in \
  libpcre2-posix.pc.in \
  src/dftables.c \
  src/pcre2.h.in \
  src/pcre2_auto_possess.c \
  src/pcre2_compile.c \
  src/pcre2_config.c \
  src/pcre2_context.c \
  src/pcre2_dfa_match.c \
  src/pcre2_error.c \
  src/pcre2_find_bracket.c \
  src/pcre2_internal.h \
  src/pcre2_intmodedep.h \
  src/pcre2_jit_compile.c \
  src/pcre2_jit_match.c \
  src/pcre2_jit_misc.c \
  src/pcre2_jit_test.c \
  src/pcre2_maketables.c \
  src/pcre2_match.c \
  src/pcre2_match_data.c \
  src/pcre2_newline.c \
  src/pcre2_ord2utf.c \
  src/pcre2_pattern_info.c \
  src/pcre2_printint.c \
  src/pcre2_serialize.c \
  src/pcre2_string_utils.c \
  src/pcre2_study.c \
  src/pcre2_substitute.c \
  src/pcre2_substring.c \
  src/pcre2_tables.c \
  src/pcre2_ucd.c \
  src/pcre2_ucp.h \
  src/pcre2_valid_utf.c \
  src/pcre2_xclass.c \
  src/pcre2demo.c \
  src/pcre2grep.c \
  src/pcre2posix.c \
  src/pcre2posix.h \
  src/pcre2test.c"

# $files is deliberately unquoted so that it splits into one argument per file.
echo Detrailing
perl ./Detrail $files doc/p* doc/html/*

echo Done

#End
|
843
pcre2/README
Normal file
843
pcre2/README
Normal file
|
@ -0,0 +1,843 @@
|
|||
README file for PCRE2 (Perl-compatible regular expression library)
|
||||
------------------------------------------------------------------
|
||||
|
||||
PCRE2 is a re-working of the original PCRE library to provide an entirely new
|
||||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
subscribe or manage your subscription here:
|
||||
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
||||
The PCRE2 APIs
|
||||
Documentation for PCRE2
|
||||
Contributions by users of PCRE2
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
Building PCRE2 without using autotools
|
||||
Building PCRE2 using autotools
|
||||
Retrieving configuration information
|
||||
Shared libraries
|
||||
Cross-compiling using autotools
|
||||
Making new tarballs
|
||||
Testing PCRE2
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE2 APIs
|
||||
--------------
|
||||
|
||||
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. There are no C++
|
||||
wrappers.
|
||||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These can be found in a library called libpcre2posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcre2posix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||
renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE2 and there is already a POSIX
|
||||
regex library installed on your system, as well as worrying about the regex.h
|
||||
header file (as mentioned above), you must also take care when linking programs
|
||||
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
|
||||
pick up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE2 with the addition of
|
||||
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
|
||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||
effect of renaming the functions so that the names no longer clash. Of course,
|
||||
you have to do the same thing for your applications, or write them using the
|
||||
new names.
|
||||
|
||||
|
||||
Documentation for PCRE2
|
||||
-----------------------
|
||||
|
||||
If you install PCRE2 in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre2". The one that is
|
||||
just called "pcre2" lists all the others. In addition to these man pages, the
|
||||
PCRE2 documentation is supplied in two other forms:
|
||||
|
||||
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
|
||||
doc/pcre2test.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except the
|
||||
listing of pcre2demo.c and those that summarize individual functions. The
|
||||
other two are the text forms of the section 1 man pages for the pcre2grep
|
||||
and pcre2test commands. These text forms are provided for ease of scanning
|
||||
with text editors or similar tools. They are installed in
|
||||
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
|
||||
(defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre2/html.
|
||||
|
||||
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
---------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the comments in the file
|
||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||
many Unix-like systems.
|
||||
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
PCRE2 has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE2 on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
file for ways of building PCRE2 without using autotools.
|
||||
|
||||
|
||||
Building PCRE2 using autotools
|
||||
------------------------------
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
||||
To build PCRE2 on a system that supports autotools, first run the "configure"
|
||||
command from the PCRE2 distribution directory, with your current directory set
|
||||
to the directory where you want the files to be created. This command is a
|
||||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE2 within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
This command specifies that the C compiler should be run with the flags '-O2
|
||||
-Wall' instead of the default, and that "make install" should install PCRE2
|
||||
under /opt/local instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE2 source
|
||||
into /source/pcre2/pcre2-xxx, but you want to build it in
|
||||
/build/pcre2/pcre2-xxx:
|
||||
|
||||
cd /build/pcre2/pcre2-xxx
|
||||
/source/pcre2/pcre2-xxx/configure
|
||||
|
||||
PCRE2 is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE2
|
||||
library. They are also documented in the pcre2build man page.
|
||||
|
||||
. By default, both shared and static libraries are built. You can change this
|
||||
by adding one of these options to the "configure" command:
|
||||
|
||||
--disable-shared
|
||||
--disable-static
|
||||
|
||||
(See also "Shared libraries on Unix-like systems" below.)
|
||||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
|
||||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
||||
. If you do not want to make use of the support for UTF-8 Unicode character
|
||||
strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit
|
||||
library, or UTF-32 Unicode character strings in the 32-bit library, you can
|
||||
add --disable-unicode to the "configure" command. This reduces the size of
|
||||
the libraries. It is not possible to configure one library with Unicode
|
||||
support, and another without, in the same configuration.
|
||||
|
||||
When Unicode support is available, the use of a UTF encoding still has to be
|
||||
enabled by setting the PCRE2_UTF option at run time or starting a pattern
|
||||
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
|
||||
be either ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is
|
||||
not possible to use both --enable-unicode and --enable-ebcdic at the same
|
||||
time.
|
||||
|
||||
As well as supporting UTF strings, Unicode support includes support for the
|
||||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
Escape sequences such as \d and \w in patterns do not by default make use of
|
||||
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
|
||||
or starting a pattern with (*UCP).
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE2 can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
|
||||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE2 considers
|
||||
to be the end of a line (see above). However, the caller of PCRE2 can
|
||||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. In a pattern, the escape sequence \C matches a single code unit, even in a
|
||||
UTF mode. This can be dangerous because it breaks up multi-code-unit
|
||||
characters. You can build PCRE2 with the use of \C permanently locked out by
|
||||
adding --enable-never-backslash-C (note the upper case C) to the "configure"
|
||||
command. When \C is allowed by the library, individual applications can lock
|
||||
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
|
||||
|
||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre2_match() can supply their own value. There is more discussion on the
|
||||
pcre2api man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcre2stack man page.
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
||||
ignored, as 4-byte offsets are always used.
|
||||
|
||||
. You can build PCRE2 so that its internal match() function that is called from
|
||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||
blocks obtained from the heap to save data that would otherwise be saved on
|
||||
the stack. To build PCRE2 like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE2 runs more slowly in this mode, but it may
|
||||
be necessary in environments with limited stack sizes. This applies only to
|
||||
the normal execution of the pcre2_match() function; if JIT support is being
|
||||
successfully used, it is not relevant. Equally, it does not apply to
|
||||
pcre2_dfa_match(), which does not use deeply nested recursion. There is a
|
||||
discussion about stack sizes in the pcre2stack man page.
|
||||
|
||||
. For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
||||
not specify this option, pcre2_chartables.c is created as a copy of
|
||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||
information.
|
||||
|
||||
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
|
||||
character code (as opposed to ASCII/Unicode) by specifying
|
||||
|
||||
--enable-ebcdic --disable-unicode
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
|
||||
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||
instead of the default 0x15.
|
||||
|
||||
. If you specify --enable-debug, additional debugging code is included in the
|
||||
build. This option is intended for use by the PCRE2 maintainers.
|
||||
|
||||
. In environments where valgrind is installed, if you specify
|
||||
|
||||
--enable-valgrind
|
||||
|
||||
PCRE2 will use valgrind annotations to mark certain memory regions as
|
||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE2 itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||
is installed, if you specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
the build process implements a code coverage report for the test suite. The
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE2 for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE2. There is more information about coverage
|
||||
reporting in the "pcre2build" documentation.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||
specifying one or both of
|
||||
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default size (in bytes) of the internal buffer used by pcre2grep can be
|
||||
set by, for example:
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
|
||||
The value must be a plain integer. The default is 20480.
|
||||
|
||||
. It is possible to compile pcre2test so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
||||
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
|
||||
|
||||
If this is done, when pcre2test's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
pcre2test linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
Enabling libreadline causes the -lreadline option to be added to the
|
||||
pcre2test build. In many operating environments with a system-installed
|
||||
readline library this is sufficient. However, in some environments (e.g. if
|
||||
an unmodified distribution version of readline is in use), it may be
|
||||
necessary to specify something like LIBS="-lncurses" as well. This is
|
||||
because, to quote the readline INSTALL, "Readline uses the termcap functions,
|
||||
but does not link with the termcap or curses library itself, allowing
|
||||
applications which link with readline the to choose an appropriate library."
|
||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile the makefile that builds the library
|
||||
. src/config.h build-time configuration options for the library
|
||||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
. libpcre2-32.pc )
|
||||
. libpcre2-posix.pc )
|
||||
. libtool script that builds shared and/or static libraries
|
||||
|
||||
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
|
||||
tarballs under the names config.h.generic and pcre2.h.generic. These are
|
||||
provided for those who have to build PCRE2 without using "configure" or CMake.
|
||||
If you use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". This builds whichever of the
|
||||
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
|
||||
program called pcre2test. If you enabled JIT support with --enable-jit, another
|
||||
test program called pcre2_jit_test is built as well. If the 8-bit library is
|
||||
built, libpcre2-posix and the pcre2grep command are also built. Running
|
||||
"make" with the -j option may speed up compilation on multiprocessor systems.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE2
|
||||
tests are given below in a separate section of this document. The -j option of
|
||||
"make" can also be used when running the tests.
|
||||
|
||||
You can use "make install" to install PCRE2 into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcre2test
|
||||
pcre2grep (if 8-bit support is enabled)
|
||||
pcre2-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre2-8 (if 8-bit support is enabled)
|
||||
libpcre2-16 (if 16-bit support is enabled)
|
||||
libpcre2-32 (if 32-bit support is enabled)
|
||||
libpcre2-posix (if 8-bit support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
|
||||
Header files (include):
|
||||
pcre2.h
|
||||
pcre2posix.h
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcre2grep.1
|
||||
pcre2test.1
|
||||
pcre2-config.1
|
||||
pcre2.3
|
||||
pcre2*.3 (lots more pages, all starting "pcre2")
|
||||
|
||||
HTML documentation (share/doc/pcre2/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre2):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre2.txt (a concatenation of the man(3) pages)
|
||||
pcre2test.txt the pcre2test man page
|
||||
pcre2grep.txt the pcre2grep man page
|
||||
pcre2-config.txt the pcre2-config man page
|
||||
|
||||
If you want to remove PCRE2 from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
remove any directories, because these are often shared with other programs.
|
||||
|
||||
|
||||
Retrieving configuration information
|
||||
------------------------------------
|
||||
|
||||
Running "make install" installs the command pcre2-config, which can be used to
|
||||
recall information about the PCRE2 configuration and installation. For example:
|
||||
|
||||
pcre2-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre2-config --libs8
|
||||
|
||||
outputs information about where the 8-bit library is installed. This command
|
||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
obtain a list of possible arguments.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
about installed libraries. Instead of separate commands for each library, a
|
||||
single command is used. For example:
|
||||
|
||||
pkg-config --libs libpcre2-16
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries
|
||||
----------------
|
||||
|
||||
The default distribution builds PCRE2 as shared libraries and static libraries,
|
||||
as long as the operating system supports shared libraries. Shared library
|
||||
support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcre2test and pcre2grep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcre2grep and pcre2test are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE2 using static libraries only you must use --disable-shared when
|
||||
configuring it. For example:
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling using autotools
|
||||
-------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||
tables that assumes ASCII code. Cross-compiling with the default tables should
|
||||
not be a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
||||
|
||||
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE2
|
||||
-------------
|
||||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the pcre2grep command.
|
||||
When JIT support is enabled, a third test program called pcre2_jit_test is
|
||||
built. Both the scripts and all the program tests are run if you obey "make
|
||||
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
|
||||
|
||||
The RunTest script runs the pcre2test test program (which is documented in its
|
||||
own man page) on each of the relevant testinput files in the testdata
|
||||
directory, and compares the output with the contents of the corresponding
|
||||
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||
from pcre2test. Other files whose names begin with "test" are used as working
|
||||
files in some tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options were selected. For
|
||||
example, the tests for UTF-8/16/32 features are run only when Unicode support
|
||||
is available. RunTest outputs a comment when it skips a test.
|
||||
|
||||
Many (but not all) of the tests that are not skipped are run twice if JIT
|
||||
support is available. On the second run, JIT compilation is forced. This
|
||||
testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
on the RunTest command line. To run pcre2test on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
RunTest 2 7 11
|
||||
|
||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
end), or a number preceded by ~ to exclude a test. For example:
|
||||
|
||||
RunTest 3-15 ~10
|
||||
|
||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||
except test 13. Whatever order the arguments are in, the tests are always run
|
||||
in numerical order.
|
||||
|
||||
You can also call RunTest with the single argument "list" to cause it to output
|
||||
a list of tests.
|
||||
|
||||
The test sequence starts with "test 0", which is a special test that has no
|
||||
input file, and whose output is not checked. This is because it will be
|
||||
different on different hardware and with different configurations. The test
|
||||
exists in order to exercise some of pcre2test's code that would not otherwise
|
||||
be run.
|
||||
|
||||
Tests 1 and 2 can always be run, as they expect only plain text strings (not
|
||||
UTF) and make no use of Unicode properties. The first test file can be fed
|
||||
directly into the perltest.sh script to check that Perl gives the same results.
|
||||
The only difference you should see is in the first few lines, where the Perl
|
||||
version is given instead of the PCRE2 version. The second set of tests checks
|
||||
auxiliary functions, error detection, and run-time flags that are specific to
|
||||
PCRE2. It also uses the debugging flags to check some of the internals of
|
||||
pcre2_compile().
|
||||
|
||||
If you build PCRE2 with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. For example, where the comparison test output contains
|
||||
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
|
||||
cases. This is not a bug in PCRE2.
|
||||
|
||||
Test 3 checks pcre2_maketables(), the facility for building a set of character
|
||||
tables for a specific locale and using them instead of the default tables. The
|
||||
script uses the "locale" command to check for the availability of the "fr_FR",
|
||||
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
|
||||
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
|
||||
the list of available locales, the third test cannot be run, and a comment is
|
||||
output to say why. If running this test produces an error like this:
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
it means that the given locale is not available on your system, despite being
|
||||
listed by "locale". This does not mean that PCRE2 is broken. There are three
|
||||
alternative output files for the third test, because three different versions
|
||||
of the French locale have been encountered. The test passes if its output
|
||||
matches any one of them.
|
||||
|
||||
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
|
||||
with the perltest.sh script, and test 5 checking PCRE2-specific things.
|
||||
|
||||
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
|
||||
non-UTF mode and UTF-mode with Unicode property support, respectively.
|
||||
|
||||
Test 8 checks some internal offsets and code size features; it is run only when
|
||||
the default "link size" of 2 is set (in other cases the sizes change) and when
|
||||
Unicode support is enabled.
|
||||
|
||||
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
|
||||
16-bit and 32-bit modes. These are tests that generate different output in
|
||||
8-bit mode. Each pair are for general cases and Unicode support, respectively.
|
||||
Test 13 checks the handling of non-UTF characters greater than 255 by
|
||||
pcre2_dfa_match() in 16-bit and 32-bit modes.
|
||||
|
||||
Test 14 contains a number of tests that must not be run with JIT. They check,
|
||||
among other non-JIT things, the match-limiting features of the interpretive
|
||||
matcher.
|
||||
|
||||
Test 15 is run only when JIT support is not available. It checks that an
|
||||
attempt to use JIT has the expected behaviour.
|
||||
|
||||
Test 16 is run only when JIT support is available. It checks JIT complete and
|
||||
partial modes, match-limiting under JIT, and other JIT-specific features.
|
||||
|
||||
Tests 17 and 18 are run only in 8-bit mode. They check the POSIX interface to
|
||||
the 8-bit library, without and with Unicode support, respectively.
|
||||
|
||||
Test 19 checks the serialization functions by writing a set of compiled
|
||||
patterns to a file, and then reloading and checking them.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, a set of tables that is
|
||||
built into the library is used. The pcre2_maketables() function can be called
|
||||
by an application to create a new set of tables in the current locale. These are
|
||||
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
|
||||
compile context.
|
||||
|
||||
The source file called pcre2_chartables.c contains the default set of tables.
|
||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||
islower(), etc. to build the table sources. This means that the default C
|
||||
locale which is set for your system will control the contents of these default
|
||||
tables. You can change the default tables by editing pcre2_chartables.c and
|
||||
then re-building PCRE2. If you do this, you should take care to ensure that the
|
||||
file does not get automatically re-generated. The best way to do this is to
|
||||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre2_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256. The final 256-byte table has bits indicating various character types,
|
||||
as follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE2 to malfunction.
|
||||
|
||||
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below.
|
||||
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||
ASCII coding; unless --enable-rebuild-chartables is
|
||||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test
|
||||
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
src/pcre2posix.h header for the external POSIX wrapper API
|
||||
src/pcre2_internal.h header for internal use
|
||||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
|
||||
sljit/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
src/pcre2demo.c simple demonstration of coding calls to PCRE2
|
||||
src/pcre2grep.c source of a grep utility that uses PCRE2
|
||||
src/pcre2test.c comprehensive test program
|
||||
src/pcre2_printint.c part of pcre2test
|
||||
src/pcre2_jit_test.c JIT test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE2
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE2
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE2
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcre2grep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE2
|
||||
doc/*.1 man page sources for pcre2grep and pcre2test
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.sh script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for building PCRE2 "by hand"
|
||||
|
||||
pcre2.h.generic ) a version of the public PCRE2 header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 16 October 2015
|
633
pcre2/RunGrepTest
Executable file
633
pcre2/RunGrepTest
Executable file
|
@ -0,0 +1,633 @@
|
|||
#! /bin/sh
|
||||
|
||||
# Run pcre2grep tests. The assumption is that the PCRE2 tests check the library
|
||||
# itself. What we are checking here is the file handling and options that are
|
||||
# supported by pcre2grep. This script must be run in the build directory.
|
||||
|
||||
# Set the C locale, so that sort(1) behaves predictably.
|
||||
|
||||
LC_ALL=C
|
||||
export LC_ALL
|
||||
|
||||
# Remove any non-default colouring and aliases that the caller may have set.
|
||||
|
||||
unset PCRE2GREP_COLOUR PCRE2GREP_COLOR
|
||||
unset cp ls mv rm
|
||||
|
||||
# Remember the current (build) directory, set the program to be tested, and
|
||||
# valgrind settings when requested.
|
||||
|
||||
builddir=`pwd`
|
||||
pcre2grep=$builddir/pcre2grep
|
||||
pcre2test=$builddir/pcre2test
|
||||
|
||||
if [ ! -x $pcre2grep ] ; then
|
||||
echo "** $pcre2grep does not exist or is not executable."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -x $pcre2test ] ; then
|
||||
echo "** $pcre2test does not exist or is not executable."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
valgrind=
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all";;
|
||||
*) echo "RunGrepTest: Unknown argument $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
pcre2grep_version=`$pcre2grep -V`
|
||||
if [ "$valgrind" = "" ] ; then
|
||||
echo "Testing $pcre2grep_version"
|
||||
else
|
||||
echo "Testing $pcre2grep_version using valgrind"
|
||||
fi
|
||||
|
||||
# Set up a suitable "diff" command for comparison. Some systems have a diff
|
||||
# that lacks a -u option. Try to deal with this; better do the test for the -b
|
||||
# option as well.
|
||||
|
||||
cf="diff"
|
||||
diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b"
|
||||
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
|
||||
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
|
||||
|
||||
# If this test is being run from "make check", $srcdir will be set. If not, set
|
||||
# it to the current or parent directory, whichever one contains the test data.
|
||||
# Subsequently, we run most of the pcre2grep tests in the source directory so
|
||||
# that the file names in the output are always the same.
|
||||
|
||||
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
|
||||
if [ -d "./testdata" ] ; then
|
||||
srcdir=.
|
||||
elif [ -d "../testdata" ] ; then
|
||||
srcdir=..
|
||||
else
|
||||
echo "Cannot find the testdata directory"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for the availability of UTF-8 support
|
||||
|
||||
$pcre2test -C unicode >/dev/null
|
||||
utf8=$?
|
||||
|
||||
# Check default newline convention. If it does not include LF, force LF.
|
||||
|
||||
nl=`$pcre2test -C newline`
|
||||
if [ "$nl" != "LF" -a "$nl" != "ANY" -a "$nl" != "ANYCRLF" ]; then
|
||||
pcre2grep="$pcre2grep -N LF"
|
||||
echo "Default newline setting forced to LF"
|
||||
fi
|
||||
|
||||
# ------ Function to run and check a special pcre2grep arguments test -------
|
||||
|
||||
# checkspecial ARGS EXPECTED_RC
#
# Run pcre2grep (prefixed by $valgrind when that is set) with ARGS and check
# its exit status.
#   $1  the pcre2grep arguments as a single string; it is expanded unquoted,
#       so the shell splits it into separate words before running the command
#   $2  the exit status that pcre2grep is expected to return
# Both stdout and stderr of the command are appended to testtrygrep. If the
# actual exit status differs from $2, a message is printed and the whole
# script exits with status 1.
checkspecial()
|
||||
{
|
||||
$valgrind $pcre2grep $1 >>testtrygrep 2>&1
|
||||
if [ $? -ne $2 ] ; then
|
||||
echo "** pcre2grep $1 failed - check testtrygrep"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# ------ Normal tests ------
|
||||
|
||||
echo "Testing pcre2grep main features"
|
||||
|
||||
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
|
||||
echo seventeen >testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "======== STDERR ========" >>testtrygrep
|
||||
cat teststderrgrep >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Fx "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -F "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -F -e DATA -e "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -c lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -c -l lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
|
||||
echo testdata/grepinput3 >testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >testtemp2grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
|
||||
echo "a" >testtemp1grep
|
||||
echo "aaaaa" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 108 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -lq PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 109 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# These tests require UTF-8 support
|
||||
|
||||
if [ $utf8 -ne 0 ] ; then
|
||||
echo "Testing pcre2grep UTF-8 features"
|
||||
|
||||
echo "---------------------------- Test U1 ------------------------------" >testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
else
|
||||
echo "Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library"
|
||||
fi
|
||||
|
||||
|
||||
# We go to some contortions to try to ensure that the tests for the various
|
||||
# newline settings will work in environments where the normal newline sequence
|
||||
# is not \n. Do not use exported files, whose line endings might be changed.
|
||||
# Instead, create an input file using printf so that its contents are exactly
|
||||
# what we want. Note the messy fudge to get printf to write a string that
|
||||
# starts with a hyphen. These tests are run in the build directory.
|
||||
|
||||
echo "Testing pcre2grep newline settings"
|
||||
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
|
||||
|
||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
|
||||
$valgrind $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
|
||||
pattern=`printf 'def\rjkl'`
|
||||
$valgrind $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# Finally, some tests to exercise code that is not tested above, just to be
|
||||
# sure that it runs OK. Doing this improves the coverage statistics. The output
|
||||
# is not checked.
|
||||
|
||||
echo "Testing miscellaneous pcre2grep arguments (unchecked)"
|
||||
echo '' >testtrygrep
|
||||
checkspecial '-xxxxx' 2
|
||||
checkspecial '--help' 0
|
||||
checkspecial '--line-buffered --colour=auto abc /dev/null' 1
|
||||
|
||||
# Clean up local working files
|
||||
rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
|
||||
|
||||
exit 0
|
||||
|
||||
# End
|
850
pcre2/RunTest
Executable file
850
pcre2/RunTest
Executable file
|
@ -0,0 +1,850 @@
|
|||
#! /bin/sh
|
||||
|
||||
###############################################################################
|
||||
# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
|
||||
# selected, depending on which build-time options were used.
|
||||
#
|
||||
# When JIT support is available, all appropriate tests are run with and without
|
||||
# JIT, unless "-nojit" is given on the command line. There are also two tests
|
||||
# for JIT-specific features, one to be run when JIT support is available
|
||||
# (unless "-nojit" is specified), and one when it is not.
|
||||
#
|
||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||
# command line.
|
||||
#
|
||||
# As well as "-nojit", "-8", "-16", and "-32", arguments for this script are
|
||||
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
|
||||
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||
# in numerical order.
|
||||
#
|
||||
# Inappropriate tests are automatically skipped (with a comment to say so). For
|
||||
# example, if JIT support is not compiled, test 17 is skipped, whereas if JIT
|
||||
# support is compiled, test 16 is skipped.
|
||||
#
|
||||
# Other arguments can be one of the words "-valgrind", "-valgrind-log", or
|
||||
# "-sim" followed by an argument to run cross-compiled executables under a
|
||||
# simulator, for example:
|
||||
#
|
||||
# RunTest 3 -sim "qemu-arm -s 8388608"
|
||||
#
|
||||
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
|
||||
# be given without the leading "-" character.
|
||||
#
|
||||
# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need
|
||||
# very much more stack than normal. In environments where the stack can be
|
||||
# set at runtime, -bigstack sets a gigantic stack.
|
||||
#
|
||||
# There are two special cases where only one argument is allowed:
|
||||
#
|
||||
# If the first and only argument is "ebcdic", the script runs the special
|
||||
# EBCDIC test that can be useful for checking certain EBCDIC features, even
|
||||
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
|
||||
# this test to be run.
|
||||
#
|
||||
# If the script is obeyed as "RunTest list", a list of available tests is
|
||||
# output, but none of them are run.
|
||||
###############################################################################
|
||||
|
||||
# Define test titles in variables so that they can be output as a list. Some
|
||||
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
||||
|
||||
title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)"
|
||||
title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
|
||||
title2="Test 2: API, errors, internals, and non-Perl stuff"
|
||||
title3="Test 3: Locale-specific features"
|
||||
title4A="Test 4: UTF"
|
||||
title4B=" and Unicode property support (compatible with Perl >= 5.10)"
|
||||
title5A="Test 5: API, internals, and non-Perl stuff for UTF"
|
||||
title5B=" and UCP support"
|
||||
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
|
||||
title7A="Test 7: DFA matching with UTF"
|
||||
title7B=" and Unicode property support"
|
||||
title8="Test 8: Internal offsets and code size tests"
|
||||
title9="Test 9: Specials for the basic 8-bit library"
|
||||
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
|
||||
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
||||
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
||||
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
||||
title14="Test 14: DFA specials for UTF and UCP support"
|
||||
title15="Test 15: Non-JIT limits and other non-JIT tests"
|
||||
title16="Test 16: JIT-specific features when JIT is not available"
|
||||
title17="Test 17: JIT-specific features when JIT is available"
|
||||
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
|
||||
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
|
||||
title20="Test 20: Serialization tests"
|
||||
title21="Test 21: \C tests without UTF (supported for DFA matching)"
|
||||
title22="Test 22: \C tests with UTF (not supported for DFA matching)"
|
||||
title23="Test 23: \C disabled test"
|
||||
maxtest=23
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
echo $title0
|
||||
echo $title1
|
||||
echo $title2 "(not UTF or UCP)"
|
||||
echo $title3
|
||||
echo $title4A $title4B
|
||||
echo $title5A $title5B
|
||||
echo $title6
|
||||
echo $title7A $title7B
|
||||
echo $title8
|
||||
echo $title9
|
||||
echo $title10
|
||||
echo $title11
|
||||
echo $title12
|
||||
echo $title13
|
||||
echo $title14
|
||||
echo $title15
|
||||
echo $title16
|
||||
echo $title17
|
||||
echo $title18
|
||||
echo $title19
|
||||
echo $title20
|
||||
echo $title21
|
||||
echo $title22
|
||||
echo $title23
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Set up a suitable "diff" command for comparison. Some systems
|
||||
# have a diff that lacks a -u option. Try to deal with this.
|
||||
|
||||
cf="diff"
|
||||
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
|
||||
|
||||
# Find the test data
|
||||
|
||||
if [ -n "$srcdir" -a -d "$srcdir" ] ; then
|
||||
testdata="$srcdir/testdata"
|
||||
elif [ -d "./testdata" ] ; then
|
||||
testdata=./testdata
|
||||
elif [ -d "../testdata" ] ; then
|
||||
testdata=../testdata
|
||||
else
|
||||
echo "Cannot find the testdata directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# ------ Function to check results of a test -------
|
||||
|
||||
# This function is called with three parameters:
|
||||
#
|
||||
# $1 the value of $? after a call to pcre2test
|
||||
# $2 the suffix of the output file to compare with
|
||||
# $3 the $opt value (empty, -jit, or -dfa)
|
||||
#
|
||||
# Note: must define using name(), not "function name", for Solaris.
|
||||
|
||||
checkresult()
{
  # $1 is the exit status of the pcre2test run itself; anything other than
  # zero means the program failed before its output could be compared.
  if [ $1 -ne 0 ] ; then
    echo "** pcre2test failed - check testtry"
    exit 1
  fi

  # Build the suffix used in the report line from the $opt value in $3.
  with=""
  case "$3" in
    -jit) with=" with JIT";;
    -dfa) with=" with DFA";;
  esac

  # Compare the expected output (selected by the suffix in $2) with what was
  # just written to testtry. $cf is left unquoted on purpose: it may hold
  # "diff -u" and has to be split into command and option.
  if $cf $testdata/testoutput$2 testtry ; then
    echo " OK$with"
  else
    echo ""
    echo "** Test $2 failed$with"
    exit 1
  fi
}
|
||||
|
||||
|
||||
# ------ Function to run and check a special pcre2test arguments test -------
|
||||
|
||||
checkspecial()
{
  # Run pcre2test with the argument string in $1, appending its output to
  # testtry, and abort the whole script if it exits non-zero.
  #
  # $1 is deliberately unquoted so that a string such as '-S 1 -t 10 file'
  # is split into separate arguments. $sim is included so that this works
  # for cross-compiled binaries run under a simulator, in the same way as
  # the pcre2test invocations in the main test loop; it is empty when no
  # -sim argument was given, which leaves the command line unchanged.
  $sim $valgrind $vjs ./pcre2test $1 >>testtry
  if [ $? -ne 0 ] ; then
    echo "** pcre2test $1 failed - check testtry"
    exit 1
  fi
}
|
||||
|
||||
|
||||
# ------ Special EBCDIC Test -------
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "ebcdic" ]; then
|
||||
$valgrind ./pcre2test -C ebcdic >/dev/null
|
||||
ebcdic=$?
|
||||
if [ $ebcdic -ne 1 ] ; then
|
||||
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
|
||||
exit 1
|
||||
fi
|
||||
for opt in "" "-dfa"; do
|
||||
./pcre2test -q $opt $testdata/testinputEBC >testtry
|
||||
checkresult $? EBC "$opt"
|
||||
done
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
# ------ Normal Tests ------
|
||||
|
||||
# Default values
|
||||
|
||||
arg8=
|
||||
arg16=
|
||||
arg32=
|
||||
nojit=
|
||||
bigstack=
|
||||
sim=
|
||||
skip=
|
||||
valgrind=
|
||||
vjs=
|
||||
|
||||
# This is in case the caller has set aliases (as I do - PH)
|
||||
unset cp ls mv rm
|
||||
|
||||
# Process options and select which tests to run; for those that are explicitly
|
||||
# requested, check that the necessary optional facilities are available.
|
||||
|
||||
do0=no
|
||||
do1=no
|
||||
do2=no
|
||||
do3=no
|
||||
do4=no
|
||||
do5=no
|
||||
do6=no
|
||||
do7=no
|
||||
do8=no
|
||||
do9=no
|
||||
do10=no
|
||||
do11=no
|
||||
do12=no
|
||||
do13=no
|
||||
do14=no
|
||||
do15=no
|
||||
do16=no
|
||||
do17=no
|
||||
do18=no
|
||||
do19=no
|
||||
do20=no
|
||||
do21=no
|
||||
do22=no
|
||||
do23=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
0) do0=yes;;
|
||||
1) do1=yes;;
|
||||
2) do2=yes;;
|
||||
3) do3=yes;;
|
||||
4) do4=yes;;
|
||||
5) do5=yes;;
|
||||
6) do6=yes;;
|
||||
7) do7=yes;;
|
||||
8) do8=yes;;
|
||||
9) do9=yes;;
|
||||
10) do10=yes;;
|
||||
11) do11=yes;;
|
||||
12) do12=yes;;
|
||||
13) do13=yes;;
|
||||
14) do14=yes;;
|
||||
15) do15=yes;;
|
||||
16) do16=yes;;
|
||||
17) do17=yes;;
|
||||
18) do18=yes;;
|
||||
19) do19=yes;;
|
||||
20) do20=yes;;
|
||||
21) do21=yes;;
|
||||
22) do22=yes;;
|
||||
23) do23=yes;;
|
||||
-8) arg8=yes;;
|
||||
-16) arg16=yes;;
|
||||
-32) arg32=yes;;
|
||||
bigstack|-bigstack) bigstack=yes;;
|
||||
nojit|-nojit) nojit=yes;;
|
||||
sim|-sim) shift; sim=$1;;
|
||||
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
||||
valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
|
||||
~*)
|
||||
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
|
||||
skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
|
||||
else
|
||||
echo "Unknown option or test selector '$1'"; exit 1
|
||||
fi
|
||||
;;
|
||||
*-*)
|
||||
if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
|
||||
tf=`expr "$1" : '\([0-9]*\)'`
|
||||
tt=`expr "$1" : '.*-\([0-9]*\)'`
|
||||
if [ "$tt" = "" ] ; then tt=$maxtest; fi
|
||||
if expr \( "$tt" ">" "$maxtest" \) >/dev/null; then
|
||||
echo "Invalid test range '$1'"; exit 1
|
||||
fi
|
||||
while expr "$tf" "<=" "$tt" >/dev/null; do
|
||||
eval do${tf}=yes
|
||||
tf=`expr $tf + 1`
|
||||
done
|
||||
else
|
||||
echo "Invalid test range '$1'"; exit 1
|
||||
fi
|
||||
;;
|
||||
*) echo "Unknown option or test selector '$1'"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# Find which optional facilities are available.
|
||||
|
||||
$sim ./pcre2test -C linksize >/dev/null
|
||||
link_size=$?
|
||||
if [ $link_size -lt 2 ] ; then
|
||||
echo "RunTest: Failed to find internal link size"
|
||||
exit 1
|
||||
fi
|
||||
if [ $link_size -gt 4 ] ; then
|
||||
echo "RunTest: Failed to find internal link size"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# If it is possible to set the system stack size, arrange to set a value for
|
||||
# test 2, which needs more than even the Linux default when PCRE2 has been
|
||||
# compiled by gcc with -fsanitize=address. When the compiler is clang, sanitize
|
||||
# options require an even bigger stack for test 2, and an increased stack for
|
||||
# some of the other tests.
|
||||
|
||||
$sim ./pcre2test -S 1 /dev/null /dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$bigstack" = "" ] ; then
|
||||
test2stack="-S 16"
|
||||
defaultstack=""
|
||||
else
|
||||
test2stack="-S 1024"
|
||||
defaultstack="-S 64"
|
||||
fi
|
||||
else
|
||||
test2stack=""
|
||||
defaultstack=""
|
||||
fi
|
||||
|
||||
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
||||
# one need be.
|
||||
|
||||
$sim ./pcre2test -C pcre2-8 >/dev/null
|
||||
support8=$?
|
||||
$sim ./pcre2test -C pcre2-16 >/dev/null
|
||||
support16=$?
|
||||
$sim ./pcre2test -C pcre2-32 >/dev/null
|
||||
support32=$?
|
||||
|
||||
# \C may be disabled
|
||||
|
||||
$sim ./pcre2test -C backslash-C >/dev/null
|
||||
supportBSC=$?
|
||||
|
||||
# Initialize all bitsizes skipped
|
||||
|
||||
test8=skip
|
||||
test16=skip
|
||||
test32=skip
|
||||
|
||||
# If no bitsize arguments, select all that are available
|
||||
|
||||
if [ "$arg8$arg16$arg32" = "" ] ; then
|
||||
if [ $support8 -ne 0 ] ; then
|
||||
test8=-8
|
||||
fi
|
||||
if [ $support16 -ne 0 ] ; then
|
||||
test16=-16
|
||||
fi
|
||||
if [ $support32 -ne 0 ] ; then
|
||||
test32=-32
|
||||
fi
|
||||
|
||||
# Otherwise, select requested bit sizes
|
||||
|
||||
else
|
||||
if [ "$arg8" = yes ] ; then
|
||||
if [ $support8 -eq 0 ] ; then
|
||||
echo "Cannot run 8-bit library tests: 8-bit library not compiled"
|
||||
exit 1
|
||||
fi
|
||||
test8=-8
|
||||
fi
|
||||
if [ "$arg16" = yes ] ; then
|
||||
if [ $support16 -eq 0 ] ; then
|
||||
echo "Cannot run 16-bit library tests: 16-bit library not compiled"
|
||||
exit 1
|
||||
fi
|
||||
test16=-16
|
||||
fi
|
||||
if [ "$arg32" = yes ] ; then
|
||||
if [ $support32 -eq 0 ] ; then
|
||||
echo "Cannot run 32-bit library tests: 32-bit library not compiled"
|
||||
exit 1
|
||||
fi
|
||||
test32=-32
|
||||
fi
|
||||
fi
|
||||
|
||||
# UTF support is implied by Unicode support, and it always applies to all bit
|
||||
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
|
||||
# UTF-32 support.
|
||||
|
||||
$sim ./pcre2test -C unicode >/dev/null
|
||||
utf=$?
|
||||
|
||||
# When JIT is used with valgrind, we need to set up valgrind suppressions as
|
||||
# otherwise there are a lot of false positive valgrind reports when the
|
||||
# hardware supports SSE2.
|
||||
|
||||
jitopt=
|
||||
$sim ./pcre2test -C jit >/dev/null
|
||||
jit=$?
|
||||
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
||||
jitopt=-jit
|
||||
if [ "$valgrind" != "" ] ; then
|
||||
vjs="--suppressions=$testdata/valgrind-jit.supp"
|
||||
fi
|
||||
fi
|
||||
|
||||
# If no specific tests were requested, select all. Those that are not
|
||||
# relevant will be automatically skipped.
|
||||
|
||||
if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||
$do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \
|
||||
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
|
||||
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
||||
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
|
||||
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no \
|
||||
]; then
|
||||
do0=yes
|
||||
do1=yes
|
||||
do2=yes
|
||||
do3=yes
|
||||
do4=yes
|
||||
do5=yes
|
||||
do6=yes
|
||||
do7=yes
|
||||
do8=yes
|
||||
do9=yes
|
||||
do10=yes
|
||||
do11=yes
|
||||
do12=yes
|
||||
do13=yes
|
||||
do14=yes
|
||||
do15=yes
|
||||
do16=yes
|
||||
do17=yes
|
||||
do18=yes
|
||||
do19=yes
|
||||
do20=yes
|
||||
do21=yes
|
||||
do22=yes
|
||||
do23=yes
|
||||
fi
|
||||
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
# only of explicit skips.
|
||||
|
||||
for i in $skip; do eval do$i=no; done
|
||||
|
||||
# Show which release and which test data
|
||||
|
||||
echo ""
|
||||
echo PCRE2 C library tests using test data from $testdata
|
||||
$sim ./pcre2test /dev/null
|
||||
echo ""
|
||||
|
||||
for bmode in "$test8" "$test16" "$test32"; do
|
||||
case "$bmode" in
|
||||
skip) continue;;
|
||||
-16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
|
||||
bits=16; echo "---- Testing 16-bit library ----"; echo "";;
|
||||
-32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
|
||||
bits=32; echo "---- Testing 32-bit library ----"; echo "";;
|
||||
-8) bits=8; echo "---- Testing 8-bit library ----"; echo "";;
|
||||
esac
|
||||
|
||||
# Test 0 is a special test. Its output is not checked, because it will
|
||||
# be different on different hardware and with different configurations.
|
||||
# Running this test just exercises the code.
|
||||
|
||||
if [ $do0 = yes ] ; then
|
||||
echo $title0
|
||||
echo '/abc/jit,memory' >testSinput
|
||||
echo ' abc' >>testSinput
|
||||
echo '' >testtry
|
||||
checkspecial '-C'
|
||||
checkspecial '--help'
|
||||
checkspecial '-S 1 -t 10 testSinput'
|
||||
echo " OK"
|
||||
fi
|
||||
|
||||
# Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8
|
||||
|
||||
if [ $do1 = yes ] ; then
|
||||
echo $title1
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput1 testtry
|
||||
checkresult $? 1 "$opt"
|
||||
done
|
||||
fi
|
||||
|
||||
# PCRE2 tests that are not Perl-compatible: API, errors, internals
|
||||
|
||||
if [ $do2 = yes ] ; then
|
||||
echo $title2 "(excluding UTF-$bits)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
echo " "
|
||||
echo "** Test 2 requires a lot of stack. If it has crashed with a"
|
||||
echo "** segmentation fault, it may be that you do not have enough"
|
||||
echo "** stack available by default. Please see the 'pcre2stack' man"
|
||||
echo "** page for a discussion of PCRE2's stack usage."
|
||||
echo " "
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Locale-specific tests, provided that either the "fr_FR", "fr_CA", "french"
|
||||
# or "fr" locale is available. The first two are Unix-like standards; the
|
||||
# last two are for Windows. Unfortunately, different versions of the French
|
||||
# locale give different outputs for some items. This test passes if the
|
||||
# output matches any one of the alternative output files.
|
||||
|
||||
if [ $do3 = yes ] ; then
|
||||
locale=
|
||||
|
||||
# In some environments locales that are listed by the "locale -a"
|
||||
# command do not seem to work with setlocale(). Therefore, we do
|
||||
# a preliminary test to see if pcre2test can set one before going
|
||||
# on to use it.
|
||||
|
||||
for loc in 'fr_FR' 'french' 'fr' 'fr_CA'; do
|
||||
locale -a | grep "^$loc\$" >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
echo "/a/locale=$loc" | \
|
||||
$sim $valgrind ./pcre2test -q $bmode | \
|
||||
grep "Failed to set locale" >/dev/null
|
||||
if [ $? -ne 0 ] ; then
|
||||
locale=$loc
|
||||
if [ "$locale" = "fr_FR" ] ; then
|
||||
infile=$testdata/testinput3
|
||||
outfile=$testdata/testoutput3
|
||||
outfile2=$testdata/testoutput3A
|
||||
outfile3=$testdata/testoutput3B
|
||||
else
|
||||
infile=test3input
|
||||
outfile=test3output
|
||||
outfile2=test3outputA
|
||||
outfile3=test3outputB
|
||||
sed "s/fr_FR/$loc/" $testdata/testinput3 >test3input
|
||||
sed "s/fr_FR/$loc/" $testdata/testoutput3 >test3output
|
||||
sed "s/fr_FR/$loc/" $testdata/testoutput3A >test3outputA
|
||||
sed "s/fr_FR/$loc/" $testdata/testoutput3B >test3outputB
|
||||
fi
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$locale" != "" ] ; then
|
||||
echo $title3 "(using '$locale' locale)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $infile testtry
|
||||
if [ $? = 0 ] ; then
|
||||
case "$opt" in
|
||||
-jit) with=" with JIT";;
|
||||
*) with="";;
|
||||
esac
|
||||
if $cf $outfile testtry >teststdout || \
|
||||
$cf $outfile2 testtry >teststdout || \
|
||||
$cf $outfile3 testtry >teststdout
|
||||
then
|
||||
echo " OK$with"
|
||||
else
|
||||
echo "** Locale test did not run successfully$with. The output did not match"
|
||||
echo " $outfile, $outfile2 or $outfile3."
|
||||
echo " This may mean that there is a problem with the locale settings rather"
|
||||
echo " than a bug in PCRE2."
|
||||
exit 1
|
||||
fi
|
||||
else exit 1
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr_CA',"
|
||||
echo "'fr' or 'french' locales can be set, or the \"locale\" command is"
|
||||
echo "not available to check for them."
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for UTF and Unicode property support
|
||||
|
||||
if [ $do4 = yes ] ; then
|
||||
echo ${title4A}-${bits}${title4B}
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput4 testtry
|
||||
checkresult $? 4 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $do5 = yes ] ; then
|
||||
echo ${title5A}-${bits}$title5B
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput5 testtry
|
||||
checkresult $? 5 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for DFA matching support
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo $title6
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput6 testtry
|
||||
checkresult $? 6 ""
|
||||
fi
|
||||
|
||||
# Test 7: DFA matching with UTF and Unicode property support. It is run once
# only, with no extra option, and is skipped when UTF support is missing.
if [ $do7 = yes ] ; then
  echo ${title7A}-${bits}$title7B
  if [ $utf -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    # No $opt on this command line: this stanza is not inside a "for opt"
    # loop, so $opt would only hold whatever value an earlier test's loop
    # happened to leave behind (possibly -jit, without the matching valgrind
    # suppressions), making the run depend on which tests preceded it.
    $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput7 testtry
    checkresult $? 7 ""
  fi
fi
|
||||
|
||||
# Test of internal offsets and code sizes. This test is run only when there
|
||||
# is UTF/UCP support. The actual tests are mostly the same as in some of the
|
||||
# above, but in this test we inspect some offsets and sizes. This is a
|
||||
# doublecheck for the maintainer, just in case something changes unexpectedly.
|
||||
# The output from this test is different in 8-bit, 16-bit, and 32-bit modes
|
||||
# and for different link sizes, so there are different output files for each
|
||||
# mode and link size.
|
||||
|
||||
if [ $do8 = yes ] ; then
|
||||
echo $title8
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput8 testtry
|
||||
checkresult $? 8-$bits-$link_size ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 8-bit-specific features
|
||||
|
||||
if [ "$do9" = yes ] ; then
|
||||
echo $title9
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput9 testtry
|
||||
checkresult $? 9 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for UTF-8 and UCP 8-bit-specific features
|
||||
|
||||
if [ "$do10" = yes ] ; then
|
||||
echo $title10
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput10 testtry
|
||||
checkresult $? 10 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16-bit and 32-bit features. Output is different for the two widths.
|
||||
|
||||
if [ $do11 = yes ] ; then
|
||||
echo $title11
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput11 testtry
|
||||
checkresult $? 11-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output
|
||||
# is different for the two widths.
|
||||
|
||||
if [ $do12 = yes ] ; then
|
||||
echo $title12
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput12 testtry
|
||||
checkresult $? 12-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16/32-bit-specific features in DFA non-UTF modes
|
||||
|
||||
if [ $do13 = yes ] ; then
|
||||
echo $title13
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput13 testtry
|
||||
checkresult $? 13 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for DFA UTF and UCP features. Output is different for the different widths.
|
||||
|
||||
# Test 14: DFA specials for UTF and UCP support. The expected output file is
# selected by code unit width (testoutput14-$bits). Skipped without UTF.
if [ $do14 = yes ] ; then
  echo $title14
  if [ $utf -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    # No $opt on this command line: this stanza is not inside a "for opt"
    # loop, so $opt would only hold a stale value left by an earlier test,
    # and the result is always reported with an empty option anyway.
    $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput14 testtry
    checkresult $? 14-$bits ""
  fi
fi
|
||||
|
||||
# Test non-JIT match and recursion limits
|
||||
|
||||
if [ $do15 = yes ] ; then
|
||||
echo $title15
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
|
||||
checkresult $? 15 ""
|
||||
fi
|
||||
|
||||
# Test JIT-specific features when JIT is not available
|
||||
|
||||
if [ $do16 = yes ] ; then
|
||||
echo $title16
|
||||
if [ $jit -ne 0 ] ; then
|
||||
echo " Skipped because JIT is available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
|
||||
checkresult $? 16 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test JIT-specific features when JIT is available
|
||||
|
||||
if [ $do17 = yes ] ; then
|
||||
echo $title17
|
||||
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||
echo " Skipped because JIT is not available or nojit was specified"
|
||||
else
|
||||
$sim $valgrind $vjs ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
|
||||
checkresult $? 17 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for the POSIX interface without UTF/UCP (8-bit only)
|
||||
|
||||
if [ $do18 = yes ] ; then
|
||||
echo $title18
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
|
||||
checkresult $? 18 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for the POSIX interface with UTF/UCP (8-bit only)
|
||||
|
||||
if [ $do19 = yes ] ; then
|
||||
echo $title19
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
|
||||
checkresult $? 19 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Serialization tests
|
||||
|
||||
if [ $do20 = yes ] ; then
|
||||
echo $title20
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput20 testtry
|
||||
checkresult $? 20 ""
|
||||
fi
|
||||
|
||||
# \C tests without UTF - DFA matching is supported
|
||||
|
||||
if [ "$do21" = yes ] ; then
|
||||
echo $title21
|
||||
if [ $supportBSC -eq 0 ] ; then
|
||||
echo " Skipped because \C is disabled"
|
||||
else
|
||||
for opt in "" $jitopt -dfa; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput21 testtry
|
||||
checkresult $? 21 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# \C tests with UTF - DFA matching is not supported for \C in UTF mode
|
||||
|
||||
if [ "$do22" = yes ] ; then
|
||||
echo $title22
|
||||
if [ $supportBSC -eq 0 ] ; then
|
||||
echo " Skipped because \C is disabled"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput22 testtry
|
||||
checkresult $? 22-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test when \C is disabled
|
||||
|
||||
# Test 23: behaviour when \C is disabled. It only makes sense in a build
# where pcre2test reports no \C support ($supportBSC is zero).
if [ "$do23" = yes ] ; then
  echo $title23
  if [ $supportBSC -ne 0 ] ; then
    echo " Skipped because \C is not disabled"
  else
    # Neither $opt nor ${opt:+$vjs} belongs here: this stanza is not inside
    # a "for opt" loop, so $opt would be a leftover from an earlier test's
    # loop, and the result is reported with an empty option.
    $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput23 testtry
    checkresult $? 23 ""
  fi
fi
|
||||
|
||||
# End of loop for 8/16/32-bit tests
|
||||
done
|
||||
|
||||
# Clean up local working files
|
||||
rm -f testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
|
||||
|
||||
# End
|
1531
pcre2/aclocal.m4
vendored
Normal file
1531
pcre2/aclocal.m4
vendored
Normal file
File diff suppressed because it is too large
Load diff
270
pcre2/ar-lib
Executable file
270
pcre2/ar-lib
Executable file
|
@ -0,0 +1,270 @@
|
|||
#! /bin/sh
|
||||
# Wrapper for Microsoft lib.exe
|
||||
|
||||
me=ar-lib
|
||||
scriptversion=2012-03-01.08; # UTC
|
||||
|
||||
# Copyright (C) 2010-2014 Free Software Foundation, Inc.
|
||||
# Written by Peter Rosin <peda@lysator.liu.se>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
|
||||
# func_error message
|
||||
func_error ()
{
  # Report the message in $1 on standard error, prefixed with the script
  # name held in $me, then terminate the script with a failure status.
  echo "$me: $1" >&2
  exit 1
}
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts.
|
||||
func_file_conv ()
{
  # The result is always delivered in the global $file.
  file=$1

  # Only absolute names that are not UNC names (a single leading slash) are
  # converted; everything else is handed back untouched.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Work out the conversion method once and remember it in $file_conv.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  # Convert with the tool that matches the detected environment. For cygwin
  # and wine the unconverted name is kept if the helper fails.
  case $file_conv in
    mingw)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
|
||||
|
||||
# func_at_file at_file operation archive
|
||||
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
|
||||
# for each of them.
|
||||
# When interpreting the content of the @FILE, do NOT use func_file_conv,
|
||||
# since the user would need to supply preconverted file names to
|
||||
# binutils ar, at least for MinGW.
|
||||
func_at_file ()
{
  # $1 = file listing the members, $2 = lib.exe operation (for example
  # -REMOVE or -EXTRACT), $3 = archive to operate on.
  operation=$2
  archive=$3

  # Read the whole @FILE and let the shell split it into words, honouring
  # any quoting inside it. The leading "x" keeps "set" from treating a first
  # word that starts with a dash as an option; it is shifted away again.
  at_file_contents=`cat "$1"`
  eval set x "$at_file_contents"
  shift

  # Apply the operation to each listed member in turn, stopping with the
  # tool's own exit status on the first failure.
  for member in "$@"
  do
    $AR -NOLOGO $operation:"$member" "$archive" || exit $?
  done
}
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
func_error "no command. Try '$0 --help' for more information."
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<EOF
|
||||
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
|
||||
|
||||
Members may be specified in a file named with @FILE.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "$me, version $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
if test $# -lt 3; then
|
||||
func_error "you must specify a program, an action and an archive"
|
||||
fi
|
||||
|
||||
AR=$1
|
||||
shift
|
||||
while :
|
||||
do
|
||||
if test $# -lt 2; then
|
||||
func_error "you must specify a program, an action and an archive"
|
||||
fi
|
||||
case $1 in
|
||||
-lib | -LIB \
|
||||
| -ltcg | -LTCG \
|
||||
| -machine* | -MACHINE* \
|
||||
| -subsystem* | -SUBSYSTEM* \
|
||||
| -verbose | -VERBOSE \
|
||||
| -wx* | -WX* )
|
||||
AR="$AR $1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
action=$1
|
||||
shift
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
orig_archive=$1
|
||||
shift
|
||||
func_file_conv "$orig_archive"
|
||||
archive=$file
|
||||
|
||||
# strip leading dash in $action
|
||||
action=${action#-}
|
||||
|
||||
delete=
|
||||
extract=
|
||||
list=
|
||||
quick=
|
||||
replace=
|
||||
index=
|
||||
create=
|
||||
|
||||
while test -n "$action"
|
||||
do
|
||||
case $action in
|
||||
d*) delete=yes ;;
|
||||
x*) extract=yes ;;
|
||||
t*) list=yes ;;
|
||||
q*) quick=yes ;;
|
||||
r*) replace=yes ;;
|
||||
s*) index=yes ;;
|
||||
S*) ;; # the index is always updated implicitly
|
||||
c*) create=yes ;;
|
||||
u*) ;; # TODO: don't ignore the update modifier
|
||||
v*) ;; # TODO: don't ignore the verbose modifier
|
||||
*)
|
||||
func_error "unknown action specified"
|
||||
;;
|
||||
esac
|
||||
action=${action#?}
|
||||
done
|
||||
|
||||
case $delete$extract$list$quick$replace,$index in
|
||||
yes,* | ,yes)
|
||||
;;
|
||||
yesyes*)
|
||||
func_error "more than one action specified"
|
||||
;;
|
||||
*)
|
||||
func_error "no action specified"
|
||||
;;
|
||||
esac
|
||||
|
||||
if test -n "$delete"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
func_error "archive not found"
|
||||
fi
|
||||
# Remove every requested member from the archive.
# The loop variable is used instead of $1: the positional parameters are
# never shifted inside this loop, so $1 would name the first member on
# every pass and the remaining members would never be processed.
for member
do
  case $member in
    @*)
      # @FILE form: the member names are read from FILE.
      func_at_file "${member#@}" -REMOVE "$archive"
      ;;
    *)
      # Convert the member name to host form (result is left in $file).
      func_file_conv "$member"
      $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
      ;;
  esac
done
|
||||
|
||||
elif test -n "$extract"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
func_error "archive not found"
|
||||
fi
|
||||
if test $# -gt 0; then
|
||||
# Extract only the members named on the command line.
# The loop variable is used instead of $1: the positional parameters are
# never shifted inside this loop, so $1 would name the first member on
# every pass and the remaining members would never be extracted.
for member
do
  case $member in
    @*)
      # @FILE form: the member names are read from FILE.
      func_at_file "${member#@}" -EXTRACT "$archive"
      ;;
    *)
      # Convert the member name to host form (result is left in $file).
      func_file_conv "$member"
      $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
      ;;
  esac
done
|
||||
else
|
||||
$AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
|
||||
do
|
||||
$AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
|
||||
done
|
||||
fi
|
||||
|
||||
elif test -n "$quick$replace"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
if test -z "$create"; then
|
||||
echo "$me: creating $orig_archive"
|
||||
fi
|
||||
orig_archive=
|
||||
else
|
||||
orig_archive=$archive
|
||||
fi
|
||||
|
||||
for member
|
||||
do
|
||||
case $1 in
|
||||
@*)
|
||||
func_file_conv "${1#@}"
|
||||
set x "$@" "@$file"
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$1"
|
||||
set x "$@" "$file"
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
shift
|
||||
done
|
||||
|
||||
if test -n "$orig_archive"; then
|
||||
$AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
|
||||
else
|
||||
$AR -NOLOGO -OUT:"$archive" "$@" || exit $?
|
||||
fi
|
||||
|
||||
elif test -n "$list"; then
|
||||
if test ! -f "$orig_archive"; then
|
||||
func_error "archive not found"
|
||||
fi
|
||||
$AR -NOLOGO -LIST "$archive" || exit $?
|
||||
fi
|
22
pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
22
pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
|
@ -0,0 +1,22 @@
|
|||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
17
pcre2/cmake/FindEditline.cmake
Normal file
17
pcre2/cmake/FindEditline.cmake
Normal file
|
@ -0,0 +1,17 @@
|
|||
# Modified from FindReadline.cmake (PH Feb 2012)
|
||||
|
||||
# Locate the editline header directory and library.
#
# Results: EDITLINE_INCLUDE_DIR, EDITLINE_LIBRARY and EDITLINE_FOUND.
# NOTE(review): the short-circuit below also requires NCURSES_LIBRARY, which
# nothing in this module sets; it is only taken when that variable is
# supplied from elsewhere.
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
  # Everything is already known: skip the search.
  set(EDITLINE_FOUND TRUE)
else()
  find_path(EDITLINE_INCLUDE_DIR
    NAMES readline.h
    PATHS
      /usr/include/editline
      /usr/include/edit/readline
      /usr/include/readline
  )

  find_library(EDITLINE_LIBRARY NAMES edit)

  # Sets EDITLINE_FOUND and prints the found / not-found message.
  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif()
|
58
pcre2/cmake/FindPackageHandleStandardArgs.cmake
Normal file
58
pcre2/cmake/FindPackageHandleStandardArgs.cmake
Normal file
|
@ -0,0 +1,58 @@
|
|||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
|
||||
# This macro is intended to be used in FindXXX.cmake modules files.
|
||||
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
|
||||
# it also sets the <UPPERCASED_NAME>_FOUND variable.
|
||||
# The package is found if all variables listed are TRUE.
|
||||
# Example:
|
||||
#
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
|
||||
#
|
||||
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
|
||||
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
|
||||
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
|
||||
# independent whether QUIET was used or not.
|
||||
# If it is found, the location is reported using the VAR1 argument, so
|
||||
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
|
||||
# If the second argument is DEFAULT_MSG, the message in the failure case will
|
||||
# be "Could not find REQUIRED package LibXml2" (or "... OPTIONAL package ..."); if you don't like this message you can specify
|
||||
# your own custom failure message there.
|
||||
|
||||
macro(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1)

  # Choose the text reported when the package is missing: the caller's own
  # message, or a default one that depends on whether the package was
  # requested as REQUIRED.
  if(NOT "${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
    set(_FAIL_MESSAGE "${_FAIL_MSG}")
  elseif(${_NAME}_FIND_REQUIRED)
    set(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
  else()
    set(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
  endif()

  string(TOUPPER ${_NAME} _NAME_UPPER)

  # The package counts as found only if every listed variable is true.
  set(${_NAME_UPPER}_FOUND TRUE)
  if(NOT ${_VAR1})
    set(${_NAME_UPPER}_FOUND FALSE)
  endif()
  foreach(_CURRENT_VAR ${ARGN})
    if(NOT ${_CURRENT_VAR})
      set(${_NAME_UPPER}_FOUND FALSE)
    endif()
  endforeach()

  # Report the outcome.  A missing REQUIRED package is always fatal; every
  # other message is suppressed when the package was requested QUIETLY.
  if(NOT ${_NAME_UPPER}_FOUND)
    if(${_NAME}_FIND_REQUIRED)
      message(FATAL_ERROR "${_FAIL_MESSAGE}")
    elseif(NOT ${_NAME}_FIND_QUIETLY)
      message(STATUS "${_FAIL_MESSAGE}")
    endif()
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    # The value of the first listed variable is shown as the location.
    message(STATUS "Found ${_NAME}: ${${_VAR1}}")
  endif()
endmacro()
|
29
pcre2/cmake/FindReadline.cmake
Normal file
29
pcre2/cmake/FindReadline.cmake
Normal file
|
@ -0,0 +1,29 @@
|
|||
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
|
||||
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
|
||||
# --> BSD licensed
|
||||
#
|
||||
# GNU Readline library finder
|
||||
# Locate the GNU Readline header directory and library.
#
# Results: READLINE_INCLUDE_DIR, READLINE_LIBRARY and READLINE_FOUND.
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
  # All three locations were supplied already: skip the search.
  set(READLINE_FOUND TRUE)
else()
  find_path(READLINE_INCLUDE_DIR
    NAMES readline/readline.h
    PATHS /usr/include/readline
  )

  # 2008-04-22 The next clause used to read like this:
  #
  #  FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
  #        FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
  #        include(FindPackageHandleStandardArgs)
  #        FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
  #
  # I was advised to modify it such that it will find an ncurses library if
  # required, but not if one was explicitly given, that is, it allows the
  # default to be overridden. PH

  find_library(READLINE_LIBRARY NAMES readline)

  # Sets READLINE_FOUND and prints the found / not-found message.
  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)

  mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
endif()
|
347
pcre2/compile
Executable file
347
pcre2/compile
Executable file
|
@ -0,0 +1,347 @@
|
|||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand '-c -o'.
|
||||
|
||||
scriptversion=2012-10-14.11; # UTC
|
||||
|
||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
nl='
|
||||
'
|
||||
|
||||
# We need space, tab and new line, in precisely that order. Quoting is
|
||||
# there to prevent tools from complaining about whitespace usage.
|
||||
IFS=" "" $nl"
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file lazy
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts. If the determined conversion
|
||||
# type is listed in (the comma separated) LAZY, no conversion will
|
||||
# take place.
|
||||
func_file_conv ()
{
  # $1: file name in build form; $2: optional comma separated LAZY list.
  # The (possibly converted) name is returned in $file.
  file=$1

  # Only absolute names are converted.  Anything else, including UNC names
  # that start with two slashes, is handed back unchanged.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Decide the conversion flavour once and remember it in $file_conv.
  if test -z "$file_conv"; then
    case `uname -s` in
      CYGWIN*) file_conv=cygwin ;;
      MINGW*)  file_conv=mingw ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv/,$2, in
    *,$file_conv,*)
      # This flavour is listed in LAZY: leave the name alone.
      ;;
    cygwin/*)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    wine/*)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
|
||||
|
||||
# func_cl_dashL linkdir
|
||||
# Make cl look for libraries in LINKDIR
|
||||
func_cl_dashL ()
{
  # Convert the directory name; the result arrives in $file.
  func_file_conv "$1"

  # Append to the ';' separated search list, without a leading separator
  # when the list is still empty.
  lib_path="${lib_path:+$lib_path;}$file"

  # Also hand the directory to the linker.
  linker_opts="$linker_opts -LIBPATH:$file"
}
|
||||
|
||||
# func_cl_dashl library
|
||||
# Do a library search-path lookup for cl
|
||||
func_cl_dashl ()
{
  # $1: bare library name.  The resolved file name is returned in $lib.
  lib=$1
  found=no

  # $lib_path and $LIB are ';' separated directory lists; split on ';' for
  # the word list only, then restore IFS for the loop body.
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    IFS=$save_IFS
    # Per directory, in order of preference: NAME.dll.lib (skipped when
    # $shared is false), NAME.lib, then libNAME.a.
    if $shared && test -f "$dir/$lib.dll.lib"; then
      lib=$dir/$lib.dll.lib
    elif test -f "$dir/$lib.lib"; then
      lib=$dir/$lib.lib
    elif test -f "$dir/lib$lib.a"; then
      lib=$dir/lib$lib.a
    else
      continue
    fi
    found=yes
    break
  done
  IFS=$save_IFS

  # Nothing matched: fall back to the plain NAME.lib spelling.
  test "$found" = yes || lib=$lib.lib
}
|
||||
|
||||
# func_cl_wrapper cl arg...
|
||||
# Adjust compile command to suit cl
|
||||
func_cl_wrapper ()
{
  # Rewrite the whole command line in place, then exec it.
  #
  # The loop runs once per original argument.  Each pass looks at $1 and,
  # when the argument (or its rewritten form) is to be kept, appends it to
  # the end of the positional parameters with 'set x "$@" NEW; shift'.  The
  # 'shift' at the bottom of the loop then drops the argument that was just
  # handled, so after the last pass "$@" holds only the rewritten command.
  #
  # $eat is non-empty when the current argument was already consumed as the
  # value of the previous option (read through $2); such an argument is
  # skipped and simply shifted away.  $eat is expected to be empty on entry.

  # Assume a capable shell
  lib_path=
  shared=:
  linker_opts=
  for arg
  do
    if test -n "$eat"; then
      eat=
    else
      case $1 in
        -o)
          # configure might choose to run compile as 'compile cc -o foo foo.c'.
          eat=1
          # Output names that look like object files are passed as -Fo,
          # any other output name as -Fe.
          case $2 in
            *.o | *.[oO][bB][jJ])
              func_file_conv "$2"
              set x "$@" -Fo"$file"
              shift
              ;;
            *)
              func_file_conv "$2"
              set x "$@" -Fe"$file"
              shift
              ;;
          esac
          ;;
        -I)
          # Include directory given as a separate word.
          eat=1
          func_file_conv "$2" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -I*)
          # Include directory attached to the option.
          func_file_conv "${1#-I}" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -l)
          # Library name given as a separate word; func_cl_dashl leaves
          # the resolved file name in $lib.
          eat=1
          func_cl_dashl "$2"
          set x "$@" "$lib"
          shift
          ;;
        -l*)
          func_cl_dashl "${1#-l}"
          set x "$@" "$lib"
          shift
          ;;
        -L)
          # Library directories are not re-appended here; func_cl_dashL
          # records them in $lib_path and $linker_opts.
          eat=1
          func_cl_dashL "$2"
          ;;
        -L*)
          func_cl_dashL "${1#-L}"
          ;;
        -static)
          # Makes func_cl_dashl skip the NAME.dll.lib candidates.
          shared=false
          ;;
        -Wl,*)
          # Split the comma separated list and collect every item as a
          # separate linker option.
          arg=${1#-Wl,}
          save_ifs="$IFS"; IFS=','
          for flag in $arg; do
            IFS="$save_ifs"
            linker_opts="$linker_opts $flag"
          done
          IFS="$save_ifs"
          ;;
        -Xlinker)
          # The following word is collected as a linker option.
          eat=1
          linker_opts="$linker_opts $2"
          ;;
        -*)
          # Any other option is kept unchanged.
          set x "$@" "$1"
          shift
          ;;
        *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
          # These source suffixes are passed with a -Tp prefix.
          func_file_conv "$1"
          set x "$@" -Tp"$file"
          shift
          ;;
        *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
          # Other recognised input files only get their name converted.
          func_file_conv "$1" mingw
          set x "$@" "$file"
          shift
          ;;
        *)
          # Everything else is kept unchanged.
          set x "$@" "$1"
          shift
          ;;
      esac
    fi
    shift
  done
  # $linker_opts always starts with a blank when non-empty, so this yields
  # '-link' followed by the collected options as separate words.
  if test -n "$linker_opts"; then
    linker_opts="-link$linker_opts"
  fi
  # $linker_opts is deliberately unquoted so that it splits into words.
  exec "$@" $linker_opts
  # Only reached when exec itself failed.
  exit 1
}
|
||||
|
||||
eat=
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: compile [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Wrapper for compilers which do not understand '-c -o'.
|
||||
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
|
||||
arguments, and rename the output as expected.
|
||||
|
||||
If you are trying to build a whole package this is not the
|
||||
right script to run: please start by reading the file 'INSTALL'.
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "compile $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
|
||||
func_cl_wrapper "$@" # Doesn't return...
|
||||
;;
|
||||
esac
|
||||
|
||||
ofile=
|
||||
cfile=
|
||||
|
||||
# Scan the command line, remembering the object file given to '-o' in
# $ofile and the '.c' source in $cfile, and rebuild "$@" without the
# '-o OBJECT' pair.
#
# The loop runs once per original argument.  Arguments to keep are appended
# to the end of the positional parameters with 'set x "$@" ...; shift', and
# the 'shift' at the bottom drops the argument that was just handled, so
# after the last pass "$@" holds only the rebuilt command.  $eat is
# non-empty when the current argument was already consumed as the value of
# '-o' (read through $2) and must merely be dropped.
for arg
do
  if test -n "$eat"; then
    eat=
  else
    case $1 in
      -o)
        # configure might choose to run compile as 'compile cc -o foo foo.c'.
        # So we strip '-o arg' only if arg is an object.
        eat=1
        case $2 in
          *.o | *.obj)
            # Remember the requested object name; nothing is re-appended.
            ofile=$2
            ;;
          *)
            # Not an object file: keep the '-o arg' pair.
            set x "$@" -o "$2"
            shift
            ;;
        esac
        ;;
      *.c)
        # Remember the source file and keep it on the command line.
        cfile=$1
        set x "$@" "$1"
        shift
        ;;
      *)
        # Everything else is kept unchanged.
        set x "$@" "$1"
        shift
        ;;
    esac
  fi
  shift
done
|
||||
|
||||
if test -z "$ofile" || test -z "$cfile"; then
|
||||
# If no '-o' option was seen then we might have been invoked from a
|
||||
# pattern rule where we don't need one. That is ok -- this is a
|
||||
# normal compilation that the losing compiler can handle. If no
|
||||
# '.c' file was seen then we are probably linking. That is also
|
||||
# ok.
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Name of file we expect compiler to create.
# The sed script drops everything up to the last slash or backslash, drops
# a leading drive letter, and turns the trailing '.c' into '.o'.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`

# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
# mkdir fails while the directory already exists, so a successful mkdir
# means the lock was acquired; otherwise retry once per second.
while true; do
  if mkdir "$lockdir" >/dev/null 2>&1; then
    break
  fi
  sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
# Release the lock if the script is interrupted by signal 1, 2 or 15.
trap "rmdir '$lockdir'; exit 1" 1 2 15

# Run the compile.
"$@"
ret=$?

# Move whatever the compiler produced (NAME.o, or NAME.obj spelled here as
# "${cofile}bj") to the requested output name, unless it is already there.
if test -f "$cofile"; then
  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi

# Release the lock and report the compiler's exit status.
rmdir "$lockdir"
exit $ret
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
48
pcre2/config-cmake.h.in
Normal file
48
pcre2/config-cmake.h.in
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
#cmakedefine HAVE_STDINT_H 1
|
||||
#cmakedefine HAVE_STRERROR 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_BCOPY 1
|
||||
#cmakedefine HAVE_MEMMOVE 1
|
||||
|
||||
#cmakedefine PCRE2_STATIC 1
|
||||
|
||||
#cmakedefine SUPPORT_PCRE2_8 1
|
||||
#cmakedefine SUPPORT_PCRE2_16 1
|
||||
#cmakedefine SUPPORT_PCRE2_32 1
|
||||
#cmakedefine PCRE2_DEBUG 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
#cmakedefine SUPPORT_LIBEDIT 1
|
||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||
#cmakedefine SUPPORT_LIBZ 1
|
||||
|
||||
#cmakedefine SUPPORT_JIT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||
#cmakedefine SUPPORT_UNICODE 1
|
||||
#cmakedefine SUPPORT_VALGRIND 1
|
||||
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine EBCDIC_NL25 1
|
||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_RECURSION @PCRE2_MATCH_LIMIT_RECURSION@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||
|
||||
#define MAX_NAME_SIZE 32
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* end config.h for CMake builds */
|
1421
pcre2/config.guess
vendored
Executable file
1421
pcre2/config.guess
vendored
Executable file
File diff suppressed because it is too large
Load diff
1807
pcre2/config.sub
vendored
Executable file
1807
pcre2/config.sub
vendored
Executable file
File diff suppressed because it is too large
Load diff
18296
pcre2/configure
vendored
Executable file
18296
pcre2/configure
vendored
Executable file
File diff suppressed because it is too large
Load diff
927
pcre2/configure.ac
Normal file
927
pcre2/configure.ac
Normal file
|
@ -0,0 +1,927 @@
|
|||
dnl Process this file with autoconf to produce a configure script.
|
||||
|
||||
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
|
||||
dnl a check further down that throws an error if 08 or 09 are used.
|
||||
|
||||
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [21])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2016-01-12])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [3:0:3])
|
||||
m4_define(libpcre2_16_version, [3:0:3])
|
||||
m4_define(libpcre2_32_version, [3:0:3])
|
||||
m4_define(libpcre2_posix_version, [0:1:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||
AC_CONFIG_SRCDIR([src/pcre2.h.in])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
AC_CONFIG_HEADERS(src/config.h)
|
||||
|
||||
# FISH PATCH
|
||||
# Enable maintainer mode to avoid spurious rebuilds due to timestamps in git
|
||||
# not being stored. Discussion in https://github.com/fish-shell/fish-shell/issues/2469
|
||||
AM_MAINTAINER_MODE
|
||||
# END FISH PATCH
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any
|
||||
# other compiler. There doesn't seem to be a standard way of getting rid of the
|
||||
# -g (which I don't think is needed for a production library). This fudge seems
|
||||
# to achieve the necessary. First, we remember the externally set values of
|
||||
# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is
|
||||
# not set, it will be set to Autoconf's defaults. Afterwards, if the original
|
||||
# values were not set, remove the -g from the Autoconf defaults.
|
||||
|
||||
remember_set_CFLAGS="$CFLAGS"
|
||||
|
||||
AC_PROG_CC
|
||||
AM_PROG_CC_C_O
|
||||
|
||||
if test "x$remember_set_CFLAGS" = "x"
|
||||
then
|
||||
if test "$CFLAGS" = "-g -O2"
|
||||
then
|
||||
CFLAGS="-O2"
|
||||
elif test "$CFLAGS" = "-g"
|
||||
then
|
||||
CFLAGS=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for a 64-bit integer type
|
||||
AC_TYPE_INT64_T
|
||||
|
||||
AC_PROG_INSTALL
|
||||
AC_LIBTOOL_WIN32_DLL
|
||||
LT_INIT
|
||||
AC_PROG_LN_S
|
||||
|
||||
# Check for GCC visibility feature
|
||||
|
||||
PCRE2_VISIBILITY
|
||||
|
||||
# Versioning
|
||||
|
||||
PCRE2_MAJOR="pcre2_major"
|
||||
PCRE2_MINOR="pcre2_minor"
|
||||
PCRE2_PRERELEASE="pcre2_prerelease"
|
||||
PCRE2_DATE="pcre2_date"
|
||||
|
||||
# Reject the minor version numbers 08 and 09.
# Two separate test commands joined with || are used because the -o
# operator of test is not portable across shells.
if test "$PCRE2_MINOR" = "08" || test "$PCRE2_MINOR" = "09"
then
  echo "***"
  echo "*** Minor version number $PCRE2_MINOR must not be used. ***"
  echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***"
  echo "***"
  exit 1
fi
|
||||
|
||||
AC_SUBST(PCRE2_MAJOR)
|
||||
AC_SUBST(PCRE2_MINOR)
|
||||
AC_SUBST(PCRE2_PRERELEASE)
|
||||
AC_SUBST(PCRE2_DATE)
|
||||
|
||||
# Set a more sensible default value for $(htmldir).
|
||||
if test "x$htmldir" = 'x${docdir}'
|
||||
then
|
||||
htmldir='${docdir}/html'
|
||||
fi
|
||||
|
||||
# Force an error for PCRE1 size options
|
||||
AC_ARG_ENABLE(pcre8,,,enable_pcre8=no)
|
||||
AC_ARG_ENABLE(pcre16,,,enable_pcre16=no)
|
||||
AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
||||
|
||||
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||
then
|
||||
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Handle --disable-pcre2-8 (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2-8,
|
||||
AS_HELP_STRING([--disable-pcre2-8],
|
||||
[disable 8 bit character support]),
|
||||
, enable_pcre2_8=unset)
|
||||
AC_SUBST(enable_pcre2_8)
|
||||
|
||||
# Handle --enable-pcre2-16 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-16,
|
||||
AS_HELP_STRING([--enable-pcre2-16],
|
||||
[enable 16 bit character support]),
|
||||
, enable_pcre2_16=unset)
|
||||
AC_SUBST(enable_pcre2_16)
|
||||
|
||||
# Handle --enable-pcre2-32 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-32,
|
||||
AS_HELP_STRING([--enable-pcre2-32],
|
||||
[enable 32 bit character support]),
|
||||
, enable_pcre2_32=unset)
|
||||
AC_SUBST(enable_pcre2_32)
|
||||
|
||||
# Handle --enable-debug (disabled by default)
|
||||
AC_ARG_ENABLE(debug,
|
||||
AS_HELP_STRING([--enable-debug],
|
||||
[enable debugging code]),
|
||||
, enable_debug=no)
|
||||
|
||||
# Handle --enable-jit (disabled by default)
|
||||
AC_ARG_ENABLE(jit,
|
||||
AS_HELP_STRING([--enable-jit],
|
||||
[enable Just-In-Time compiling support]),
|
||||
, enable_jit=no)
|
||||
|
||||
# Handle --disable-pcre2grep-jit (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-jit,
|
||||
AS_HELP_STRING([--disable-pcre2grep-jit],
|
||||
[disable JIT support in pcre2grep]),
|
||||
, enable_pcre2grep_jit=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||
[rebuild character tables in current locale]),
|
||||
, enable_rebuild_chartables=no)
|
||||
|
||||
# Handle --disable-unicode (enabled by default)
|
||||
AC_ARG_ENABLE(unicode,
|
||||
AS_HELP_STRING([--disable-unicode],
|
||||
[disable Unicode support]),
|
||||
, enable_unicode=unset)
|
||||
|
||||
# Handle newline options
|
||||
ac_pcre2_newline=lf
|
||||
AC_ARG_ENABLE(newline-is-cr,
|
||||
AS_HELP_STRING([--enable-newline-is-cr],
|
||||
[use CR as newline character]),
|
||||
ac_pcre2_newline=cr)
|
||||
AC_ARG_ENABLE(newline-is-lf,
|
||||
AS_HELP_STRING([--enable-newline-is-lf],
|
||||
[use LF as newline character (default)]),
|
||||
ac_pcre2_newline=lf)
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||
[use CRLF as newline sequence]),
|
||||
ac_pcre2_newline=crlf)
|
||||
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||
[use CR, LF, or CRLF as newline sequence]),
|
||||
ac_pcre2_newline=anycrlf)
|
||||
AC_ARG_ENABLE(newline-is-any,
|
||||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre2_newline=any)
|
||||
enable_newline="$ac_pcre2_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
AC_ARG_ENABLE(bsr-anycrlf,
|
||||
AS_HELP_STRING([--enable-bsr-anycrlf],
|
||||
[\R matches only CR, LF, CRLF by default]),
|
||||
, enable_bsr_anycrlf=no)
|
||||
|
||||
# Handle --enable-never-backslash-C
|
||||
AC_ARG_ENABLE(never-backslash-C,
|
||||
AS_HELP_STRING([--enable-never-backslash-C],
|
||||
[use of \C causes an error]),
|
||||
, enable_never_backslash_C=no)
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
# The help text names --disable-unicode because Unicode support is the
# option that conflicts with EBCDIC and it is enabled by default.
AC_ARG_ENABLE(ebcdic,
              AS_HELP_STRING([--enable-ebcdic],
                             [assume EBCDIC coding rather than ASCII; incompatible with Unicode support (requires --disable-unicode); use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
              , enable_ebcdic=no)
|
||||
|
||||
# Handle --enable-ebcdic-nl25
|
||||
AC_ARG_ENABLE(ebcdic-nl25,
|
||||
AS_HELP_STRING([--enable-ebcdic-nl25],
|
||||
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
|
||||
, enable_ebcdic_nl25=no)
|
||||
|
||||
# Handle --disable-stack-for-recursion
|
||||
AC_ARG_ENABLE(stack-for-recursion,
|
||||
AS_HELP_STRING([--disable-stack-for-recursion],
|
||||
[don't use stack recursion when matching]),
|
||||
, enable_stack_for_recursion=yes)
|
||||
|
||||
# Handle --enable-pcre2grep-libz
|
||||
AC_ARG_ENABLE(pcre2grep-libz,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libz],
|
||||
[link pcre2grep with libz to handle .gz files]),
|
||||
, enable_pcre2grep_libz=no)
|
||||
|
||||
# Handle --enable-pcre2grep-libbz2
|
||||
AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libbz2],
|
||||
[link pcre2grep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcre2grep_libbz2=no)
|
||||
|
||||
# Handle --with-pcre2grep-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||
[pcre2grep buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcre2grep_bufsize=20480)
|
||||
|
||||
# Handle --enable-pcre2test-libedit
|
||||
AC_ARG_ENABLE(pcre2test-libedit,
|
||||
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||
[link pcre2test with libedit]),
|
||||
, enable_pcre2test_libedit=no)
|
||||
|
||||
# Handle --enable-pcre2test-libreadline
|
||||
AC_ARG_ENABLE(pcre2test-libreadline,
|
||||
AS_HELP_STRING([--enable-pcre2test-libreadline],
|
||||
[link pcre2test with libreadline]),
|
||||
, enable_pcre2test_libreadline=no)
|
||||
|
||||
# Handle --with-link-size=N
|
||||
AC_ARG_WITH(link-size,
|
||||
AS_HELP_STRING([--with-link-size=N],
|
||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-parens-nest-limit=N
|
||||
AC_ARG_WITH(parens-nest-limit,
|
||||
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
[default limit on internal looping (default=10000000)]),
|
||||
, with_match_limit=10000000)
|
||||
|
||||
# Handle --with-match-limit-recursion=N
|
||||
#
|
||||
# Note: In config.h, the default is to define MATCH_LIMIT_RECURSION
|
||||
# symbolically as MATCH_LIMIT, which in turn is defined to be some numeric
|
||||
# value (e.g. 10000000). MATCH_LIMIT_RECURSION can otherwise be set to some
|
||||
# different numeric value (or even the same numeric value as MATCH_LIMIT,
|
||||
# though no longer defined in terms of the latter).
|
||||
#
|
||||
AC_ARG_WITH(match-limit-recursion,
|
||||
AS_HELP_STRING([--with-match-limit-recursion=N],
|
||||
[default limit on internal recursion (default=MATCH_LIMIT)]),
|
||||
, with_match_limit_recursion=MATCH_LIMIT)
|
||||
|
||||
# Handle --enable-valgrind
|
||||
AC_ARG_ENABLE(valgrind,
|
||||
AS_HELP_STRING([--enable-valgrind],
|
||||
[valgrind support]),
|
||||
, enable_valgrind=no)
|
||||
|
||||
# Enable code coverage reports using gcov
|
||||
AC_ARG_ENABLE(coverage,
|
||||
AS_HELP_STRING([--enable-coverage],
|
||||
[enable code coverage reports using gcov]),
|
||||
, enable_coverage=no)
|
||||
|
||||
# Set the default value for pcre2-8
|
||||
if test "x$enable_pcre2_8" = "xunset"
|
||||
then
|
||||
enable_pcre2_8=yes
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-16
|
||||
if test "x$enable_pcre2_16" = "xunset"
|
||||
then
|
||||
enable_pcre2_16=no
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-32
|
||||
if test "x$enable_pcre2_32" = "xunset"
|
||||
then
|
||||
enable_pcre2_32=no
|
||||
fi
|
||||
|
||||
# Make sure at least one library is selected
|
||||
if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono"
|
||||
then
|
||||
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||
fi
|
||||
|
||||
# Unicode is enabled by default.
|
||||
if test "x$enable_unicode" = "xunset"
|
||||
then
|
||||
enable_unicode=yes
|
||||
fi
|
||||
|
||||
# Convert the newline identifier into the appropriate integer value. These must
|
||||
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
|
||||
|
||||
case "$enable_newline" in
|
||||
cr) ac_pcre2_newline_value=1 ;;
|
||||
lf) ac_pcre2_newline_value=2 ;;
|
||||
crlf) ac_pcre2_newline_value=3 ;;
|
||||
any) ac_pcre2_newline_value=4 ;;
|
||||
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
|
||||
;;
|
||||
esac
|
||||
|
||||
# --enable-ebcdic-nl25 implies --enable-ebcdic
|
||||
if test "x$enable_ebcdic_nl25" = "xyes"; then
|
||||
enable_ebcdic=yes
|
||||
fi
|
||||
|
||||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode. Also, EBCDIC cannot be used with
|
||||
# 16-bit and 32-bit libraries.
|
||||
#
|
||||
if test "x$enable_ebcdic" = "xyes"; then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_unicode" = "xyes"; then
|
||||
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||
fi
|
||||
if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then
|
||||
AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library])
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check argument to --with-link-size
|
||||
case "$with_link_size" in
|
||||
2|3|4) ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option])
|
||||
;;
|
||||
esac
|
||||
|
||||
AH_TOP([
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
# Checks for header files.
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
|
||||
# Conditional compilation
|
||||
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
|
||||
AC_C_CONST
|
||||
AC_TYPE_SIZE_T
|
||||
|
||||
# Checks for library functions.
|
||||
|
||||
AC_CHECK_FUNCS(bcopy memmove strerror)
|
||||
|
||||
# Check for the availability of libz (aka zlib)
|
||||
|
||||
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||
|
||||
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
|
||||
# as for libz. However, this had the following problem, diagnosed and fixed by
|
||||
# a user:
|
||||
#
|
||||
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
|
||||
# under Win32.
|
||||
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
|
||||
# therefore missing the function definition.
|
||||
# - The compiler thus generates a "C" signature for the test function.
|
||||
# - The linker fails to find the "C" function.
|
||||
# - PCRE2 fails to configure if asked to do so against libbz2.
|
||||
#
|
||||
# Solution:
|
||||
#
|
||||
# - Replace the AC_CHECK_LIB test with a custom test.
|
||||
|
||||
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||
# Original test
|
||||
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||
#
|
||||
# Custom test follows
|
||||
|
||||
AC_MSG_CHECKING([for libbz2])
|
||||
OLD_LIBS="$LIBS"
|
||||
LIBS="$LIBS -lbz2"
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
|
||||
#ifdef HAVE_BZLIB_H
|
||||
#include <bzlib.h>
|
||||
#endif]],
|
||||
[[return (int)BZ2_bzopen("conftest", "rb");]])],
|
||||
[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1],
|
||||
AC_MSG_RESULT([no]))
|
||||
LIBS="$OLD_LIBS"
|
||||
|
||||
# Check for the availability of libreadline
|
||||
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
|
||||
[LIBREADLINE=""],
|
||||
[-ltermcap])],
|
||||
[-lncursesw])],
|
||||
[-lncurses])],
|
||||
[-lcurses])],
|
||||
[-ltinfo])])
|
||||
AC_SUBST(LIBREADLINE)
|
||||
if test -n "$LIBREADLINE"; then
|
||||
if test "$LIBREADLINE" != "-lreadline"; then
|
||||
echo "-lreadline needs $LIBREADLINE"
|
||||
LIBREADLINE="-lreadline $LIBREADLINE"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
# Check for the availability of libedit. Different distributions put its
|
||||
# headers in different places. Try to cover the most common ones.
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
|
||||
[AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
|
||||
[AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
|
||||
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
|
||||
fi
|
||||
|
||||
# This facilitates -ansi builds under Linux
|
||||
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
|
||||
|
||||
PCRE2_STATIC_CFLAG=""
|
||||
if test "x$enable_shared" = "xno" ; then
|
||||
AC_DEFINE([PCRE2_STATIC], [1], [
|
||||
Define to any value if linking statically (TODO: make nice with Libtool)])
|
||||
PCRE2_STATIC_CFLAG="-DPCRE2_STATIC"
|
||||
fi
|
||||
AC_SUBST(PCRE2_STATIC_CFLAG)
|
||||
|
||||
# Here is where PCRE2-specific defines are handled
|
||||
|
||||
if test "$enable_pcre2_8" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_8], [], [
|
||||
Define to any value to enable the 8 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_16" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_16], [], [
|
||||
Define to any value to enable the 16 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_32" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_32], [], [
|
||||
Define to any value to enable the 32 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_debug" = "yes"; then
|
||||
AC_DEFINE([PCRE2_DEBUG], [], [
|
||||
Define to any value to include debugging code.])
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
||||
if test "$enable_jit" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||
CC="$PTHREAD_CC"
|
||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
enable_pcre2grep_jit="no"
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_jit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
|
||||
Define to any value to enable JIT support in pcre2grep.])
|
||||
fi
|
||||
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||
Define to any value to enable support for Unicode and UTF encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible
|
||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||
code *or* ASCII/Unicode, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
AC_DEFINE([HEAP_MATCH_RECURSE], [], [
|
||||
PCRE2 uses recursive function calls to handle backtracking while
|
||||
matching. This can sometimes be a problem on systems that have
|
||||
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
|
||||
version that doesn't use recursion in the match() function; instead
|
||||
it creates its own stack by steam using memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libz, so that it is
|
||||
able to handle .gz files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files.])
|
||||
fi
|
||||
|
||||
if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||
AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
|
||||
with_pcre2grep_bufsize="8192"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||
The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
|
||||
to hold parts of the file it is searching. This is also the minimum value.
|
||||
The actual amount of memory used by pcre2grep is three times this number,
|
||||
because it allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||
Define to any value to allow pcre2test to be linked with libedit.])
|
||||
LIBREADLINE="$LIBEDIT"
|
||||
elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||
Define to any value to allow pcre2test to be linked with libreadline.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
and 5 (ANYCRLF).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||
fi
|
||||
|
||||
if test "$enable_never_backslash_C" = "yes"; then
|
||||
AC_DEFINE([NEVER_BACKSLASH_C], [], [
|
||||
Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
allows for compiled patterns up to 64K long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4
|
||||
bytes instead. This allows for longer patterns in extreme cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre2_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular
|
||||
expressions that take for ever to determine that they do not match.
|
||||
The default is set very large so that it does not accidentally catch
|
||||
legitimate cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
||||
The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable
|
||||
to limit the depth of recursive calls of match() more strictly, in
|
||||
order to restrict the maximum amount of stack (or heap, if
|
||||
HEAP_MATCH_RECURSE is defined) that is used. The value of
|
||||
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
||||
have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
|
||||
There is a runtime method for setting a different limit.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_COUNT], [10000], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AH_VERBATIM([PCRE2_EXP_DEFN], [
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN])
|
||||
|
||||
if test "$enable_ebcdic" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32.])
|
||||
fi
|
||||
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
|
||||
In an EBCDIC environment, define this macro to any value to arrange for
|
||||
the NL character to be 0x25 instead of the default 0x15. NL plays the role
|
||||
that LF does in an ASCII/Unicode environment.])
|
||||
fi
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||
Define to any value for valgrind support to find invalid memory reads.])
|
||||
fi
|
||||
|
||||
# Platform specific issues
|
||||
NO_UNDEFINED=
|
||||
EXPORT_ALL_SYMBOLS=
|
||||
case $host_os in
|
||||
cygwin* | mingw* )
|
||||
if test X"$enable_shared" = Xyes; then
|
||||
NO_UNDEFINED="-no-undefined"
|
||||
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# The extra LDFLAGS for each particular library. The libpcre2*_version values
|
||||
# are m4 variables, assigned above.
|
||||
|
||||
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_8_version"
|
||||
|
||||
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_16_version"
|
||||
|
||||
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_32_version"
|
||||
|
||||
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_posix_version"
|
||||
|
||||
AC_SUBST(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||
|
||||
# When we run 'make distcheck', use these arguments. Turning off compiler
|
||||
# optimization makes it run faster.
|
||||
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-unicode"
|
||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||
# specified, the relevant library is available.
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
if test "$HAVE_ZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBZ" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBZ="-lz"
|
||||
fi
|
||||
AC_SUBST(LIBZ)
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
if test "$HAVE_BZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBBZ2" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBBZ2="-lbz2"
|
||||
fi
|
||||
AC_SUBST(LIBBZ2)
|
||||
|
||||
# Similarly for --enable-pcre2test-libedit and --enable-pcre2test-libreadline
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-libreadline"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
|
||||
"$HAVE_READLINE_READLINE_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
|
||||
echo "** nor readline/readline.h was found."
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$LIBEDIT"; then
|
||||
echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
if test "$HAVE_READLINE_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2test-libreadline because readline/readline.h was not found."
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_HISTORY_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2test-libreadline because readline/history.h was not found."
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$LIBREADLINE"; then
|
||||
echo "** Cannot --enable-pcre2test-libreadline because readline library was not found."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Handle valgrind support
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
m4_ifdef([PKG_CHECK_MODULES],
|
||||
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
|
||||
[AC_MSG_ERROR([pkg-config not supported])])
|
||||
fi
|
||||
|
||||
# Handle code coverage reporting support
|
||||
if test "$enable_coverage" = "yes"; then
|
||||
if test "x$GCC" != "xyes"; then
|
||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||
fi
|
||||
|
||||
# ccache is incompatible with gcov
|
||||
AC_PATH_PROG([SHTOOL],[shtool],[false])
|
||||
case `$SHTOOL path $CC` in
|
||||
*ccache*) cc_ccache=yes;;
|
||||
*) cc_ccache=no;;
|
||||
esac
|
||||
|
||||
if test "$cc_ccache" = "yes"; then
|
||||
if test -z "$CCACHE_DISABLE" -o "$CCACHE_DISABLE" != "1"; then
|
||||
AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_ARG_VAR([LCOV],[the ltp lcov program])
|
||||
AC_PATH_PROG([LCOV],[lcov],[false])
|
||||
if test "x$LCOV" = "xfalse"; then
|
||||
AC_MSG_ERROR([lcov not found])
|
||||
fi
|
||||
|
||||
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
|
||||
AC_PATH_PROG([GENHTML],[genhtml],[false])
|
||||
if test "x$GENHTML" = "xfalse"; then
|
||||
AC_MSG_ERROR([genhtml not found])
|
||||
fi
|
||||
|
||||
# Set flags needed for gcov
|
||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_LIBS="-lgcov"
|
||||
AC_SUBST([GCOV_CFLAGS])
|
||||
AC_SUBST([GCOV_CXXFLAGS])
|
||||
AC_SUBST([GCOV_LIBS])
|
||||
fi # enable_coverage
|
||||
|
||||
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
|
||||
|
||||
# Produce these files, in addition to config.h.
|
||||
AC_CONFIG_FILES(
|
||||
Makefile
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
pcre2-config
|
||||
src/pcre2.h
|
||||
)
|
||||
|
||||
# Make the generated script files executable.
|
||||
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre2-config])
|
||||
|
||||
# Make sure that pcre2_chartables.c is removed in case the method for
|
||||
# creating it was changed by reconfiguration.
|
||||
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre2_chartables.c])
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
# Print out a nice little message after configure is run displaying the
|
||||
# chosen options.
|
||||
|
||||
ebcdic_nl_code=n/a
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
ebcdic_nl_code=0x25
|
||||
elif test "$enable_ebcdic" = "yes"; then
|
||||
ebcdic_nl_code=0x15
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
|
||||
$PACKAGE-$VERSION configuration summary:
|
||||
|
||||
Install prefix .................. : ${prefix}
|
||||
C preprocessor .................. : ${CPP}
|
||||
C compiler ...................... : ${CC}
|
||||
Linker .......................... : ${LD}
|
||||
C preprocessor flags ............ : ${CPPFLAGS}
|
||||
C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||
Linker flags .................... : ${LDFLAGS}
|
||||
Extra libraries ................. : ${LIBS}
|
||||
|
||||
Build 8-bit pcre2 library ....... : ${enable_pcre2_8}
|
||||
Build 16-bit pcre2 library ...... : ${enable_pcre2_16}
|
||||
Build 32-bit pcre2 library ...... : ${enable_pcre2_32}
|
||||
Include debugging code .......... : ${enable_debug}
|
||||
Enable JIT compiling support .... : ${enable_jit}
|
||||
Enable Unicode support .......... : ${enable_unicode}
|
||||
Newline char/sequence ........... : ${enable_newline}
|
||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||
\C is disabled .................. : ${enable_never_backslash_C}
|
||||
EBCDIC coding ................... : ${enable_ebcdic}
|
||||
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||
Internal link size .............. : ${with_link_size}
|
||||
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
||||
Match limit ..................... : ${with_match_limit}
|
||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||
Build shared libs ............... : ${enable_shared}
|
||||
Build static libs ............... : ${enable_static}
|
||||
Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
|
||||
Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
|
||||
Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
|
||||
Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
|
||||
Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
|
||||
Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
|
||||
Valgrind support ................ : ${enable_valgrind}
|
||||
Code coverage ................... : ${enable_coverage}
|
||||
|
||||
EOF
|
||||
|
||||
dnl end configure.ac
|
791
pcre2/depcomp
Executable file
791
pcre2/depcomp
Executable file
|
@ -0,0 +1,791 @@
|
|||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2013-05-30.07; # UTC
|
||||
|
||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by 'PROGRAMS ARGS'.
|
||||
object Object file output by 'PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputting dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
# Get the directory component of the given path, and save it in the
|
||||
# global variable '$dir'. Note that this directory component will
|
||||
# be either empty or ending with a '/' character. This is deliberate.
|
||||
set_dir_from ()
|
||||
{
|
||||
case $1 in
|
||||
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
|
||||
*) dir=;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Get the suffix-stripped basename of the given path, and save it in the
|
||||
# global variable '$base'.
|
||||
set_base_from ()
|
||||
{
|
||||
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
|
||||
}
|
||||
|
||||
# If no dependency file was actually created by the compiler invocation,
|
||||
# we still have to create a dummy depfile, to avoid errors with the
|
||||
# Makefile "include basename.Plo" scheme.
|
||||
make_dummy_depfile ()
|
||||
{
|
||||
echo "#dummy" > "$depfile"
|
||||
}
|
||||
|
||||
# Factor out some common post-processing of the generated depfile.
|
||||
# Requires the auxiliary global variable '$tmpdepfile' to be set.
|
||||
aix_post_process_depfile ()
|
||||
{
|
||||
# If the compiler actually managed to produce a dependency file,
|
||||
# post-process it.
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form 'foo.o: dependency.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# $object: dependency.h
|
||||
# and one to simply output
|
||||
# dependency.h:
|
||||
# which is needed to avoid the deleted-header problem.
|
||||
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
|
||||
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
|
||||
} > "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
}
|
||||
|
||||
# A tabulation character.
|
||||
tab=' '
|
||||
# A newline character.
|
||||
nl='
|
||||
'
|
||||
# Character ranges might be problematic outside the C locale.
|
||||
# These definitions help.
|
||||
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
lower=abcdefghijklmnopqrstuvwxyz
|
||||
digits=0123456789
|
||||
alpha=${upper}${lower}
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Avoid interferences from the environment.
|
||||
gccflag= dashmflag=
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
cygpath_u="cygpath -u -f -"
|
||||
if test "$depmode" = msvcmsys; then
|
||||
# This is just like msvisualcpp but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvisualcpp
|
||||
fi
|
||||
|
||||
if test "$depmode" = msvc7msys; then
|
||||
# This is just like msvc7 but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvc7
|
||||
fi
|
||||
|
||||
if test "$depmode" = xlc; then
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||
gccflag=-qmakedep=gcc,-MF
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
|
||||
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
|
||||
## (see the conditional assignment to $gccflag above).
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say). Also, it might not be
|
||||
## supported by the other compilers which use the 'gcc' depmode.
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The second -e expression handles DOS-style file names with drive
|
||||
# letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the "deleted header file" problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
## Some versions of gcc put a space before the ':'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||
## to the object. Take care to not repeat it in the output.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
|
||||
| tr "$nl" ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
xlc)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
tcc)
|
||||
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
|
||||
# FIXME: That version is still under development at the moment of writing.
|
||||
# Make sure that this statement remains true also for stable, released
|
||||
# versions.
|
||||
# It will wrap lines (doesn't matter whether long or short) with a
|
||||
# trailing '\', as in:
|
||||
#
|
||||
# foo.o : \
|
||||
# foo.c \
|
||||
# foo.h \
|
||||
#
|
||||
# It will put a trailing '\' even on the last line, and will use leading
|
||||
# spaces rather than leading tabs (at least since its commit 0394caf7
|
||||
# "Emit spaces for -MD").
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
|
||||
# We have to change lines of the first kind to '$object: \'.
|
||||
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
|
||||
# And for each line of the second kind, we have to emit a 'dep.h:'
|
||||
# dummy dependency, to avoid the deleted-header problem.
|
||||
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
## The order of this option in the case statement is important, since the
|
||||
## shell code in configure will try each of these formats in the order
|
||||
## listed in this file. A plain '-MD' option would be understood by many
|
||||
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||
pgcc)
|
||||
# Portland's C compiler understands '-MD'.
|
||||
# Will always output deps to 'file.d' where file is the root name of the
|
||||
# source file under compilation, even if file resides in a subdirectory.
|
||||
# The object file name does not affect the name of the '.d' file.
|
||||
# pgcc 10.2 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\' :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
set_dir_from "$object"
|
||||
# Use the source, not the object, to determine the base name, since
|
||||
# that's sadly what pgcc will do too.
|
||||
set_base_from "$source"
|
||||
tmpdepfile=$base.d
|
||||
|
||||
# For projects that build the same source file twice into different object
|
||||
# files, the pgcc approach of using the *source* file root name can cause
|
||||
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||
# the same $tmpdepfile.
|
||||
lockdir=$base.d-lock
|
||||
trap "
|
||||
echo '$0: caught signal, cleaning up...' >&2
|
||||
rmdir '$lockdir'
|
||||
exit 1
|
||||
" 1 2 13 15
|
||||
numtries=100
|
||||
i=$numtries
|
||||
while test $i -gt 0; do
|
||||
# mkdir is a portable test-and-set.
|
||||
if mkdir "$lockdir" 2>/dev/null; then
|
||||
# This process acquired the lock.
|
||||
"$@" -MD
|
||||
stat=$?
|
||||
# Release the lock.
|
||||
rmdir "$lockdir"
|
||||
break
|
||||
else
|
||||
# If the lock is being held by a different process, wait
|
||||
# until the winning process is done or we timeout.
|
||||
while test -d "$lockdir" && test $i -gt 0; do
|
||||
sleep 1
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
fi
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
trap - 1 2 13 15
|
||||
if test $i -le 0; then
|
||||
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||
echo "$0: check lockdir '$lockdir'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add 'dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
s/ \\*$//
|
||||
s/$/:/
|
||||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in 'foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# Libtool generates 2 separate objects for the 2 libraries. These
|
||||
# two compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
|
||||
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
# Same post-processing that is required for AIX mode.
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
msvc7)
|
||||
if test "$libtool" = yes; then
|
||||
showIncludes=-Wc,-showIncludes
|
||||
else
|
||||
showIncludes=-showIncludes
|
||||
fi
|
||||
"$@" $showIncludes > "$tmpdepfile"
|
||||
stat=$?
|
||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The first sed program below extracts the file names and escapes
|
||||
# backslashes for cygpath. The second sed program outputs the file
|
||||
# name when reading, but also accumulates all include files in the
|
||||
# hold buffer in order to output them again at the end. This only
|
||||
# works with sed implementations that can handle large buffers.
|
||||
sed < "$tmpdepfile" -n '
|
||||
/^Note: including file: *\(.*\)/ {
|
||||
s//\1/
|
||||
s/\\/\\\\/g
|
||||
p
|
||||
}' | $cygpath_u | sort -u | sed -n '
|
||||
s/ /\\ /g
|
||||
s/\(.*\)/'"$tab"'\1 \\/p
|
||||
s/.\(.*\) \\/\1:/
|
||||
H
|
||||
$ {
|
||||
s/.*/'"$tab"'/
|
||||
G
|
||||
p
|
||||
}' >> "$depfile"
|
||||
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvc7msys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for ':'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this sed invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no eat=no
|
||||
for arg
|
||||
do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
if test $eat = yes; then
|
||||
eat=no
|
||||
continue
|
||||
fi
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-arch)
|
||||
eat=yes ;;
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
# makedepend may prepend the VPATH from the source file name to the object.
|
||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process the last invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed '1,2d' "$tmpdepfile" \
|
||||
| tr ' ' "$nl" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E \
|
||||
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
| sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E 2>/dev/null |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||
echo "$tab" >> "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvcmsys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
501
pcre2/install-sh
Executable file
501
pcre2/install-sh
Executable file
|
@ -0,0 +1,501 @@
|
|||
#!/bin/sh
|
||||
# install - install a program, script, or datafile
|
||||
|
||||
scriptversion=2013-12-25.23; # UTC
|
||||
|
||||
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||
# following copyright and license.
|
||||
#
|
||||
# Copyright (C) 1994 X Consortium
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# Except as contained in this notice, the name of the X Consortium shall not
|
||||
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||
# ings in this Software without prior written authorization from the X Consor-
|
||||
# tium.
|
||||
#
|
||||
#
|
||||
# FSF changes to this file are in the public domain.
|
||||
#
|
||||
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||
# 'make' implicit rules from creating a file called install from it
|
||||
# when there is no Makefile.
|
||||
#
|
||||
# This script is compatible with the BSD install script, but was written
|
||||
# from scratch.
|
||||
|
||||
tab=' '
|
||||
nl='
|
||||
'
|
||||
IFS=" $tab$nl"
|
||||
|
||||
# Set DOITPROG to "echo" to test this script.
|
||||
|
||||
doit=${DOITPROG-}
|
||||
doit_exec=${doit:-exec}
|
||||
|
||||
# Put in absolute file names if you don't have them in your path;
|
||||
# or use environment vars.
|
||||
|
||||
chgrpprog=${CHGRPPROG-chgrp}
|
||||
chmodprog=${CHMODPROG-chmod}
|
||||
chownprog=${CHOWNPROG-chown}
|
||||
cmpprog=${CMPPROG-cmp}
|
||||
cpprog=${CPPROG-cp}
|
||||
mkdirprog=${MKDIRPROG-mkdir}
|
||||
mvprog=${MVPROG-mv}
|
||||
rmprog=${RMPROG-rm}
|
||||
stripprog=${STRIPPROG-strip}
|
||||
|
||||
posix_mkdir=
|
||||
|
||||
# Desired mode of installed file.
|
||||
mode=0755
|
||||
|
||||
chgrpcmd=
|
||||
chmodcmd=$chmodprog
|
||||
chowncmd=
|
||||
mvcmd=$mvprog
|
||||
rmcmd="$rmprog -f"
|
||||
stripcmd=
|
||||
|
||||
src=
|
||||
dst=
|
||||
dir_arg=
|
||||
dst_arg=
|
||||
|
||||
copy_on_change=false
|
||||
is_target_a_directory=possibly
|
||||
|
||||
usage="\
|
||||
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||
or: $0 [OPTION]... -d DIRECTORIES...
|
||||
|
||||
In the 1st form, copy SRCFILE to DSTFILE.
|
||||
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||
In the 4th, create DIRECTORIES.
|
||||
|
||||
Options:
|
||||
--help display this help and exit.
|
||||
--version display version info and exit.
|
||||
|
||||
-c (ignored)
|
||||
-C install only if different (preserve the last data modification time)
|
||||
-d create directories instead of installing files.
|
||||
-g GROUP $chgrpprog installed files to GROUP.
|
||||
-m MODE $chmodprog installed files to MODE.
|
||||
-o USER $chownprog installed files to USER.
|
||||
-s $stripprog installed files.
|
||||
-t DIRECTORY install into DIRECTORY.
|
||||
-T report an error if DSTFILE is a directory.
|
||||
|
||||
Environment variables override the default commands:
|
||||
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||
RMPROG STRIPPROG
|
||||
"
|
||||
|
||||
while test $# -ne 0; do
|
||||
case $1 in
|
||||
-c) ;;
|
||||
|
||||
-C) copy_on_change=true;;
|
||||
|
||||
-d) dir_arg=true;;
|
||||
|
||||
-g) chgrpcmd="$chgrpprog $2"
|
||||
shift;;
|
||||
|
||||
--help) echo "$usage"; exit $?;;
|
||||
|
||||
-m) mode=$2
|
||||
case $mode in
|
||||
*' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
|
||||
echo "$0: invalid mode: $mode" >&2
|
||||
exit 1;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-o) chowncmd="$chownprog $2"
|
||||
shift;;
|
||||
|
||||
-s) stripcmd=$stripprog;;
|
||||
|
||||
-t)
|
||||
is_target_a_directory=always
|
||||
dst_arg=$2
|
||||
# Protect names problematic for 'test' and other utilities.
|
||||
case $dst_arg in
|
||||
-* | [=\(\)!]) dst_arg=./$dst_arg;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-T) is_target_a_directory=never;;
|
||||
|
||||
--version) echo "$0 $scriptversion"; exit $?;;
|
||||
|
||||
--) shift
|
||||
break;;
|
||||
|
||||
-*) echo "$0: invalid option: $1" >&2
|
||||
exit 1;;
|
||||
|
||||
*) break;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# We allow the use of options -d and -T together, by making -d
|
||||
# take precedence; this is for compatibility with GNU install.
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
if test -n "$dst_arg"; then
|
||||
echo "$0: target directory not allowed when installing a directory." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||
# When -d is used, all remaining arguments are directories to create.
|
||||
# When -t is used, the destination is already specified.
|
||||
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||
for arg
|
||||
do
|
||||
if test -n "$dst_arg"; then
|
||||
# $@ is not empty: it contains at least $arg.
|
||||
set fnord "$@" "$dst_arg"
|
||||
shift # fnord
|
||||
fi
|
||||
shift # arg
|
||||
dst_arg=$arg
|
||||
# Protect names problematic for 'test' and other utilities.
|
||||
case $dst_arg in
|
||||
-* | [=\(\)!]) dst_arg=./$dst_arg;;
|
||||
esac
|
||||
done
|
||||
fi
|
||||
|
||||
if test $# -eq 0; then
|
||||
if test -z "$dir_arg"; then
|
||||
echo "$0: no input file specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
# It's OK to call 'install-sh -d' without argument.
|
||||
# This can happen when creating conditional directories.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
if test $# -gt 1 || test "$is_target_a_directory" = always; then
|
||||
if test ! -d "$dst_arg"; then
|
||||
echo "$0: $dst_arg: Is not a directory." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
do_exit='(exit $ret); exit $ret'
|
||||
trap "ret=129; $do_exit" 1
|
||||
trap "ret=130; $do_exit" 2
|
||||
trap "ret=141; $do_exit" 13
|
||||
trap "ret=143; $do_exit" 15
|
||||
|
||||
# Set umask so as not to create temps with too-generous modes.
|
||||
# However, 'strip' requires both read and write access to temps.
|
||||
case $mode in
|
||||
# Optimize common cases.
|
||||
*644) cp_umask=133;;
|
||||
*755) cp_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw='% 200'
|
||||
fi
|
||||
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||
*)
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw=,u+rw
|
||||
fi
|
||||
cp_umask=$mode$u_plus_rw;;
|
||||
esac
|
||||
fi
|
||||
|
||||
for src
|
||||
do
|
||||
# Protect names problematic for 'test' and other utilities.
|
||||
case $src in
|
||||
-* | [=\(\)!]) src=./$src;;
|
||||
esac
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
dst=$src
|
||||
dstdir=$dst
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
else
|
||||
|
||||
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||
# might cause directories to be created, which would be especially bad
|
||||
# if $src (and thus $dsttmp) contains '*'.
|
||||
if test ! -f "$src" && test ! -d "$src"; then
|
||||
echo "$0: $src does not exist." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z "$dst_arg"; then
|
||||
echo "$0: no destination specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
dst=$dst_arg
|
||||
|
||||
# If destination is a directory, append the input filename; won't work
|
||||
# if double slashes aren't ignored.
|
||||
if test -d "$dst"; then
|
||||
if test "$is_target_a_directory" = never; then
|
||||
echo "$0: $dst_arg: Is a directory" >&2
|
||||
exit 1
|
||||
fi
|
||||
dstdir=$dst
|
||||
dst=$dstdir/`basename "$src"`
|
||||
dstdir_status=0
|
||||
else
|
||||
dstdir=`dirname "$dst"`
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
obsolete_mkdir_used=false
|
||||
|
||||
if test $dstdir_status != 0; then
|
||||
case $posix_mkdir in
|
||||
'')
|
||||
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||
umask=`umask`
|
||||
case $stripcmd.$umask in
|
||||
# Optimize common cases.
|
||||
*[2367][2367]) mkdir_umask=$umask;;
|
||||
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
mkdir_umask=`expr $umask + 22 \
|
||||
- $umask % 100 % 40 + $umask % 20 \
|
||||
- $umask % 10 % 4 + $umask % 2
|
||||
`;;
|
||||
*) mkdir_umask=$umask,go-w;;
|
||||
esac
|
||||
|
||||
# With -d, create the new directory with the user-specified mode.
|
||||
# Otherwise, rely on $mkdir_umask.
|
||||
if test -n "$dir_arg"; then
|
||||
mkdir_mode=-m$mode
|
||||
else
|
||||
mkdir_mode=
|
||||
fi
|
||||
|
||||
posix_mkdir=false
|
||||
case $umask in
|
||||
*[123567][0-7][0-7])
|
||||
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||
;;
|
||||
*)
|
||||
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||
|
||||
if (umask $mkdir_umask &&
|
||||
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
|
||||
then
|
||||
if test -z "$dir_arg" || {
|
||||
# Check for POSIX incompatibilities with -m.
|
||||
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||
# other-writable bit of parent directory when it shouldn't.
|
||||
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||
ls_ld_tmpdir=`ls -ld "$tmpdir"`
|
||||
case $ls_ld_tmpdir in
|
||||
d????-?r-*) different_mode=700;;
|
||||
d????-?--*) different_mode=755;;
|
||||
*) false;;
|
||||
esac &&
|
||||
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
|
||||
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
|
||||
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||
}
|
||||
}
|
||||
then posix_mkdir=:
|
||||
fi
|
||||
rmdir "$tmpdir/d" "$tmpdir"
|
||||
else
|
||||
# Remove any dirs left behind by ancient mkdir implementations.
|
||||
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
|
||||
fi
|
||||
trap '' 0;;
|
||||
esac;;
|
||||
esac
|
||||
|
||||
if
|
||||
$posix_mkdir && (
|
||||
umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||
)
|
||||
then :
|
||||
else
|
||||
|
||||
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||
# or it failed possibly due to a race condition. Create the
|
||||
# directory the slow way, step by step, checking for races as we go.
|
||||
|
||||
case $dstdir in
|
||||
/*) prefix='/';;
|
||||
[-=\(\)!]*) prefix='./';;
|
||||
*) prefix='';;
|
||||
esac
|
||||
|
||||
oIFS=$IFS
|
||||
IFS=/
|
||||
set -f
|
||||
set fnord $dstdir
|
||||
shift
|
||||
set +f
|
||||
IFS=$oIFS
|
||||
|
||||
prefixes=
|
||||
|
||||
for d
|
||||
do
|
||||
test X"$d" = X && continue
|
||||
|
||||
prefix=$prefix$d
|
||||
if test -d "$prefix"; then
|
||||
prefixes=
|
||||
else
|
||||
if $posix_mkdir; then
|
||||
# Retry the one-shot "mkdir -p" on the whole destination directory.  The
# subshell keeps the umask change from leaking into the rest of the script.
# umask is run as a command here (not assigned as a variable) so that the
# computed mask really applies to the directories mkdir creates.
(umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
|
||||
# Don't fail if two instances are running concurrently.
|
||||
test -d "$prefix" || exit 1
|
||||
else
|
||||
case $prefix in
|
||||
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
|
||||
*) qprefix=$prefix;;
|
||||
esac
|
||||
prefixes="$prefixes '$qprefix'"
|
||||
fi
|
||||
fi
|
||||
prefix=$prefix/
|
||||
done
|
||||
|
||||
if test -n "$prefixes"; then
|
||||
# Don't fail if two instances are running concurrently.
|
||||
(umask $mkdir_umask &&
|
||||
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||
test -d "$dstdir" || exit 1
|
||||
obsolete_mkdir_used=true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||
else
|
||||
|
||||
# Make a couple of temp file names in the proper directory.
|
||||
dsttmp=$dstdir/_inst.$$_
|
||||
rmtmp=$dstdir/_rm.$$_
|
||||
|
||||
# Trap to clean up those temp files at exit.
|
||||
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||
|
||||
# Copy the file name to the temp name.
|
||||
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
|
||||
|
||||
# and set any options; do chmod last to preserve setuid bits.
|
||||
#
|
||||
# If any of these fail, we abort the whole thing. If we want to
|
||||
# ignore errors from any of these, just make sure not to ignore
|
||||
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||
#
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||
|
||||
# If -C, don't bother to copy if it wouldn't change the file.
|
||||
if $copy_on_change &&
|
||||
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||
set -f &&
|
||||
set X $old && old=:$2:$4:$5:$6 &&
|
||||
set X $new && new=:$2:$4:$5:$6 &&
|
||||
set +f &&
|
||||
test "$old" = "$new" &&
|
||||
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||
then
|
||||
rm -f "$dsttmp"
|
||||
else
|
||||
# Rename the file to the real destination.
|
||||
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||
|
||||
# The rename failed, perhaps because mv can't rename something else
|
||||
# to itself, or perhaps because mv is so ancient that it does not
|
||||
# support -f.
|
||||
{
|
||||
# Now remove or move aside any old file at destination location.
|
||||
# We try this two ways since rm can't unlink itself on some
|
||||
# systems and the destination file might be busy for other
|
||||
# reasons. In this case, the final cleanup might fail but the new
|
||||
# file should still install successfully.
|
||||
{
|
||||
test ! -f "$dst" ||
|
||||
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||
} ||
|
||||
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||
(exit 1); exit 1
|
||||
}
|
||||
} &&
|
||||
|
||||
# Now rename the file to the real destination.
|
||||
$doit $mvcmd "$dsttmp" "$dst"
|
||||
}
|
||||
fi || exit 1
|
||||
|
||||
trap '' 0
|
||||
fi
|
||||
done
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
13
pcre2/libpcre2-16.pc.in
Normal file
13
pcre2/libpcre2-16.pc.in
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-16
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-16
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
13
pcre2/libpcre2-32.pc.in
Normal file
13
pcre2/libpcre2-32.pc.in
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-32
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-32
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
13
pcre2/libpcre2-8.pc.in
Normal file
13
pcre2/libpcre2-8.pc.in
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-8
|
||||
Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-8
|
||||
Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
13
pcre2/libpcre2-posix.pc.in
Normal file
13
pcre2/libpcre2-posix.pc.in
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Package Information for pkg-config
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libpcre2-posix
|
||||
Description: Posix compatible interface to libpcre2-8
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -lpcre2-posix
|
||||
Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
|
||||
Requires.private: libpcre2-8
|
11147
pcre2/ltmain.sh
Normal file
11147
pcre2/ltmain.sh
Normal file
File diff suppressed because it is too large
Load diff
309
pcre2/m4/ax_pthread.m4
Normal file
309
pcre2/m4/ax_pthread.m4
Normal file
|
@ -0,0 +1,309 @@
|
|||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_pthread.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# This macro figures out how to build C programs using POSIX threads. It
|
||||
# sets the PTHREAD_LIBS output variable to the threads library and linker
|
||||
# flags, and the PTHREAD_CFLAGS output variable to any special C compiler
|
||||
# flags that are needed. (The user can also force certain compiler
|
||||
# flags/libs to be tested by setting these environment variables.)
|
||||
#
|
||||
# Also sets PTHREAD_CC to any special C compiler that is needed for
|
||||
# multi-threaded programs (defaults to the value of CC otherwise). (This
|
||||
# is necessary on AIX to use the special cc_r compiler alias.)
|
||||
#
|
||||
# NOTE: You are assumed to not only compile your program with these flags,
|
||||
# but also link it with them as well. e.g. you should link with
|
||||
# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
|
||||
#
|
||||
# If you are only building threads programs, you may wish to use these
|
||||
# variables in your default LIBS, CFLAGS, and CC:
|
||||
#
|
||||
# LIBS="$PTHREAD_LIBS $LIBS"
|
||||
# CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
# CC="$PTHREAD_CC"
|
||||
#
|
||||
# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
|
||||
# has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name
|
||||
# (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
|
||||
#
|
||||
# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
|
||||
# PTHREAD_PRIO_INHERIT symbol is defined when compiling with
|
||||
# PTHREAD_CFLAGS.
|
||||
#
|
||||
# ACTION-IF-FOUND is a list of shell commands to run if a threads library
|
||||
# is found, and ACTION-IF-NOT-FOUND is a list of commands to run if it
|
||||
# is not found. If ACTION-IF-FOUND is not specified, the default action
|
||||
# will define HAVE_PTHREAD.
|
||||
#
|
||||
# Please let the authors know if this macro fails on any platform, or if
|
||||
# you have any other suggestions or comments. This macro was based on work
|
||||
# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
|
||||
# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
|
||||
# Alejandro Forero Cuervo to the autoconf macro repository. We are also
|
||||
# grateful for the helpful feedback of numerous users.
|
||||
#
|
||||
# Updated for Autoconf 2.68 by Daniel Richard G.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
|
||||
# Copyright (c) 2011 Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation, either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 18
|
||||
|
||||
AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
|
||||
AC_DEFUN([AX_PTHREAD], [
|
||||
AC_REQUIRE([AC_CANONICAL_HOST])
|
||||
AC_LANG_PUSH([C])
|
||||
ax_pthread_ok=no
|
||||
|
||||
# We used to check for pthread.h first, but this fails if pthread.h
|
||||
# requires special compiler flags (e.g. on Tru64 or Sequent).
|
||||
# It gets checked for in the link test anyway.
|
||||
|
||||
# First of all, check if the user has set any of the PTHREAD_LIBS,
|
||||
# etcetera environment variables, and if threads linking works using
|
||||
# them:
|
||||
if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
|
||||
save_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
save_LIBS="$LIBS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
|
||||
AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes)
|
||||
AC_MSG_RESULT($ax_pthread_ok)
|
||||
if test x"$ax_pthread_ok" = xno; then
|
||||
PTHREAD_LIBS=""
|
||||
PTHREAD_CFLAGS=""
|
||||
fi
|
||||
LIBS="$save_LIBS"
|
||||
CFLAGS="$save_CFLAGS"
|
||||
fi
|
||||
|
||||
# We must check for the threads library under a number of different
|
||||
# names; the ordering is very important because some systems
|
||||
# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
|
||||
# libraries is broken (non-POSIX).
|
||||
|
||||
# Create a list of thread flags to try. Items starting with a "-" are
|
||||
# C compiler flags, and other items are library names, except for "none"
|
||||
# which indicates that we try without any flags at all, and "pthread-config"
|
||||
# which is a program returning the flags for the Pth emulation library.
|
||||
|
||||
ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
|
||||
|
||||
# The ordering *is* (sometimes) important. Some notes on the
|
||||
# individual items follow:
|
||||
|
||||
# pthreads: AIX (must check this before -lpthread)
|
||||
# none: in case threads are in libc; should be tried before -Kthread and
|
||||
# other compiler flags to prevent continual compiler warnings
|
||||
# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
|
||||
# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
|
||||
# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
|
||||
# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
|
||||
# -pthreads: Solaris/gcc
|
||||
# -mthreads: Mingw32/gcc, Lynx/gcc
|
||||
# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
|
||||
# doesn't hurt to check since this sometimes defines pthreads too;
|
||||
# also defines -D_REENTRANT)
|
||||
# ... -mt is also the pthreads flag for HP/aCC
|
||||
# pthread: Linux, etcetera
|
||||
# --thread-safe: KAI C++
|
||||
# pthread-config: use pthread-config program (for GNU Pth library)
|
||||
|
||||
case ${host_os} in
|
||||
solaris*)
|
||||
|
||||
# On Solaris (at least, for some versions), libc contains stubbed
|
||||
# (non-functional) versions of the pthreads routines, so link-based
|
||||
# tests will erroneously succeed. (We need to link with -pthreads/-mt/
|
||||
# -lpthread.) (The stubs are missing pthread_cleanup_push, or rather
|
||||
# a function called by this macro, so we could check for that, but
|
||||
# who knows whether they'll stub that too in a future libc.) So,
|
||||
# we'll just look for -pthreads and -lpthread first:
|
||||
|
||||
ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags"
|
||||
;;
|
||||
|
||||
darwin*)
|
||||
ax_pthread_flags="-pthread $ax_pthread_flags"
|
||||
;;
|
||||
esac
|
||||
|
||||
if test x"$ax_pthread_ok" = xno; then
|
||||
for flag in $ax_pthread_flags; do
|
||||
|
||||
case $flag in
|
||||
none)
|
||||
AC_MSG_CHECKING([whether pthreads work without any flags])
|
||||
;;
|
||||
|
||||
-*)
|
||||
AC_MSG_CHECKING([whether pthreads work with $flag])
|
||||
PTHREAD_CFLAGS="$flag"
|
||||
;;
|
||||
|
||||
pthread-config)
|
||||
AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no)
|
||||
if test x"$ax_pthread_config" = xno; then continue; fi
|
||||
PTHREAD_CFLAGS="`pthread-config --cflags`"
|
||||
PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
|
||||
;;
|
||||
|
||||
*)
|
||||
AC_MSG_CHECKING([for the pthreads library -l$flag])
|
||||
PTHREAD_LIBS="-l$flag"
|
||||
;;
|
||||
esac
|
||||
|
||||
save_LIBS="$LIBS"
|
||||
save_CFLAGS="$CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
|
||||
# Check for various functions. We must include pthread.h,
|
||||
# since some functions may be macros. (On the Sequent, we
|
||||
# need a special flag -Kthread to make this header compile.)
|
||||
# We check for pthread_join because it is in -lpthread on IRIX
|
||||
# while pthread_create is in libc. We check for pthread_attr_init
|
||||
# due to DEC craziness with -lpthreads. We check for
|
||||
# pthread_cleanup_push because it is one of the few pthread
|
||||
# functions on Solaris that doesn't have a non-functional libc stub.
|
||||
# We try pthread_create on general principles.
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
|
||||
static void routine(void *a) { a = 0; }
|
||||
static void *start_routine(void *a) { return a; }],
|
||||
[pthread_t th; pthread_attr_t attr;
|
||||
pthread_create(&th, 0, start_routine, 0);
|
||||
pthread_join(th, 0);
|
||||
pthread_attr_init(&attr);
|
||||
pthread_cleanup_push(routine, 0);
|
||||
pthread_cleanup_pop(0) /* ; */])],
|
||||
[ax_pthread_ok=yes],
|
||||
[])
|
||||
|
||||
LIBS="$save_LIBS"
|
||||
CFLAGS="$save_CFLAGS"
|
||||
|
||||
AC_MSG_RESULT($ax_pthread_ok)
|
||||
if test "x$ax_pthread_ok" = xyes; then
|
||||
break;
|
||||
fi
|
||||
|
||||
PTHREAD_LIBS=""
|
||||
PTHREAD_CFLAGS=""
|
||||
done
|
||||
fi
|
||||
|
||||
# Various other checks:
|
||||
if test "x$ax_pthread_ok" = xyes; then
|
||||
save_LIBS="$LIBS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
save_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
|
||||
# Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
|
||||
AC_MSG_CHECKING([for joinable pthread attribute])
|
||||
attr_name=unknown
|
||||
for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
|
||||
[int attr = $attr; return attr /* ; */])],
|
||||
[attr_name=$attr; break],
|
||||
[])
|
||||
done
|
||||
AC_MSG_RESULT($attr_name)
|
||||
if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
|
||||
AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
|
||||
[Define to necessary symbol if this constant
|
||||
uses a non-standard name on your system.])
|
||||
fi
|
||||
|
||||
AC_MSG_CHECKING([if more special flags are required for pthreads])
|
||||
flag=no
|
||||
case ${host_os} in
|
||||
aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";;
|
||||
osf* | hpux*) flag="-D_REENTRANT";;
|
||||
solaris*)
|
||||
if test "$GCC" = "yes"; then
|
||||
flag="-D_REENTRANT"
|
||||
else
|
||||
flag="-mt -D_REENTRANT"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
AC_MSG_RESULT(${flag})
|
||||
if test "x$flag" != xno; then
|
||||
PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
|
||||
fi
|
||||
|
||||
AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
|
||||
ax_cv_PTHREAD_PRIO_INHERIT, [
|
||||
AC_LINK_IFELSE([
|
||||
AC_LANG_PROGRAM([[#include <pthread.h>]], [[int i = PTHREAD_PRIO_INHERIT;]])],
|
||||
[ax_cv_PTHREAD_PRIO_INHERIT=yes],
|
||||
[ax_cv_PTHREAD_PRIO_INHERIT=no])
|
||||
])
|
||||
AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"],
|
||||
AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.]))
|
||||
|
||||
LIBS="$save_LIBS"
|
||||
CFLAGS="$save_CFLAGS"
|
||||
|
||||
# More AIX lossage: must compile with xlc_r or cc_r
|
||||
if test x"$GCC" != xyes; then
|
||||
AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC})
|
||||
else
|
||||
PTHREAD_CC=$CC
|
||||
fi
|
||||
else
|
||||
PTHREAD_CC="$CC"
|
||||
fi
|
||||
|
||||
AC_SUBST(PTHREAD_LIBS)
|
||||
AC_SUBST(PTHREAD_CFLAGS)
|
||||
AC_SUBST(PTHREAD_CC)
|
||||
|
||||
# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
|
||||
if test x"$ax_pthread_ok" = xyes; then
|
||||
ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1])
|
||||
:
|
||||
else
|
||||
ax_pthread_ok=no
|
||||
$2
|
||||
fi
|
||||
AC_LANG_POP
|
||||
])dnl AX_PTHREAD
|
8369
pcre2/m4/libtool.m4
vendored
Normal file
8369
pcre2/m4/libtool.m4
vendored
Normal file
File diff suppressed because it is too large
Load diff
437
pcre2/m4/ltoptions.m4
vendored
Normal file
437
pcre2/m4/ltoptions.m4
vendored
Normal file
|
@ -0,0 +1,437 @@
|
|||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
#
|
||||
# Copyright (C) 2004-2005, 2007-2009, 2011-2015 Free Software
|
||||
# Foundation, Inc.
|
||||
# Written by Gary V. Vaughan, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 8 ltoptions.m4
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
|
||||
|
||||
|
||||
# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
|
||||
# ------------------------------------------
|
||||
# Expand to the name of the flag macro that records OPTION-NAME as set for
# MACRO-NAME: the literal prefix _LT_OPTION_ followed by MACRO-NAME__OPTION-NAME
# with every character other than a letter, digit or underscore replaced by
# an underscore.
m4_define([_LT_MANGLE_OPTION],
|
||||
[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
|
||||
|
||||
|
||||
# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
|
||||
# ---------------------------------------
|
||||
# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
|
||||
# matching handler defined, dispatch to it. Other OPTION-NAMEs are
|
||||
# saved as a flag.
|
||||
m4_define([_LT_SET_OPTION],
|
||||
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
|
||||
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
|
||||
_LT_MANGLE_DEFUN([$1], [$2]),
|
||||
[m4_warning([Unknown $1 option '$2'])])[]dnl
|
||||
])
|
||||
|
||||
|
||||
# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
|
||||
# ------------------------------------------------------------
|
||||
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||
m4_define([_LT_IF_OPTION],
|
||||
[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
|
||||
|
||||
|
||||
# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
|
||||
# -------------------------------------------------------
|
||||
# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
|
||||
# are set.
|
||||
m4_define([_LT_UNLESS_OPTIONS],
|
||||
[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
|
||||
[m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
|
||||
[m4_define([$0_found])])])[]dnl
|
||||
m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
|
||||
])[]dnl
|
||||
])
|
||||
|
||||
|
||||
# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
|
||||
# ----------------------------------------
|
||||
# OPTION-LIST is a space-separated list of Libtool options associated
|
||||
# with MACRO-NAME. If any OPTION has a matching handler declared with
|
||||
# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
|
||||
# the unknown option and exit.
|
||||
m4_defun([_LT_SET_OPTIONS],
|
||||
[# Set options
|
||||
m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
|
||||
[_LT_SET_OPTION([$1], _LT_Option)])
|
||||
|
||||
m4_if([$1],[LT_INIT],[
|
||||
dnl
|
||||
dnl Simply set some default values (i.e off) if boolean options were not
|
||||
dnl specified:
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
|
||||
])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
|
||||
])
|
||||
dnl
|
||||
dnl If no reference was made to various pairs of opposing options, then
|
||||
dnl we run the default mode handler for the pair. For example, if neither
|
||||
dnl 'shared' nor 'disable-shared' was passed, we enable building of shared
|
||||
dnl archives by default:
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
|
||||
[_LT_ENABLE_FAST_INSTALL])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4],
|
||||
[_LT_WITH_AIX_SONAME([aix])])
|
||||
])
|
||||
])# _LT_SET_OPTIONS
|
||||
|
||||
|
||||
## --------------------------------- ##
|
||||
## Macros to handle LT_INIT options. ##
|
||||
## --------------------------------- ##
|
||||
|
||||
# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
|
||||
# -----------------------------------------
|
||||
# Expand to the name of the handler macro for OPTION-NAME of MACRO-NAME: the
# literal prefix _LT_OPTION_DEFUN_ followed by the upper-cased
# MACRO-NAME__OPTION-NAME with every character other than A-Z, 0-9 or
# underscore replaced by an underscore.
m4_define([_LT_MANGLE_DEFUN],
|
||||
[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
|
||||
|
||||
|
||||
# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
|
||||
# -----------------------------------------------
|
||||
# Define the handler macro for OPTION-NAME of MACRO-NAME, with CODE as its
# body.  The handler's name is computed by _LT_MANGLE_DEFUN, which is the
# same name _LT_SET_OPTION looks up when it dispatches an option.
m4_define([LT_OPTION_DEFINE],
|
||||
[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
|
||||
])# LT_OPTION_DEFINE
|
||||
|
||||
|
||||
# dlopen
|
||||
# ------
|
||||
LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
|
||||
])
|
||||
|
||||
AU_DEFUN([AC_LIBTOOL_DLOPEN],
|
||||
[_LT_SET_OPTION([LT_INIT], [dlopen])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the 'dlopen' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
|
||||
|
||||
|
||||
# win32-dll
|
||||
# ---------
|
||||
# Declare package support for building win32 dll's.
|
||||
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
|
||||
[enable_win32_dll=yes
|
||||
|
||||
case $host in
|
||||
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
|
||||
AC_CHECK_TOOL(AS, as, false)
|
||||
AC_CHECK_TOOL(DLLTOOL, dlltool, false)
|
||||
AC_CHECK_TOOL(OBJDUMP, objdump, false)
|
||||
;;
|
||||
esac
|
||||
|
||||
test -z "$AS" && AS=as
|
||||
_LT_DECL([], [AS], [1], [Assembler program])dnl
|
||||
|
||||
test -z "$DLLTOOL" && DLLTOOL=dlltool
|
||||
_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
|
||||
|
||||
test -z "$OBJDUMP" && OBJDUMP=objdump
|
||||
_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
|
||||
])# win32-dll
|
||||
|
||||
AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
|
||||
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
|
||||
_LT_SET_OPTION([LT_INIT], [win32-dll])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the 'win32-dll' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
|
||||
|
||||
|
||||
# _LT_ENABLE_SHARED([DEFAULT])
|
||||
# ----------------------------
|
||||
# implement the --enable-shared flag, and support the 'shared' and
|
||||
# 'disable-shared' LT_INIT options.
|
||||
# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'.
|
||||
m4_define([_LT_ENABLE_SHARED],
|
||||
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([shared],
|
||||
[AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
|
||||
[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_shared=yes ;;
|
||||
no) enable_shared=no ;;
|
||||
*)
|
||||
enable_shared=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
|
||||
for pkg in $enableval; do
|
||||
IFS=$lt_save_ifs
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_shared=yes
|
||||
fi
|
||||
done
|
||||
IFS=$lt_save_ifs
|
||||
;;
|
||||
esac],
|
||||
[enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
|
||||
|
||||
_LT_DECL([build_libtool_libs], [enable_shared], [0],
|
||||
[Whether or not to build shared libraries])
|
||||
])# _LT_ENABLE_SHARED
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
|
||||
|
||||
# Old names:
|
||||
AC_DEFUN([AC_ENABLE_SHARED],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
|
||||
])
|
||||
|
||||
AC_DEFUN([AC_DISABLE_SHARED],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-shared])
|
||||
])
|
||||
|
||||
AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
|
||||
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_SHARED], [])
|
||||
|
||||
|
||||
|
||||
# _LT_ENABLE_STATIC([DEFAULT])
|
||||
# ----------------------------
|
||||
# implement the --enable-static flag, and support the 'static' and
|
||||
# 'disable-static' LT_INIT options.
|
||||
# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'.
|
||||
m4_define([_LT_ENABLE_STATIC],
|
||||
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([static],
|
||||
[AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
|
||||
[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_static=yes ;;
|
||||
no) enable_static=no ;;
|
||||
*)
|
||||
enable_static=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
|
||||
for pkg in $enableval; do
|
||||
IFS=$lt_save_ifs
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_static=yes
|
||||
fi
|
||||
done
|
||||
IFS=$lt_save_ifs
|
||||
;;
|
||||
esac],
|
||||
[enable_static=]_LT_ENABLE_STATIC_DEFAULT)
|
||||
|
||||
_LT_DECL([build_old_libs], [enable_static], [0],
|
||||
[Whether or not to build static libraries])
|
||||
])# _LT_ENABLE_STATIC
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
|
||||
|
||||
# Old names:
|
||||
AC_DEFUN([AC_ENABLE_STATIC],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
|
||||
])
|
||||
|
||||
AC_DEFUN([AC_DISABLE_STATIC],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-static])
|
||||
])
|
||||
|
||||
AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
|
||||
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_STATIC], [])
|
||||
|
||||
|
||||
|
||||
# _LT_ENABLE_FAST_INSTALL([DEFAULT])
|
||||
# ----------------------------------
|
||||
# implement the --enable-fast-install flag, and support the 'fast-install'
|
||||
# and 'disable-fast-install' LT_INIT options.
|
||||
# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'.
|
||||
m4_define([_LT_ENABLE_FAST_INSTALL],
|
||||
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([fast-install],
|
||||
[AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
|
||||
[optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_fast_install=yes ;;
|
||||
no) enable_fast_install=no ;;
|
||||
*)
|
||||
enable_fast_install=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
|
||||
for pkg in $enableval; do
|
||||
IFS=$lt_save_ifs
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_fast_install=yes
|
||||
fi
|
||||
done
|
||||
IFS=$lt_save_ifs
|
||||
;;
|
||||
esac],
|
||||
[enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
|
||||
|
||||
_LT_DECL([fast_install], [enable_fast_install], [0],
|
||||
[Whether or not to optimize for fast installation])dnl
|
||||
])# _LT_ENABLE_FAST_INSTALL
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
|
||||
|
||||
# Old names:
|
||||
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
|
||||
the 'fast-install' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
AU_DEFUN([AC_DISABLE_FAST_INSTALL],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
|
||||
the 'disable-fast-install' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
|
||||
dnl AC_DEFUN([AC_DISABLE_FAST_INSTALL], [])
|
||||
|
||||
|
||||
# _LT_WITH_AIX_SONAME([DEFAULT])
|
||||
# ----------------------------------
|
||||
# implement the --with-aix-soname flag, and support the `aix-soname=aix'
|
||||
# and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT
|
||||
# is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'.
|
||||
m4_define([_LT_WITH_AIX_SONAME],
|
||||
[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl
|
||||
shared_archive_member_spec=
|
||||
case $host,$enable_shared in
|
||||
power*-*-aix[[5-9]]*,yes)
|
||||
AC_MSG_CHECKING([which variant of shared library versioning to provide])
|
||||
AC_ARG_WITH([aix-soname],
|
||||
[AS_HELP_STRING([--with-aix-soname=aix|svr4|both],
|
||||
[shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])],
|
||||
[case $withval in
|
||||
aix|svr4|both)
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([Unknown argument to --with-aix-soname])
|
||||
;;
|
||||
esac
|
||||
lt_cv_with_aix_soname=$with_aix_soname],
|
||||
[AC_CACHE_VAL([lt_cv_with_aix_soname],
|
||||
[lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT)
|
||||
with_aix_soname=$lt_cv_with_aix_soname])
|
||||
AC_MSG_RESULT([$with_aix_soname])
|
||||
if test aix != "$with_aix_soname"; then
|
||||
# For the AIX way of multilib, we name the shared archive member
|
||||
# based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o',
|
||||
# and 'shr.imp' or 'shr_64.imp', respectively, for the Import File.
|
||||
# Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag,
|
||||
# the AIX toolchain works better with OBJECT_MODE set (default 32).
|
||||
if test 64 = "${OBJECT_MODE-32}"; then
|
||||
shared_archive_member_spec=shr_64
|
||||
else
|
||||
shared_archive_member_spec=shr
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
with_aix_soname=aix
|
||||
;;
|
||||
esac
|
||||
|
||||
_LT_DECL([], [shared_archive_member_spec], [0],
|
||||
[Shared archive member basename, for filename based shared library versioning on AIX])dnl
|
||||
])# _LT_WITH_AIX_SONAME
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])])
|
||||
|
||||
|
||||
# _LT_WITH_PIC([MODE])
|
||||
# --------------------
|
||||
# implement the --with-pic flag, and support the 'pic-only' and 'no-pic'
|
||||
# LT_INIT options.
|
||||
# MODE is either 'yes' or 'no'. If omitted, it defaults to 'default' (use both).
|
||||
m4_define([_LT_WITH_PIC],
|
||||
[AC_ARG_WITH([pic],
|
||||
[AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
|
||||
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
|
||||
[lt_p=${PACKAGE-default}
|
||||
case $withval in
|
||||
yes|no) pic_mode=$withval ;;
|
||||
*)
|
||||
pic_mode=default
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
|
||||
for lt_pkg in $withval; do
|
||||
IFS=$lt_save_ifs
|
||||
if test "X$lt_pkg" = "X$lt_p"; then
|
||||
pic_mode=yes
|
||||
fi
|
||||
done
|
||||
IFS=$lt_save_ifs
|
||||
;;
|
||||
esac],
|
||||
[pic_mode=m4_default([$1], [default])])
|
||||
|
||||
_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
|
||||
])# _LT_WITH_PIC
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
|
||||
|
||||
# Old name:
|
||||
AU_DEFUN([AC_LIBTOOL_PICMODE],
|
||||
[_LT_SET_OPTION([LT_INIT], [pic-only])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the 'pic-only' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
|
||||
|
||||
## ----------------- ##
|
||||
## LTDL_INIT Options ##
|
||||
## ----------------- ##
|
||||
|
||||
m4_define([_LTDL_MODE], [])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
|
||||
[m4_define([_LTDL_MODE], [nonrecursive])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
|
||||
[m4_define([_LTDL_MODE], [recursive])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
|
||||
[m4_define([_LTDL_MODE], [subproject])])
|
||||
|
||||
m4_define([_LTDL_TYPE], [])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [installable],
|
||||
[m4_define([_LTDL_TYPE], [installable])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
|
||||
[m4_define([_LTDL_TYPE], [convenience])])
|
124
pcre2/m4/ltsugar.m4
vendored
Normal file
124
pcre2/m4/ltsugar.m4
vendored
Normal file
|
@ -0,0 +1,124 @@
|
|||
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
|
||||
#
|
||||
# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software
|
||||
# Foundation, Inc.
|
||||
# Written by Gary V. Vaughan, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6 ltsugar.m4
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
|
||||
|
||||
|
||||
# lt_join(SEP, ARG1, [ARG2...])
|
||||
# -----------------------------
|
||||
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
|
||||
# associated separator.
|
||||
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
|
||||
# versions in m4sugar had bugs.
|
||||
m4_define([lt_join],
|
||||
[m4_if([$#], [1], [],
|
||||
[$#], [2], [[$2]],
|
||||
[m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
|
||||
m4_define([_lt_join],
|
||||
[m4_if([$#$2], [2], [],
|
||||
[m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
|
||||
|
||||
|
||||
# lt_car(LIST)
|
||||
# lt_cdr(LIST)
|
||||
# ------------
|
||||
# Manipulate m4 lists.
|
||||
# These macros are necessary as long as we still need to support
|
||||
# Autoconf-2.59, which quotes differently.
|
||||
# lt_car expands to its first argument, wrapped in one level of quotes.
m4_define([lt_car], [[$1]])
|
||||
# lt_cdr expands to every argument after the first, as a quoted list.
# With exactly one argument the expansion is empty; an argument count of
# zero is reported as a fatal m4 error.
m4_define([lt_cdr],
|
||||
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
|
||||
[$#], 1, [],
|
||||
[m4_dquote(m4_shift($@))])])
|
||||
# lt_unquote expands to its first argument with no quoting added
# (contrast lt_car, which wraps the argument in quotes).
m4_define([lt_unquote], $1)
|
||||
|
||||
|
||||
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
|
||||
# ------------------------------------------
|
||||
# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'.
|
||||
# Note that neither SEPARATOR nor STRING are expanded; they are appended
|
||||
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
|
||||
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
|
||||
# than defined and empty).
|
||||
#
|
||||
# This macro is needed until we can rely on Autoconf 2.62, since earlier
|
||||
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
|
||||
m4_define([lt_append],
|
||||
[m4_define([$1],
|
||||
m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
|
||||
|
||||
|
||||
|
||||
# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
|
||||
# ----------------------------------------------------------
|
||||
# Produce a SEP delimited list of all paired combinations of elements of
|
||||
# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list
|
||||
# has the form PREFIXmINFIXSUFFIXn.
|
||||
# Needed until we can rely on m4_combine added in Autoconf 2.62.
|
||||
m4_define([lt_combine],
|
||||
[m4_if(m4_eval([$# > 3]), [1],
|
||||
[m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
|
||||
[[m4_foreach([_Lt_prefix], [$2],
|
||||
[m4_foreach([_Lt_suffix],
|
||||
]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
|
||||
[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
|
||||
|
||||
|
||||
# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
|
||||
# -----------------------------------------------------------------------
|
||||
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
|
||||
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
|
||||
m4_define([lt_if_append_uniq],
|
||||
[m4_ifdef([$1],
|
||||
[m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
|
||||
[lt_append([$1], [$2], [$3])$4],
|
||||
[$5])],
|
||||
[lt_append([$1], [$2], [$3])$4])])
|
||||
|
||||
|
||||
# lt_dict_add(DICT, KEY, VALUE)
|
||||
# -----------------------------
|
||||
# Record VALUE for KEY by defining an m4 macro whose name is the literal
# text 'DICT(KEY)' and whose expansion is VALUE.
m4_define([lt_dict_add],
|
||||
[m4_define([$1($2)], [$3])])
|
||||
|
||||
|
||||
# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
|
||||
# --------------------------------------------
|
||||
# Record VALUE for the KEY/SUBKEY pair by defining an m4 macro whose name
# is the literal text 'DICT(KEY:SUBKEY)' and whose expansion is VALUE.
m4_define([lt_dict_add_subkey],
|
||||
[m4_define([$1($2:$3)], [$4])])
|
||||
|
||||
|
||||
# lt_dict_fetch(DICT, KEY, [SUBKEY])
|
||||
# ----------------------------------
|
||||
# Look up a stored value.  When SUBKEY is non-empty the macro named
# 'DICT(KEY:SUBKEY)' is consulted, otherwise the macro named 'DICT(KEY)'.
# The expansion is that macro's definition, or empty if it is not defined.
m4_define([lt_dict_fetch],
|
||||
[m4_ifval([$3],
|
||||
m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
|
||||
m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
|
||||
|
||||
|
||||
# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
|
||||
# -----------------------------------------------------------------
|
||||
# Fetch the entry for KEY (and optional SUBKEY) with lt_dict_fetch and
# compare it with VALUE: expand IF-TRUE when they are equal, IF-FALSE
# otherwise.
m4_define([lt_if_dict_fetch],
|
||||
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
|
||||
[$5],
|
||||
[$6])])
|
||||
|
||||
|
||||
# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
|
||||
# --------------------------------------------------------------
|
||||
# Expand to nothing when the first KEY (fifth argument) is empty.
# Otherwise test every KEY from the fifth argument onwards with
# lt_if_dict_fetch against VALUE (using the optional SUBKEY), keep the
# KEYs that match, and join them with lt_join using SEPARATOR, or a comma
# followed by a space when SEPARATOR is empty.
m4_define([lt_dict_filter],
|
||||
[m4_if([$5], [], [],
|
||||
[lt_join(m4_quote(m4_default([$4], [[, ]])),
|
||||
lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
|
||||
[lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
|
||||
])
|
23
pcre2/m4/ltversion.m4
vendored
Normal file
23
pcre2/m4/ltversion.m4
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
# ltversion.m4 -- version numbers -*- Autoconf -*-
|
||||
#
|
||||
# Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc.
|
||||
# Written by Scott James Remnant, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# @configure_input@
|
||||
|
||||
# serial 4179 ltversion.m4
|
||||
# This file is part of GNU Libtool
|
||||
|
||||
m4_define([LT_PACKAGE_VERSION], [2.4.6])
|
||||
m4_define([LT_PACKAGE_REVISION], [2.4.6])
|
||||
|
||||
# LTVERSION_VERSION
# -----------------
# Set the shell variables macro_version and macro_revision to the libtool
# release these macros come from (2.4.6) and register both with _LT_DECL.
AC_DEFUN([LTVERSION_VERSION],
|
||||
[macro_version='2.4.6'
|
||||
macro_revision='2.4.6'
|
||||
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
|
||||
_LT_DECL(, macro_revision, 0)
|
||||
])
|
99
pcre2/m4/lt~obsolete.m4
vendored
Normal file
99
pcre2/m4/lt~obsolete.m4
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
|
||||
#
|
||||
# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software
|
||||
# Foundation, Inc.
|
||||
# Written by Scott James Remnant, 2004.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 5 lt~obsolete.m4
|
||||
|
||||
# These exist entirely to fool aclocal when bootstrapping libtool.
|
||||
#
|
||||
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN),
|
||||
# which have later been changed to m4_define as they aren't part of the
|
||||
# exported API, or moved to Autoconf or Automake where they belong.
|
||||
#
|
||||
# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN
|
||||
# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
|
||||
# using a macro with the same name in our local m4/libtool.m4 it'll
|
||||
# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
|
||||
# and doesn't know about Autoconf macros at all.)
|
||||
#
|
||||
# So we provide this file, which has a silly filename so it's always
|
||||
# included after everything else. This provides aclocal with the
|
||||
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
|
||||
# because those macros already exist, or will be overwritten later.
|
||||
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
|
||||
#
|
||||
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
|
||||
# Yes, that means every name once taken will need to remain here until
|
||||
# we give up compatibility with versions before 1.7, at which point
|
||||
# we need to keep only those names which we still refer to.
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
|
||||
|
||||
m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
|
||||
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
|
||||
m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
|
||||
m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
|
||||
m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
|
||||
m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
|
||||
m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
|
||||
m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
|
||||
m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
|
||||
m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
|
||||
m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
|
||||
m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
|
||||
m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
|
||||
m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
|
||||
m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
|
||||
m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
|
||||
m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
|
||||
m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
|
||||
m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
|
||||
m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
|
||||
m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
|
||||
m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
|
||||
m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
|
||||
m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
|
||||
m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
|
||||
m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
|
||||
m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
|
||||
m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
|
||||
m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
|
||||
m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
|
||||
m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
|
||||
m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
|
||||
m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
|
||||
m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
|
||||
m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])])
|
||||
m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
|
||||
m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
|
||||
m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])])
|
||||
m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
|
||||
m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
|
87
pcre2/m4/pcre2_visibility.m4
Normal file
87
pcre2/m4/pcre2_visibility.m4
Normal file
|
@ -0,0 +1,87 @@
|
|||
# visibility.m4 serial 4 (gettext-0.18.2)
|
||||
dnl Copyright (C) 2005, 2008, 2010-2011 Free Software Foundation, Inc.
|
||||
dnl This file is free software; the Free Software Foundation
|
||||
dnl gives unlimited permission to copy and/or distribute it,
|
||||
dnl with or without modifications, as long as this notice is preserved.
|
||||
|
||||
dnl From Bruno Haible.
|
||||
|
||||
dnl Tests whether the compiler supports the command-line option
|
||||
dnl -fvisibility=hidden and the function and variable attributes
|
||||
dnl __attribute__((__visibility__("hidden"))) and
|
||||
dnl __attribute__((__visibility__("default"))).
|
||||
dnl Does *not* test for __visibility__("protected") - which has tricky
|
||||
dnl semantics (see the 'vismain' test in glibc) and does not exist e.g. on
|
||||
dnl MacOS X.
|
||||
dnl Does *not* test for __visibility__("internal") - which has processor
|
||||
dnl dependent semantics.
|
||||
dnl Does *not* test for #pragma GCC visibility push(hidden) - which is
|
||||
dnl "really only recommended for legacy code".
|
||||
dnl Set the variable CFLAG_VISIBILITY.
|
||||
dnl Defines and sets the variable HAVE_VISIBILITY.
|
||||
|
||||
dnl Modified to fit with PCRE build environment by Cristian Rodríguez.
|
||||
dnl Adjusted for PCRE2 by PH
|
||||
|
||||
AC_DEFUN([PCRE2_VISIBILITY],
|
||||
[
|
||||
AC_REQUIRE([AC_PROG_CC])
|
||||
VISIBILITY_CFLAGS=
|
||||
VISIBILITY_CXXFLAGS=
|
||||
HAVE_VISIBILITY=0
|
||||
if test -n "$GCC"; then
|
||||
dnl First, check whether -Werror can be added to the command line, or
|
||||
dnl whether it leads to an error because of some other option that the
|
||||
dnl user has put into $CC $CFLAGS $CPPFLAGS.
|
||||
AC_MSG_CHECKING([whether the -Werror option is usable])
|
||||
AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [
|
||||
pcre2_save_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -Werror"
|
||||
AC_COMPILE_IFELSE(
|
||||
[AC_LANG_PROGRAM([[]], [[]])],
|
||||
[pcre2_cv_cc_vis_werror=yes],
|
||||
[pcre2_cv_cc_vis_werror=no])
|
||||
CFLAGS="$pcre2_save_CFLAGS"])
|
||||
AC_MSG_RESULT([$pcre2_cv_cc_vis_werror])
|
||||
dnl Now check whether visibility declarations are supported.
|
||||
AC_MSG_CHECKING([for simple visibility declarations])
|
||||
AC_CACHE_VAL([pcre2_cv_cc_visibility], [
|
||||
pcre2_save_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -fvisibility=hidden"
|
||||
dnl We use the option -Werror and a function dummyfunc, because on some
|
||||
dnl platforms (Cygwin 1.7) the use of -fvisibility triggers a warning
|
||||
dnl "visibility attribute not supported in this configuration; ignored"
|
||||
dnl at the first function definition in every compilation unit, and we
|
||||
dnl don't want to use the option in this case.
|
||||
if test $pcre2_cv_cc_vis_werror = yes; then
|
||||
CFLAGS="$CFLAGS -Werror"
|
||||
fi
|
||||
AC_COMPILE_IFELSE(
|
||||
[AC_LANG_PROGRAM(
|
||||
[[extern __attribute__((__visibility__("hidden"))) int hiddenvar;
|
||||
extern __attribute__((__visibility__("default"))) int exportedvar;
|
||||
extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void);
|
||||
extern __attribute__((__visibility__("default"))) int exportedfunc (void);
|
||||
void dummyfunc (void) {}
|
||||
]],
|
||||
[[]])],
|
||||
[pcre2_cv_cc_visibility=yes],
|
||||
[pcre2_cv_cc_visibility=no])
|
||||
CFLAGS="$pcre2_save_CFLAGS"])
|
||||
AC_MSG_RESULT([$pcre2_cv_cc_visibility])
|
||||
if test $pcre2_cv_cc_visibility = yes; then
|
||||
VISIBILITY_CFLAGS="-fvisibility=hidden"
|
||||
VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden"
|
||||
HAVE_VISIBILITY=1
|
||||
AC_DEFINE(PCRE2_EXP_DECL, [extern __attribute__ ((visibility ("default")))], [to make a symbol visible])
|
||||
AC_DEFINE(PCRE2_EXP_DEFN, [__attribute__ ((visibility ("default")))], [to make a symbol visible])
|
||||
AC_DEFINE(PCRE2POSIX_EXP_DECL, [extern __attribute__ ((visibility ("default")))], [to make a symbol visible])
|
||||
AC_DEFINE(PCRE2POSIX_EXP_DEFN, [extern __attribute__ ((visibility ("default")))], [to make a symbol visible])
|
||||
fi
|
||||
fi
|
||||
AC_SUBST([VISIBILITY_CFLAGS])
|
||||
AC_SUBST([VISIBILITY_CXXFLAGS])
|
||||
AC_SUBST([HAVE_VISIBILITY])
|
||||
AC_DEFINE_UNQUOTED([HAVE_VISIBILITY], [$HAVE_VISIBILITY],
|
||||
[Define to 1 if the compiler supports simple visibility declarations.])
|
||||
])
|
215
pcre2/missing
Executable file
215
pcre2/missing
Executable file
|
@ -0,0 +1,215 @@
|
|||
#! /bin/sh
|
||||
# Common wrapper for a few potentially missing GNU programs.
|
||||
|
||||
scriptversion=2013-10-28.13; # UTC
|
||||
|
||||
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
|
||||
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo 1>&2 "Try '$0 --help' for more information"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
case $1 in
|
||||
|
||||
--is-lightweight)
|
||||
# Used by our autoconf macros to check whether the available missing
|
||||
# script is modern enough.
|
||||
exit 0
|
||||
;;
|
||||
|
||||
--run)
|
||||
# Back-compat with the calling convention used by older automake.
|
||||
shift
|
||||
;;
|
||||
|
||||
-h|--h|--he|--hel|--help)
|
||||
echo "\
|
||||
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
||||
|
||||
Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
|
||||
to PROGRAM being missing or too old.
|
||||
|
||||
Options:
|
||||
-h, --help display this help and exit
|
||||
-v, --version output version information and exit
|
||||
|
||||
Supported PROGRAM values:
|
||||
aclocal autoconf autoheader autom4te automake makeinfo
|
||||
bison yacc flex lex help2man
|
||||
|
||||
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
|
||||
'g' are ignored when checking the name.
|
||||
|
||||
Send bug reports to <bug-automake@gnu.org>."
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
||||
echo "missing $scriptversion (GNU Automake)"
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-*)
|
||||
echo 1>&2 "$0: unknown '$1' option"
|
||||
echo 1>&2 "Try '$0 --help' for more information"
|
||||
exit 1
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
# Run the given program, remember its exit status.
|
||||
"$@"; st=$?
|
||||
|
||||
# If it succeeded, we are done.
|
||||
test $st -eq 0 && exit 0
|
||||
|
||||
# Also exit now if it failed (or wasn't found), and '--version' was
|
||||
# passed; such an option is passed most likely to detect whether the
|
||||
# program is present and works.
|
||||
case $2 in --version|--help) exit $st;; esac
|
||||
|
||||
# Exit code 63 means version mismatch. This often happens when the user
|
||||
# tries to use an ancient version of a tool on a file that requires a
|
||||
# minimum version.
|
||||
if test $st -eq 63; then
|
||||
msg="probably too old"
|
||||
elif test $st -eq 127; then
|
||||
# Program was missing.
|
||||
msg="missing on your system"
|
||||
else
|
||||
# Program was found and executed, but failed. Give up.
|
||||
exit $st
|
||||
fi
|
||||
|
||||
perl_URL=http://www.perl.org/
|
||||
flex_URL=http://flex.sourceforge.net/
|
||||
gnu_software_URL=http://www.gnu.org/software
|
||||
|
||||
# program_details PROGRAM
# Print which GNU package provides PROGRAM, followed by the URLs of that
# package and of the other tools it requires in order to run.
# Only aclocal, automake, autoconf, autom4te and autoheader are handled;
# any other name prints nothing.
program_details ()
|
||||
{
|
||||
case $1 in
|
||||
aclocal|automake)
|
||||
echo "The '$1' program is part of the GNU Automake package:"
|
||||
echo "<$gnu_software_URL/automake>"
|
||||
echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
|
||||
echo "<$gnu_software_URL/autoconf>"
|
||||
echo "<$gnu_software_URL/m4/>"
|
||||
echo "<$perl_URL>"
|
||||
;;
|
||||
autoconf|autom4te|autoheader)
|
||||
echo "The '$1' program is part of the GNU Autoconf package:"
|
||||
echo "<$gnu_software_URL/autoconf/>"
|
||||
echo "It also requires GNU m4 and Perl in order to run:"
|
||||
echo "<$gnu_software_URL/m4/>"
|
||||
echo "<$perl_URL>"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
give_advice ()
|
||||
{
|
||||
# Normalize program name to check for.
|
||||
normalized_program=`echo "$1" | sed '
|
||||
s/^gnu-//; t
|
||||
s/^gnu//; t
|
||||
s/^g//; t'`
|
||||
|
||||
printf '%s\n' "'$1' is $msg."
|
||||
|
||||
configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
|
||||
case $normalized_program in
|
||||
autoconf*)
|
||||
echo "You should only need it if you modified 'configure.ac',"
|
||||
echo "or m4 files included by it."
|
||||
program_details 'autoconf'
|
||||
;;
|
||||
autoheader*)
|
||||
echo "You should only need it if you modified 'acconfig.h' or"
|
||||
echo "$configure_deps."
|
||||
program_details 'autoheader'
|
||||
;;
|
||||
automake*)
|
||||
echo "You should only need it if you modified 'Makefile.am' or"
|
||||
echo "$configure_deps."
|
||||
program_details 'automake'
|
||||
;;
|
||||
aclocal*)
|
||||
echo "You should only need it if you modified 'acinclude.m4' or"
|
||||
echo "$configure_deps."
|
||||
program_details 'aclocal'
|
||||
;;
|
||||
autom4te*)
|
||||
echo "You might have modified some maintainer files that require"
|
||||
echo "the 'autom4te' program to be rebuilt."
|
||||
program_details 'autom4te'
|
||||
;;
|
||||
bison*|yacc*)
|
||||
echo "You should only need it if you modified a '.y' file."
|
||||
echo "You may want to install the GNU Bison package:"
|
||||
echo "<$gnu_software_URL/bison/>"
|
||||
;;
|
||||
lex*|flex*)
|
||||
echo "You should only need it if you modified a '.l' file."
|
||||
echo "You may want to install the Fast Lexical Analyzer package:"
|
||||
echo "<$flex_URL>"
|
||||
;;
|
||||
help2man*)
|
||||
echo "You should only need it if you modified a dependency" \
|
||||
"of a man page."
|
||||
echo "You may want to install the GNU Help2man package:"
|
||||
echo "<$gnu_software_URL/help2man/>"
|
||||
;;
|
||||
makeinfo*)
|
||||
echo "You should only need it if you modified a '.texi' file, or"
|
||||
echo "any other file indirectly affecting the aspect of the manual."
|
||||
echo "You might want to install the Texinfo package:"
|
||||
echo "<$gnu_software_URL/texinfo/>"
|
||||
echo "The spurious makeinfo call might also be the consequence of"
|
||||
echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
|
||||
echo "want to install GNU make:"
|
||||
echo "<$gnu_software_URL/make/>"
|
||||
;;
|
||||
*)
|
||||
echo "You might have modified some files without having the proper"
|
||||
echo "tools for further handling them. Check the 'README' file, it"
|
||||
echo "often tells you about the needed prerequisites for installing"
|
||||
echo "this package. You may also peek at any GNU archive site, in"
|
||||
echo "case some other package contains this missing '$1' program."
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
give_advice "$1" | sed -e '1s/^/WARNING: /' \
|
||||
-e '2,$s/^/ /' >&2
|
||||
|
||||
# Propagate the correct exit status (expected to be 127 for a program
|
||||
# not found, 63 for a program that failed due to version mismatch).
|
||||
exit $st
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
121
pcre2/pcre2-config.in
Normal file
121
pcre2/pcre2-config.in
Normal file
|
@ -0,0 +1,121 @@
|
|||
#!/bin/sh
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
exec_prefix_set=no
|
||||
|
||||
cflags="[--cflags]"
|
||||
libs=
|
||||
|
||||
if test @enable_pcre2_16@ = yes ; then
|
||||
libs="[--libs16] $libs"
|
||||
fi
|
||||
|
||||
if test @enable_pcre2_32@ = yes ; then
|
||||
libs="[--libs32] $libs"
|
||||
fi
|
||||
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
libs="[--libs8] [--libs-posix] $libs"
|
||||
cflags="$cflags [--cflags-posix]"
|
||||
fi
|
||||
|
||||
usage="Usage: pcre2-config [--prefix] [--exec-prefix] [--version] $libs $cflags"
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo "${usage}" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
libR=
|
||||
case `uname -s` in
|
||||
*SunOS*)
|
||||
libR=" -R@libdir@"
|
||||
;;
|
||||
*BSD*)
|
||||
libR=" -Wl,-R@libdir@"
|
||||
;;
|
||||
esac
|
||||
|
||||
libS=
|
||||
if test @libdir@ != /usr/lib ; then
|
||||
libS=-L@libdir@
|
||||
fi
|
||||
|
||||
while test $# -gt 0; do
|
||||
case "$1" in
|
||||
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
|
||||
*) optarg= ;;
|
||||
esac
|
||||
|
||||
case $1 in
|
||||
--prefix=*)
|
||||
prefix=$optarg
|
||||
if test $exec_prefix_set = no ; then
|
||||
exec_prefix=$optarg
|
||||
fi
|
||||
;;
|
||||
--prefix)
|
||||
echo $prefix
|
||||
;;
|
||||
--exec-prefix=*)
|
||||
exec_prefix=$optarg
|
||||
exec_prefix_set=yes
|
||||
;;
|
||||
--exec-prefix)
|
||||
echo $exec_prefix
|
||||
;;
|
||||
--version)
|
||||
echo @PACKAGE_VERSION@
|
||||
;;
|
||||
--cflags)
|
||||
if test @includedir@ != /usr/include ; then
|
||||
includes=-I@includedir@
|
||||
fi
|
||||
echo $includes @PCRE2_STATIC_CFLAG@
|
||||
;;
|
||||
--cflags-posix)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
if test @includedir@ != /usr/include ; then
|
||||
includes=-I@includedir@
|
||||
fi
|
||||
echo $includes @PCRE2_STATIC_CFLAG@
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs-posix)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
echo $libS$libR -lpcre2posix -lpcre2-8
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs8)
|
||||
if test @enable_pcre2_8@ = yes ; then
|
||||
echo $libS$libR -lpcre2-8
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs16)
|
||||
if test @enable_pcre2_16@ = yes ; then
|
||||
echo $libS$libR -lpcre2-16
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
--libs32)
|
||||
if test @enable_pcre2_32@ = yes ; then
|
||||
echo $libS$libR -lpcre2-32
|
||||
else
|
||||
echo "${usage}" 1>&2
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
echo "${usage}" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
297
pcre2/perltest.sh
Executable file
297
pcre2/perltest.sh
Executable file
|
@ -0,0 +1,297 @@
|
|||
#! /bin/sh
|
||||
|
||||
# Script for testing regular expressions with perl to check that PCRE2 handles
|
||||
# them the same. The Perl code has to have "use utf8" and "require Encode" at
|
||||
# the start when running UTF-8 tests, but *not* for non-utf8 tests. (The
|
||||
# "require" would actually be OK for non-utf8-tests, but is not always
|
||||
# installed, so this way the script will always run for these tests.)
|
||||
#
|
||||
# The desired effect is achieved by making this a shell script that passes the
|
||||
# Perl script to Perl through a pipe. If the first argument is "-utf8", a
|
||||
# suitable prefix is set up.
|
||||
#
|
||||
# The remaining arguments, if any, are passed to Perl. They are an input file
|
||||
# and an output file. If there is one argument, the output is written to
|
||||
# STDOUT. If Perl receives no arguments, it opens /dev/tty as input, and writes
|
||||
# output to STDOUT. (I haven't found a way of getting it to use STDIN, because
|
||||
# of the contorted piping input.)
|
||||
|
||||
perl=perl
|
||||
prefix=''
|
||||
if [ $# -gt 0 -a "$1" = "-utf8" ] ; then
|
||||
prefix="use utf8; require Encode;"
|
||||
shift
|
||||
fi
|
||||
|
||||
|
||||
# The Perl script that follows has a similar specification to pcre2test, and so
|
||||
# can be given identical input, except that input patterns can be followed only
|
||||
# by Perl's lower case modifiers and certain other pcre2test modifiers that are
|
||||
# either handled or ignored:
|
||||
#
|
||||
# aftertext interpreted as "print $' afterwards"
|
||||
# allaftertext ignored
|
||||
# dupnames ignored (Perl always allows)
|
||||
# mark ignored
|
||||
# no_auto_possess ignored
|
||||
# no_start_optimize ignored
|
||||
# ucp sets Perl's /u modifier
|
||||
# utf invoke UTF-8 functionality
|
||||
#
|
||||
# The data lines must not have any pcre2test modifiers. They are processed as
|
||||
# Perl double-quoted strings, so if they contain " $ or @ characters, these
|
||||
# have to be escaped. For this reason, all such characters in the
|
||||
# Perl-compatible testinput1 and testinput4 files are escaped so that they can
|
||||
# be used for perltest as well as for pcre2test. The output from this script
|
||||
# should be the same as from pcre2test, apart from the initial identifying banner.
|
||||
#
|
||||
# The other testinput files are not suitable for feeding to perltest.sh,
|
||||
# because they make use of the special modifiers that pcre2test uses for
|
||||
# testing features of PCRE2. Some of these files also contain malformed regular
|
||||
# expressions, in order to check that PCRE2 diagnoses them correctly.
|
||||
|
||||
(echo "$prefix" ; cat <<'PERLEND'
|
||||
|
||||
# Function for turning a string into a string of printing chars.
|
||||
|
||||
sub pchars {
|
||||
my($t) = "";
|
||||
if ($utf8)
|
||||
{
|
||||
@p = unpack('U*', $_[0]);
|
||||
foreach $c (@p)
|
||||
{
|
||||
if ($c >= 32 && $c < 127) { $t .= chr $c; }
|
||||
else { $t .= sprintf("\\x{%02x}", $c);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
foreach $c (split(//, $_[0]))
|
||||
{
|
||||
if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
|
||||
else { $t .= sprintf("\\x%02x", ord $c); }
|
||||
}
|
||||
}
|
||||
$t;
|
||||
}
|
||||
|
||||
|
||||
# Read lines from a named file or stdin and write to a named file or stdout;
|
||||
# lines consist of a regular expression, in delimiters and optionally followed
|
||||
# by options, followed by a set of test data, terminated by an empty line.
|
||||
|
||||
# Sort out the input and output files
|
||||
|
||||
if (@ARGV > 0)
|
||||
{
|
||||
# Three-argument open: the mode is fixed, so the file name is never parsed for
# mode characters or stripped of leading/trailing whitespace.
open(INFILE, "<", $ARGV[0]) || die "Failed to open $ARGV[0]\n";
|
||||
$infile = "INFILE";
|
||||
$interact = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
open(INFILE, "</dev/tty") || die "Failed to open /dev/tty\n";
|
||||
$infile = "INFILE";
|
||||
$interact = 1;
|
||||
}
|
||||
|
||||
if (@ARGV > 1)
|
||||
{
|
||||
# Three-argument open: the mode is fixed, so the file name is never parsed for
# mode characters or stripped of leading/trailing whitespace.
open(OUTFILE, ">", $ARGV[1]) || die "Failed to open $ARGV[1]\n";
|
||||
$outfile = "OUTFILE";
|
||||
}
|
||||
else { $outfile = "STDOUT"; }
|
||||
|
||||
printf($outfile "Perl $] Regular Expressions\n\n");
|
||||
|
||||
# Main loop
|
||||
|
||||
NEXT_RE:
|
||||
for (;;)
|
||||
{
|
||||
printf " re> " if $interact;
|
||||
last if ! ($_ = <$infile>);
|
||||
printf $outfile "$_" if ! $interact;
|
||||
next if ($_ =~ /^\s*$/ || $_ =~ /^#/);
|
||||
|
||||
$pattern = $_;
|
||||
|
||||
while ($pattern !~ /^\s*(.).*\1/s)
|
||||
{
|
||||
printf " > " if $interact;
|
||||
last if ! ($_ = <$infile>);
|
||||
printf $outfile "$_" if ! $interact;
|
||||
$pattern .= $_;
|
||||
}
|
||||
|
||||
chomp($pattern);
|
||||
$pattern =~ s/\s+$//;
|
||||
|
||||
# Split the pattern from the modifiers and adjust them as necessary.
|
||||
|
||||
$pattern =~ /^\s*((.).*\2)(.*)$/s;
|
||||
$pat = $1;
|
||||
$mod = $3;
|
||||
|
||||
# The private "aftertext" modifier means "print $' afterwards".
|
||||
|
||||
# Strip "aftertext" and remember that $' must be printed after each match.
# The lookbehind keeps this from matching the tail of "allaftertext", which
# would leave a stray "all" in $mod to be passed on as Perl modifiers.
$showrest = ($mod =~ s/(?<!all)aftertext,?//);
|
||||
|
||||
# "allaftertext" is used by pcre2test to print remainders after captures
|
||||
|
||||
$mod =~ s/allaftertext,?//;
|
||||
|
||||
# Detect utf
|
||||
|
||||
$utf8 = $mod =~ s/utf,?//;
|
||||
|
||||
# Remove "dupnames".
|
||||
|
||||
$mod =~ s/dupnames,?//;
|
||||
|
||||
# Remove "mark" (asks pcre2test to check MARK data)
|
||||
|
||||
$mod =~ s/mark,?//;
|
||||
|
||||
# "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl
|
||||
|
||||
$mod =~ s/ucp,?/u/;
|
||||
|
||||
# Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2 optimizations)
|
||||
|
||||
$mod =~ s/no_auto_possess,?//;
|
||||
$mod =~ s/no_start_optimize,?//;
|
||||
|
||||
# Add back retained modifiers and check that the pattern is valid.
|
||||
|
||||
$mod =~ s/,//g;
|
||||
$pattern = "$pat$mod";
|
||||
eval "\$_ =~ ${pattern}";
|
||||
if ($@)
|
||||
{
|
||||
printf $outfile "Error: $@";
|
||||
if (! $interact)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
last if ! ($_ = <$infile>);
|
||||
last if $_ =~ /^\s*$/;
|
||||
}
|
||||
}
|
||||
next NEXT_RE;
|
||||
}
|
||||
|
||||
# If the /g modifier is present, we want to put a loop round the matching;
|
||||
# otherwise just a single "if".
|
||||
|
||||
$cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
|
||||
|
||||
# If the pattern is actually the null string, Perl uses the most recently
|
||||
# executed (and successfully compiled) regex instead. This is a
|
||||
# nasty trap for the unwary! The PCRE2 test suite does contain null strings
|
||||
# in places - if they are allowed through here all sorts of weird and
|
||||
# unexpected effects happen. To avoid this, we replace such patterns with
|
||||
# a non-null pattern that has the same effect.
|
||||
|
||||
$pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
|
||||
|
||||
# Read data lines and test them
|
||||
|
||||
for (;;)
|
||||
{
|
||||
printf "data> " if $interact;
|
||||
last NEXT_RE if ! ($_ = <$infile>);
|
||||
chomp;
|
||||
printf $outfile "%s", "$_\n" if ! $interact;
|
||||
|
||||
s/\s+$//; # Remove trailing space
|
||||
s/^\s+//; # Remove leading space
|
||||
|
||||
last if ($_ eq "");
|
||||
next if $_ =~ /^\\=(?:\s|$)/; # Comment line
|
||||
|
||||
$x = eval "\"$_\""; # To get escapes processed
|
||||
|
||||
# Empty array for holding results, ensure $REGERROR and $REGMARK are
|
||||
# unset, then do the matching.
|
||||
|
||||
@subs = ();
|
||||
|
||||
$pushes = "push \@subs,\$&;" .
|
||||
"push \@subs,\$1;" .
|
||||
"push \@subs,\$2;" .
|
||||
"push \@subs,\$3;" .
|
||||
"push \@subs,\$4;" .
|
||||
"push \@subs,\$5;" .
|
||||
"push \@subs,\$6;" .
|
||||
"push \@subs,\$7;" .
|
||||
"push \@subs,\$8;" .
|
||||
"push \@subs,\$9;" .
|
||||
"push \@subs,\$10;" .
|
||||
"push \@subs,\$11;" .
|
||||
"push \@subs,\$12;" .
|
||||
"push \@subs,\$13;" .
|
||||
"push \@subs,\$14;" .
|
||||
"push \@subs,\$15;" .
|
||||
"push \@subs,\$16;" .
|
||||
"push \@subs,\$'; }";
|
||||
|
||||
undef $REGERROR;
|
||||
undef $REGMARK;
|
||||
|
||||
eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
|
||||
|
||||
if ($@)
|
||||
{
|
||||
printf $outfile "Error: $@\n";
|
||||
next NEXT_RE;
|
||||
}
|
||||
elsif (scalar(@subs) == 0)
|
||||
{
|
||||
printf $outfile "No match";
|
||||
# Only report a mark when $REGERROR holds a name rather than the bare true
# value "1". Compare as a string: a numeric test would also discard any mark
# name that merely evaluates to 1 (for example "1a" or "01").
if (defined $REGERROR && $REGERROR ne "1")
|
||||
{ printf $outfile (", mark = %s", &pchars($REGERROR)); }
|
||||
printf $outfile "\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
while (scalar(@subs) != 0)
|
||||
{
|
||||
printf $outfile (" 0: %s\n", &pchars($subs[0]));
|
||||
printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
|
||||
$last_printed = 0;
|
||||
for ($i = 1; $i <= 16; $i++)
|
||||
{
|
||||
if (defined $subs[$i])
|
||||
{
|
||||
while ($last_printed++ < $i-1)
|
||||
{ printf $outfile ("%2d: <unset>\n", $last_printed); }
|
||||
printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
|
||||
$last_printed = $i;
|
||||
}
|
||||
}
|
||||
splice(@subs, 0, 18);
|
||||
}
|
||||
|
||||
# It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
|
||||
# set and the input pattern was a UTF-8 string. We can, however, force
|
||||
# it to be so marked.
|
||||
|
||||
# Only report a mark when $REGMARK holds a name rather than the bare true
# value "1". Compare as a string: a numeric test would also discard any mark
# name that merely evaluates to 1 (for example "1a" or "01").
if (defined $REGMARK && $REGMARK ne "1")
|
||||
{
|
||||
$xx = $REGMARK;
|
||||
$xx = Encode::decode_utf8($xx) if $utf8;
|
||||
printf $outfile ("MK: %s\n", &pchars($xx));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# printf $outfile "\n";
|
||||
|
||||
PERLEND
|
||||
# Feed the generated program to Perl on stdin ("-") and pass the remaining
# arguments through. "$@" is quoted so that file names containing spaces or
# glob characters reach Perl as single, unmodified arguments.
) | $perl - "$@"
|
||||
|
||||
# End
|
306
pcre2/src/config.h.generic
Normal file
306
pcre2/src/config.h.generic
Normal file
|
@ -0,0 +1,306 @@
|
|||
/* src/config.h. Generated from config.h.in by configure. */
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
/* #undef EBCDIC_NL25 */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
/* #undef HAVE_BCOPY */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
/* #undef HAVE_BZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
/* #undef HAVE_DIRENT_H */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
/* #undef HAVE_INTTYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
/* #undef HAVE_LIMITS_H */
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
/* #undef HAVE_MEMMOVE */
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
/* #undef HAVE_MEMORY_H */
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
/* #undef HAVE_PTHREAD */
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
/* #undef HAVE_READLINE_HISTORY_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
/* #undef HAVE_STDLIB_H */
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
/* #undef HAVE_STRERROR */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
/* #undef HAVE_STRINGS_H */
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
/* #undef HAVE_STRING_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
/* #undef HAVE_SYS_STAT_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
/* #undef HAVE_SYS_TYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||
/* #undef HAVE_VISIBILITY */
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* PCRE2 uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't
|
||||
use recursion in the match() function; instead it creates its own stack by
|
||||
hand, using memory from the heap. For more detail, see the comments and
|
||||
other stuff just above the match() function. */
|
||||
/* #undef HEAP_MATCH_RECURSE */
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
/* This is ignored unless you are using libtool. */
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre2_match(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if HEAP_MATCH_RECURSE is defined)
|
||||
that is used. The value of MATCH_LIMIT_RECURSION applies only to recursive
|
||||
calls of match(). To have any useful effect, it must be less than the value
|
||||
of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There
|
||||
is a runtime method for setting a different limit. */
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
/* #undef NEVER_BACKSLASH_C */
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
(ANYCRLF). */
|
||||
#ifndef NEWLINE_DEFAULT
|
||||
#define NEWLINE_DEFAULT 2
|
||||
#endif
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre2"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.21"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.21"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#ifndef PCRE2GREP_BUFSIZE
|
||||
#define PCRE2GREP_BUFSIZE 20480
|
||||
#endif
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
/* #undef PCRE2_DEBUG */
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
/* #undef PCRE2_EXP_DEFN */
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE2_STATIC */
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
/* #undef STDC_HEADERS */
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
/* #undef SUPPORT_LIBEDIT */
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_16 */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE2_8 */
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
/* #undef SUPPORT_UNICODE */
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.21"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef int64_t */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
297
pcre2/src/config.h.in
Normal file
297
pcre2/src/config.h.in
Normal file
|
@ -0,0 +1,297 @@
|
|||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
#undef BSR_ANYCRLF
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
#undef EBCDIC
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
#undef EBCDIC_NL25
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#undef HAVE_BCOPY
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#undef HAVE_BZLIB_H
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#undef HAVE_DIRENT_H
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
#undef HAVE_EDITLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
#undef HAVE_EDIT_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#undef HAVE_LIMITS_H
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#undef HAVE_MEMMOVE
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#undef HAVE_READLINE_HISTORY_H
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#undef HAVE_READLINE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||
#undef HAVE_VISIBILITY
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
#undef HAVE_WINDOWS_H
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#undef HAVE_ZLIB_H
|
||||
|
||||
/* PCRE2 uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't
|
||||
use recursion in the match() function; instead it creates its own stack by
|
||||
steam using memory from the heap. For more detail, see the comments and
|
||||
other stuff just above the match() function. */
|
||||
#undef HEAP_MATCH_RECURSE
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
#undef LINK_SIZE
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre2_match(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. */
|
||||
#undef MATCH_LIMIT
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if HEAP_MATCH_RECURSE is defined)
|
||||
that is used. The value of MATCH_LIMIT_RECURSION applies only to recursive
|
||||
calls of match(). To have any useful effect, it must be less than the value
|
||||
of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There
|
||||
is a runtime method for setting a different limit. */
|
||||
#undef MATCH_LIMIT_RECURSION
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_COUNT
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_SIZE
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
#undef NEVER_BACKSLASH_C
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
(ANYCRLF). */
|
||||
#undef NEWLINE_DEFAULT
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#undef PARENS_NEST_LIMIT
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#undef PCRE2GREP_BUFSIZE
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2POSIX_EXP_DECL
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2POSIX_EXP_DEFN
|
||||
|
||||
/* Define to any value to include debugging code. */
|
||||
#undef PCRE2_DEBUG
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2_EXP_DECL
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
#undef PCRE2_STATIC
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#undef SUPPORT_JIT
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
#undef SUPPORT_LIBBZ2
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
#undef SUPPORT_LIBEDIT
|
||||
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
#undef SUPPORT_LIBREADLINE
|
||||
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
#undef SUPPORT_LIBZ
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_JIT
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_16
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_32
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE2_8
|
||||
|
||||
/* Define to any value to enable support for Unicode and UTF encoding. This
|
||||
will work even in an EBCDIC environment, but it is incompatible with the
|
||||
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/Unicode, but not both at once. */
|
||||
#undef SUPPORT_UNICODE
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
#undef SUPPORT_VALGRIND
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
#undef const
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int64_t
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
#undef size_t
|
214
pcre2/src/dftables.c
Normal file
214
pcre2/src/dftables.c
Normal file
|
@ -0,0 +1,214 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE2. The tables are built according to the current
|
||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define DFTABLES /* pcre2_maketables.c notices this */
|
||||
#include "pcre2_maketables.c"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
int i = 1;
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
/* By default, the default C locale is used rather than what the building user
|
||||
happens to have set. However, if the -L option is given, set the locale from
|
||||
the LC_xxx environment variables. */
|
||||
|
||||
if (argc > 1 && strcmp(argv[1], "-L") == 0)
|
||||
{
|
||||
setlocale(LC_ALL, ""); /* Set from environment variables */
|
||||
i++;
|
||||
}
|
||||
|
||||
if (argc < i + 1)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are several fprintf() calls here, because gcc in pedantic mode
|
||||
complains about the very long string otherwise. */
|
||||
|
||||
fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
fprintf(f,
|
||||
"/* For z/OS, config.h is forced */\n"
|
||||
"#ifndef HAVE_CONFIG_H\n"
|
||||
"#define HAVE_CONFIG_H 1\n"
|
||||
"#endif\n\n");
|
||||
#endif
|
||||
|
||||
fprintf(f,
|
||||
"/* The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE2 is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre2_internal.h\"\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table contains bit maps for various character classes.\n"
|
||||
"Each map is 32 bytes long and the bits run from the least\n"
|
||||
"significant end of each byte. The classes that have their own\n"
|
||||
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
||||
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) fprintf(f, "\n");
|
||||
fprintf(f, "\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x hexadecimal digit\n"
|
||||
" 0x%02x alphanumeric or '_'\n"
|
||||
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
||||
ctype_meta);
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
fprintf(f, " /* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
|
||||
fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of dftables.c */
|
722
pcre2/src/pcre2.h.generic
Normal file
722
pcre2/src/pcre2.h.generic
Normal file
|
@ -0,0 +1,722 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE2_H
|
||||
#define _PCRE2_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 21
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2016-01-12
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
|
||||
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
functions (though pcre2_jit_match() ignores the latter since it bypasses all
|
||||
sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
|
||||
/* These are additional options for pcre2_dfa_match(). */
|
||||
|
||||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* These are additional options for pcre2_substitute(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
|
||||
functions, and serializing functions. They are in numerical order. Originally
|
||||
they were in alphabetical order too, but now that PCRE2 is released, the
|
||||
numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
typedef uint8_t PCRE2_UCHAR8;
|
||||
typedef uint16_t PCRE2_UCHAR16;
|
||||
typedef uint32_t PCRE2_UCHAR32;
|
||||
|
||||
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structure for passing out data via a callout function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. Define the generic version in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_general_context *pcre2_general_context_create( \
|
||||
void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
||||
pcre2_compile_context *, int (*)(uint32_t, void *), \
|
||||
void *);
|
||||
|
||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
|
||||
int *, PCRE2_SIZE *, pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
||||
void *); \
|
||||
PCRE2_EXP_DECL int pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), \
|
||||
void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(\
|
||||
const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||
uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||
uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||
uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||
const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
||||
PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t pcre2_serialize_encode(const pcre2_code **, \
|
||||
int32_t, uint8_t **, PCRE2_SIZE *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t pcre2_serialize_decode(pcre2_code **, int32_t, \
|
||||
const uint8_t *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int pcre2_substitute(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_match_context *, \
|
||||
pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
/* Declares pcre2_get_error_message() and pcre2_maketables(). The final
declaration has no trailing backslash, so the macro body ends on that line
instead of being spliced onto whatever line happens to follow it. */
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL \
|
||||
const uint8_t *pcre2_maketables(pcre2_general_context *);
|
||||
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
/* Paste the two arguments together exactly as given (no prior expansion). */
#define PCRE2_JOIN(a,b) a ## b
|
||||
/* Extra level of indirection: the arguments are macro-expanded here before
being handed to PCRE2_JOIN for pasting. */
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
/* Append the current value of PCRE2_LOCAL_WIDTH to the given name. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
/* First expansion: PCRE2_SUFFIX appends 8 to every generic name. */
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Second expansion: the same list again, this time with suffix 16. */
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Third expansion: the same list again, this time with suffix 32. */
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
/* Drop the PCRE2_LOCAL_WIDTH-based definition; PCRE2_SUFFIX is redefined
below according to the application's PCRE2_CODE_UNIT_WIDTH. */
#undef PCRE2_SUFFIX
|
||||
/* The application must choose a width before including this header. */
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
/* Single-width case: generic names get the chosen width appended. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
/* Width 0: remove the pasting helpers and make PCRE2_SUFFIX return its
argument unchanged, so no width is appended to the generic names. */
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
/* Any other value is rejected at compile time. */
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre2.h */
|
722
pcre2/src/pcre2.h.in
Normal file
722
pcre2/src/pcre2.h.in
Normal file
|
@ -0,0 +1,722 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE2_H
|
||||
#define _PCRE2_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR @PCRE2_MAJOR@
|
||||
#define PCRE2_MINOR @PCRE2_MINOR@
|
||||
#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
|
||||
#define PCRE2_DATE @PCRE2_DATE@
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc. are defined. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them. */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
|
||||
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
functions (though pcre2_jit_match() ignores the latter since it bypasses all
|
||||
sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
|
||||
/* These are additional options for pcre2_dfa_match(). */
|
||||
|
||||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* These are additional options for pcre2_substitute(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with the values set in config.h, and every value in both sets must be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
|
||||
functions, and serializing functions. They are in numerical order. Originally
|
||||
they were in alphabetical order too, but now that PCRE2 is released, the
|
||||
numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
typedef uint8_t PCRE2_UCHAR8;
|
||||
typedef uint16_t PCRE2_UCHAR16;
|
||||
typedef uint32_t PCRE2_UCHAR32;
|
||||
|
||||
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structure for passing out data via the pcre2 callout function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. Define the generic version in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_general_context *pcre2_general_context_create( \
|
||||
void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
||||
pcre2_compile_context *, int (*)(uint32_t, void *), \
|
||||
void *);
|
||||
|
||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
|
||||
int *, PCRE2_SIZE *, pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
||||
void *); \
|
||||
PCRE2_EXP_DECL int pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), \
|
||||
void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(\
|
||||
const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||
uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||
uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||
uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||
const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
||||
PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t pcre2_serialize_encode(const pcre2_code **, \
|
||||
int32_t, uint8_t **, PCRE2_SIZE *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t pcre2_serialize_decode(pcre2_code **, int32_t, \
|
||||
const uint8_t *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int pcre2_substitute(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_match_context *, \
|
||||
pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
/* Declares pcre2_get_error_message() and pcre2_maketables(). Like the other
*_FUNCTIONS lists, this macro is expanded once per code unit width through
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS. The last prototype line must NOT end
with a backslash: a trailing line continuation would splice whatever line
follows into the macro body, and the definition would only be correct for as
long as that following line happened to be empty. */
#define PCRE2_OTHER_FUNCTIONS \
PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL \
  const uint8_t *pcre2_maketables(pcre2_general_context *);
|
||||
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
/* PCRE2_JOIN pastes its two arguments together exactly as they are written. */
#define PCRE2_JOIN(a,b) a ## b
|
||||
/* PCRE2_GLUE passes its arguments through one extra macro call, so that an
argument which is itself a macro (such as PCRE2_LOCAL_WIDTH) is replaced by
its value before PCRE2_JOIN pastes it. */
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
/* PCRE2_SUFFIX appends the current value of PCRE2_LOCAL_WIDTH to a name. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
/* Each stanza below sets PCRE2_LOCAL_WIDTH, expands the combined list so that
every name defined through PCRE2_SUFFIX picks up that width, and then removes
PCRE2_LOCAL_WIDTH again so the next stanza can define it afresh. */
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
/* Discard the PCRE2_LOCAL_WIDTH-based definition of PCRE2_SUFFIX; one of the
branches below installs the definition that application code will see. */
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
/* Single-width application: names wrapped in PCRE2_SUFFIX now get the selected
width appended. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
/* Width 0: PCRE2_SUFFIX hands back its argument unchanged, so no width is
appended and the token-pasting helpers are no longer needed. */
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre2.h */
|
1287
pcre2/src/pcre2_auto_possess.c
Normal file
1287
pcre2/src/pcre2_auto_possess.c
Normal file
File diff suppressed because it is too large
Load diff
198
pcre2/src/pcre2_chartables.c.dist
Normal file
198
pcre2/src/pcre2_chartables.c.dist
Normal file
|
@ -0,0 +1,198 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE2 by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE2) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE2 is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
const uint8_t PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre2_chartables.c */
|
9018
pcre2/src/pcre2_compile.c
Normal file
9018
pcre2/src/pcre2_compile.c
Normal file
File diff suppressed because it is too large
Load diff
217
pcre2/src/pcre2_config.c
Normal file
217
pcre2/src/pcre2_config.c
Normal file
|
@ -0,0 +1,217 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_internal.h to be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test their values. */
|
||||
|
||||
/* STRING turns its argument into a string literal exactly as written. */
#define STRING(a) # a
|
||||
/* XSTRING expands its argument first (if it is a macro) and then passes the
result to STRING, so the macro's value, not its name, is what gets quoted. */
#define XSTRING(s) STRING(s)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned
|
||||
>= 0 if where is NULL, giving length required
|
||||
             PCRE2_ERROR_BADOPTION if "what" not recognized
|
||||
or JIT target requested when JIT not enabled
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_config(uint32_t what, void *where)
|
||||
{
|
||||
if (where == NULL) /* Requests a length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_RECURSIONLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
/* These are handled below */
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
|
||||
#else
|
||||
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
{
|
||||
const char *v = PRIV(jit_get_target)();
|
||||
return 1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v));
|
||||
}
|
||||
#else
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
#endif
|
||||
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
*((uint32_t *)where) = configured_link_size;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
*((uint32_t *)where) = NEWLINE_DEFAULT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
*((uint32_t *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_RECURSIONLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
*((uint32_t *)where) = 0;
|
||||
#else
|
||||
*((uint32_t *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
#endif
|
||||
return 1 + ((where == NULL)?
|
||||
strlen(v): PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v));
|
||||
}
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
#if defined SUPPORT_UNICODE
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
before the date. On the other hand, if all four macros are put into a single
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
There are problems using an "obvious" approach like this:
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
|
||||
|
||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||
of STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what
|
||||
we really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
resort to a runtime test. */
|
||||
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
{
|
||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||
return 1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_config.c */
|
391
pcre2/src/pcre2_context.c
Normal file
391
pcre2/src/pcre2_context.c
Normal file
|
@ -0,0 +1,391 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Default malloc/free functions *
|
||||
*************************************************/
|
||||
|
||||
/* Ignore the "user data" argument in each case. */
|
||||
|
||||
/* Default allocator: forwards to malloc(); the user-data pointer is unused. */
static void *default_malloc(size_t size, void *data)
{
  void *block = malloc(size);
  (void)data;
  return block;
}
|
||||
|
||||
|
||||
/* Default deallocator: forwards to free(); the user-data pointer is unused. */
static void default_free(void *block, void *data)
{
  free(block);
  (void)data;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get a block and save memory control *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
memory control data is to be stored at the start for future use.
|
||||
|
||||
Arguments:
|
||||
size amount of memory required
|
||||
memctl pointer to a memctl block or NULL
|
||||
|
||||
Returns: pointer to memory or NULL on failure
|
||||
*/
|
||||
|
||||
extern void *
|
||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||
{
|
||||
pcre2_memctl *newmemctl;
|
||||
void *yield = (memctl == NULL)? malloc(size) :
|
||||
memctl->malloc(size, memctl->memory_data);
|
||||
if (yield == NULL) return NULL;
|
||||
newmemctl = (pcre2_memctl *)yield;
|
||||
if (memctl == NULL)
|
||||
{
|
||||
newmemctl->malloc = default_malloc;
|
||||
newmemctl->free = default_free;
|
||||
newmemctl->memory_data = NULL;
|
||||
}
|
||||
else *newmemctl = *memctl;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create and initialize contexts *
|
||||
*************************************************/
|
||||
|
||||
/* Initializing for compile and match contexts is done in separate, private
|
||||
functions so that these can be called from functions such as pcre2_compile()
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
option to set up default memory management. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
  void (*private_free)(void *, void *), void *memory_data)
{
  /* A NULL function pointer selects the corresponding default handler. */
  void *(*alloc_fn)(size_t, void *) =
    (private_malloc != NULL)? private_malloc : default_malloc;
  void (*free_fn)(void *, void *) =
    (private_free != NULL)? private_free : default_free;
  pcre2_general_context *gcontext;

  /* The context itself is obtained from the chosen allocator. */
  gcontext = alloc_fn(sizeof(pcre2_real_general_context), memory_data);
  if (gcontext != NULL)
    {
    gcontext->memctl.malloc = alloc_fn;
    gcontext->memctl.free = free_fn;
    gcontext->memctl.memory_data = memory_data;
    }
  return gcontext;
}
|
||||
|
||||
|
||||
/* A default compile context is set up to save having to initialize at run time
|
||||
when no context is supplied to the compile function. */
|
||||
|
||||
const pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT }; /* As it says */
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
  pcre2_compile_context *fresh;

  fresh = PRIV(memctl_malloc)(sizeof(pcre2_real_compile_context),
    (pcre2_memctl *)gcontext);

  if (fresh != NULL)
    {
    /* Start from the built-in defaults ... */
    *fresh = PRIV(default_compile_context);

    /* ... then restore the caller's memory handlers, which the line above
    overwrote with the default ones. */
    if (gcontext != NULL)
      *((pcre2_memctl *)fresh) = *((pcre2_memctl *)gcontext);
    }

  return fresh;
}
|
||||
|
||||
|
||||
/* A default match context is set up to save having to initialize at run time
|
||||
when no context is supplied to a match function. */
|
||||
|
||||
const pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
{ default_malloc, default_free, NULL },
|
||||
#endif
|
||||
#ifdef SUPPORT_JIT
|
||||
NULL,
|
||||
NULL,
|
||||
#endif
|
||||
NULL,
|
||||
NULL,
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_RECURSION };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
  pcre2_match_context *fresh;

  fresh = PRIV(memctl_malloc)(sizeof(pcre2_real_match_context),
    (pcre2_memctl *)gcontext);

  if (fresh != NULL)
    {
    /* Start from the built-in defaults ... */
    *fresh = PRIV(default_match_context);

    /* ... then restore the caller's memory handlers, which the line above
    overwrote with the default ones. */
    if (gcontext != NULL)
      *((pcre2_memctl *)fresh) = *((pcre2_memctl *)gcontext);
    }

  return fresh;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context copy functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
  pcre2_general_context *copy;

  /* The duplicate is allocated with the source context's own allocator. */
  copy = gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
    gcontext->memctl.memory_data);

  if (copy != NULL)
    memcpy(copy, gcontext, sizeof(pcre2_real_general_context));

  return copy;
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
  pcre2_compile_context *copy;

  /* The duplicate is allocated with the source context's own allocator. */
  copy = ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
    ccontext->memctl.memory_data);

  if (copy != NULL)
    memcpy(copy, ccontext, sizeof(pcre2_real_compile_context));

  return copy;
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
  pcre2_match_context *copy;

  /* The duplicate is allocated with the source context's own allocator. */
  copy = mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
    mcontext->memctl.memory_data);

  if (copy != NULL)
    memcpy(copy, mcontext, sizeof(pcre2_real_match_context));

  return copy;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context free functions *
|
||||
*************************************************/
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_free(pcre2_general_context *gcontext)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_free(pcre2_compile_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_free(pcre2_match_context *mcontext)
|
||||
{
|
||||
if (mcontext != NULL)
|
||||
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set values in contexts *
|
||||
*************************************************/
|
||||
|
||||
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
|
||||
data is given. Only some of the functions are able to test the validity of the
|
||||
data. */
|
||||
|
||||
|
||||
/* ------------ Compile contexts ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->parens_nest_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t, void *), void *user_data)
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
ccontext->stack_guard_data = user_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ------------ Match contexts ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->match_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||
{
|
||||
mcontext->offset_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->recursion_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mcontext->stack_memctl.malloc = mymalloc;
|
||||
mcontext->stack_memctl.free = myfree;
|
||||
mcontext->stack_memctl.memory_data = mydata;
|
||||
#else
|
||||
(void)mcontext;
|
||||
(void)mymalloc;
|
||||
(void)myfree;
|
||||
(void)mydata;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
3618
pcre2/src/pcre2_dfa_match.c
Normal file
3618
pcre2/src/pcre2_dfa_match.c
Normal file
File diff suppressed because it is too large
Load diff
321
pcre2/src/pcre2_error.c
Normal file
321
pcre2/src/pcre2_error.c
Normal file
|
@ -0,0 +1,321 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
This used to be a table of strings, but in order to reduce the number of
|
||||
relocations needed when a shared library is loaded dynamically, it is now one
|
||||
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||
performance issue because these strings are used only when there is an error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const char compile_error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
"unrecognized character follows \\\0"
|
||||
"numbers out of order in {} quantifier\0"
|
||||
/* 5 */
|
||||
"number too big in {} quantifier\0"
|
||||
"missing terminating ] for character class\0"
|
||||
"invalid escape sequence in character class\0"
|
||||
"range out of order in character class\0"
|
||||
"quantifier does not follow a repeatable item\0"
|
||||
/* 10 */
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"missing closing parenthesis\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"pattern passed as NULL\0"
|
||||
"unrecognised compile-time option bit(s)\0"
|
||||
"missing ) after (?# comment\0"
|
||||
"parentheses are too deeply nested\0"
|
||||
/* 20 */
|
||||
"regular expression is too large\0"
|
||||
"failed to allocate heap memory\0"
|
||||
"unmatched closing parenthesis\0"
|
||||
"internal error: code overflow\0"
|
||||
"letter or underscore expected after (?< or (?'\0"
|
||||
/* 25 */
|
||||
"lookbehind assertion is not fixed length\0"
|
||||
"malformed number or name after (?(\0"
|
||||
"conditional group contains more than two branches\0"
|
||||
"assertion expected after (?( or (?(?C)\0"
|
||||
"(?R or (?[+-]digits must be followed by )\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"this version of PCRE2 does not have Unicode support\0"
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"character code point value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
"invalid condition (?(0)\0"
|
||||
"\\C is not allowed in a lookbehind assertion\0"
|
||||
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
"invalid escape sequence in (*VERB) name\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator)\0"
|
||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||
"group name must start with a non-digit\0"
|
||||
/* 45 */
|
||||
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
|
||||
"malformed \\P or \\p sequence\0"
|
||||
"unknown property name after \\P or \\p\0"
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||
/* 50 */
|
||||
"invalid range in character class\0"
|
||||
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||
"internal error: overran compiling workspace\0"
|
||||
"internal error: previously-checked referenced subpattern not found\0"
|
||||
"DEFINE group contains more than one branch\0"
|
||||
/* 55 */
|
||||
"missing opening brace after \\o\0"
|
||||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"a numbered reference must not be zero\0"
|
||||
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"digit expected after (?+\0"
|
||||
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
#ifndef EBCDIC
|
||||
"\\c must be followed by a printable ASCII character\0"
|
||||
#else
|
||||
"\\c must be followed by a letter or one of [\\]^_?\0"
|
||||
#endif
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown opcode in find_fixedlength()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"SPARE ERROR\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"syntax error in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
/* Match-time and UTF messages, one zero-terminated substring per error
number, indexed by the negated error code. The numeric markers give the
table index of the string that follows. */
static const char match_error_texts[] =
  "no error\0"
  "no match\0"
  "partial match\0"
  "UTF-8 error: 1 byte missing at end\0"
  "UTF-8 error: 2 bytes missing at end\0"
  /* 5 */
  "UTF-8 error: 3 bytes missing at end\0"
  "UTF-8 error: 4 bytes missing at end\0"
  "UTF-8 error: 5 bytes missing at end\0"
  "UTF-8 error: byte 2 top bits not 0x80\0"
  "UTF-8 error: byte 3 top bits not 0x80\0"
  /* 10 */
  "UTF-8 error: byte 4 top bits not 0x80\0"
  "UTF-8 error: byte 5 top bits not 0x80\0"
  "UTF-8 error: byte 6 top bits not 0x80\0"
  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
  /* 15 */
  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-8 error: overlong 2-byte sequence\0"
  "UTF-8 error: overlong 3-byte sequence\0"
  "UTF-8 error: overlong 4-byte sequence\0"
  /* 20 */
  "UTF-8 error: overlong 5-byte sequence\0"
  "UTF-8 error: overlong 6-byte sequence\0"
  "UTF-8 error: isolated byte with 0x80 bit set\0"
  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
  "UTF-16 error: missing low surrogate at end\0"
  /* 25 */
  "UTF-16 error: invalid low surrogate\0"
  "UTF-16 error: isolated low surrogate\0"
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
  "bad data value\0"
  /* 30 */
  "patterns do not all use the same character tables\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
  "bad option value\0"
  /* 35 */
  "invalid replacement string\0"
  "bad offset into UTF string\0"
  "callout error code\0"              /* Never returned by PCRE2 itself */
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
  /* 40 */
  "backreference condition or recursion test is not supported for DFA matching\0"
  "function is not supported for DFA matching\0"
  "pattern contains an item that is not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
  /* 45 */
  "bad JIT option\0"
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0"
  /* 50 */
  "non-unique substring name\0"
  "NULL argument passed\0"
  "nested recursion at the same subject position\0"
  "recursion limit exceeded\0"
  "requested value is not available\0"
  /* 55 */
  "requested value is not set\0"
  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
  "bad escape sequence in replacement string\0"
  "expected closing curly bracket in replacement string\0"
  "bad substitution in replacement string\0"
  /* 60 */
  "match with end before start is not supported\0"
  "too many replacements (more than INT_MAX)\0"
  ;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
buffer where to put the message (zero terminated)
|
||||
size size of the buffer
|
||||
|
||||
Returns: length of message if all is well
|
||||
negative on error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
|
||||
{
|
||||
char xbuff[128];
|
||||
const char *message;
|
||||
size_t i;
|
||||
uint32_t n;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NULL) {};
|
||||
if (*message == CHAR_NULL)
|
||||
{
|
||||
sprintf(xbuff, "No text for error %d", enumber);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
if (i >= size - 1)
|
||||
{
|
||||
buffer[i] = 0; /* Terminate partial message */
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
buffer[i] = 0;
|
||||
return i;
|
||||
}
|
||||
|
||||
/* End of pcre2_error.c */
|
218
pcre2/src/pcre2_find_bracket.c
Normal file
218
pcre2/src/pcre2_find_bracket.c
Normal file
|
@ -0,0 +1,218 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE for a lookbehind. The function is called
|
||||
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
|
||||
matching length. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
register PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle lookbehind */
|
||||
|
||||
else if (c == OP_REVERSE)
|
||||
{
|
||||
if (number < 0) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
1944
pcre2/src/pcre2_internal.h
Normal file
1944
pcre2/src/pcre2_internal.h
Normal file
File diff suppressed because it is too large
Load diff
852
pcre2/src/pcre2_intmodedep.h
Normal file
852
pcre2/src/pcre2_intmodedep.h
Normal file
|
@ -0,0 +1,852 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||
These mode-dependent items are kept in a separate file so that they can also be
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
|
||||
Some of the mode-dependent macros are required at different widths for
|
||||
different parts of the pcre2test code (in particular, the included
|
||||
pcre2_printint.c file). We undefine them here so that they can be re-defined for
|
||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||
just to undefine them all. */
|
||||
|
||||
#undef ACROSSCHAR
|
||||
#undef BACKCHAR
|
||||
#undef BYTES2CU
|
||||
#undef CU2BYTES
|
||||
#undef FORWARDCHAR
|
||||
#undef FORWARDCHARTEST
|
||||
#undef GET
|
||||
#undef GET2
|
||||
#undef GETCHAR
|
||||
#undef GETCHARINC
|
||||
#undef GETCHARINCTEST
|
||||
#undef GETCHARLEN
|
||||
#undef GETCHARLENTEST
|
||||
#undef GETCHARTEST
|
||||
#undef GET_EXTRALEN
|
||||
#undef HAS_EXTRALEN
|
||||
#undef IMM2_SIZE
|
||||
#undef MAX_255
|
||||
#undef MAX_MARK
|
||||
#undef MAX_PATTERN_SIZE
|
||||
#undef MAX_UTF_SINGLE_CU
|
||||
#undef NOT_FIRSTCU
|
||||
#undef PUT
|
||||
#undef PUT2
|
||||
#undef PUT2INC
|
||||
#undef PUTCHAR
|
||||
#undef PUTINC
|
||||
#undef TABLE_GET
|
||||
|
||||
|
||||
|
||||
/* -------------------------- MACROS ----------------------------- */
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
|
||||
(always stored in big-endian order in 8-bit mode) by default. These are used,
|
||||
for example, to link from the start of a subpattern to its alternatives and its
|
||||
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
|
||||
to around 64K, which is big enough for almost everybody. However, I received a
|
||||
request for an even bigger limit. For this reason, and also to make the code
|
||||
easier to maintain, the storing and loading of offsets from the compiled code
|
||||
unit string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
values of 3 or 4 are also supported. */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 8), \
|
||||
(a[(n)+1] = (d) & 255)
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 8) | (a)[(n)+1])
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 16), \
|
||||
(a[(n)+1] = (d) >> 8), \
|
||||
(a[(n)+2] = (d) & 255)
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
|
||||
#define MAX_PATTERN_SIZE (1 << 24)
|
||||
|
||||
#elif LINK_SIZE == 4
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 24), \
|
||||
(a[(n)+1] = (d) >> 16), \
|
||||
(a[(n)+2] = (d) >> 8), \
|
||||
(a[(n)+3] = (d) & 255)
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d))
|
||||
#define GET(a,n) \
|
||||
(a[n])
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3 || LINK_SIZE == 4
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 16), \
|
||||
(a[(n)+1] = (d) & 65535)
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 16) | (a)[(n)+1])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d))
|
||||
#define GET(a,n) \
|
||||
(a[n])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error Unsupported compiling mode
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------- Other mode-specific macros ----------------- */
|
||||
|
||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||
the size of offsets changes. These are used for repeat counts and for other
|
||||
things such as capturing parenthesis numbers in back references.
|
||||
|
||||
Define the number of code units required to hold a 16-bit count/offset, and
|
||||
macros to load and store such a value. For reasons that I do not understand,
|
||||
the expression in the 8-bit GET2 macro is treated by gcc as a signed
|
||||
expression, even when a is declared as unsigned. It seems that any kind of
|
||||
arithmetic results in a signed value. Hence the cast. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define IMM2_SIZE 2
|
||||
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
|
||||
#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define IMM2_SIZE 1
|
||||
#define GET2(a,n) a[n]
|
||||
#define PUT2(a,n,d) a[n] = d
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument is less than 256. The maximum length of a MARK name must
|
||||
fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
|
||||
is used to access elements of tables containing exactly 256 items. When code
|
||||
points can be greater than 255, a check is needed before accessing these
|
||||
tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* ----------------- Character-handling macros ----------------- */
|
||||
|
||||
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
|
||||
high-order bits available to the application for other uses. In preparation for
|
||||
the future implementation of this mode, there are macros that load a data item
|
||||
and, if in this special mode, mask it to 21 bits. These macros all have names
|
||||
starting with UCHAR21. In all other modes, including the normal 32-bit
|
||||
library, the macros all have the same simple definitions. When the new mode is
|
||||
implemented, it is expected that these definitions will be varied appropriately
|
||||
using #ifdef when compiling the library that supports the special mode. */
|
||||
|
||||
#define UCHAR21(eptr) (*(eptr))
|
||||
#define UCHAR21TEST(eptr) (*(eptr))
|
||||
#define UCHAR21INC(eptr) (*(eptr)++)
|
||||
#define UCHAR21INCTEST(eptr) (*(eptr)++)
|
||||
|
||||
/* When UTF encoding is being used, a character is no longer just a single
|
||||
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||
handling generate simple sequences when used in the basic mode, and more
|
||||
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCU(c) */
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||
/* #define BACKCHAR(eptr) */
|
||||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define FORWARDCHARTEST(eptr,end) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 127
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
|
||||
|
||||
/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xc0) == 0x80)
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (c >= 0xc0) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && c >= 0xc0) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
|
||||
pointer, incrementing length if there are extra bytes. This is called when we
|
||||
do not know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++
|
||||
#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0) == 0x80) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
while((condition) && ((eptr) & 0xc0) == 0x80) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 65535
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800)
|
||||
|
||||
/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) 1
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xfc00) == 0xdc00)
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
#define GETUTF16(c, eptr) \
|
||||
{ c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
|
||||
the pointer. */
|
||||
|
||||
#define GETUTF16INC(c, eptr) \
|
||||
{ c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; }
|
||||
|
||||
/* Get the next UTF-16 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer, incrementing the length. */
|
||||
|
||||
#define GETUTF16LEN(c, eptr, len) \
|
||||
{ c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer, incrementing
|
||||
length if there is a low surrogate. This is called when we know we are in
|
||||
UTF-16 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
|
||||
pointer, incrementing length if there is a low surrogate. This is called when
|
||||
we do not know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-16 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-16 only
|
||||
code. */
|
||||
|
||||
#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++
|
||||
#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00) == 0xdc00) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#else
|
||||
|
||||
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
|
||||
into one PCRE2_UCHAR unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||
#define HAS_EXTRALEN(c) (0)
|
||||
#define GET_EXTRALEN(c) (0)
|
||||
#define NOT_FIRSTCU(c) (0)
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
|
||||
length (since all UTF-32 is of length 1). This is called when we know we are in
|
||||
UTF-32 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
GETCHAR(c, eptr)
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
|
||||
pointer, not incrementing the length (since all UTF-32 is of length 1).
|
||||
This is called when we do not know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
GETCHARTEST(c, eptr)
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-32 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-32 only
|
||||
code.
|
||||
|
||||
These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
|
||||
|
||||
#define BACKCHAR(eptr) do { } while (0)
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
|
||||
#define FORWARDCHAR(eptr) do { } while (0)
|
||||
#define FORWARDCHARTEST(eptr,end) do { } while (0)
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
|
||||
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
|
||||
#endif /* UTF-32 character handling */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/* Mode-dependent macros that have the same definition in all modes. */
|
||||
|
||||
#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
|
||||
#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
|
||||
|
||||
|
||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||
|
||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||
code that uses them is simpler because it assumes this. */
|
||||
|
||||
/* The real general context structure. At present it holds only data for custom
|
||||
memory control. */
|
||||
|
||||
typedef struct pcre2_real_general_context {
|
||||
pcre2_memctl memctl;   /* Custom memory control data; must be the first member */
|
||||
} pcre2_real_general_context;
|
||||
|
||||
/* The real compile context structure */
|
||||
|
||||
typedef struct pcre2_real_compile_context {
|
||||
pcre2_memctl memctl;   /* Memory control data; must be the first member */
|
||||
int (*stack_guard)(uint32_t, void *);   /* Callback pointer; presumably a stack check used while compiling - TODO confirm */
|
||||
void *stack_guard_data;   /* Presumably handed to stack_guard as its second argument - TODO confirm */
|
||||
const uint8_t *tables;   /* Presumably the character tables, as in pcre2_real_code - confirm */
|
||||
PCRE2_SIZE max_pattern_length;   /* Presumably an upper limit on pattern length - confirm units with the compiler */
|
||||
uint16_t bsr_convention;   /* Presumably what \R matches, as in pcre2_real_code - confirm */
|
||||
uint16_t newline_convention;   /* Presumably selects what counts as a newline - TODO confirm */
|
||||
uint32_t parens_nest_limit;   /* Presumably the maximum parenthesis nesting depth - TODO confirm */
|
||||
} pcre2_real_compile_context;
|
||||
|
||||
/* The real match context structure. */
|
||||
|
||||
typedef struct pcre2_real_match_context {
|
||||
pcre2_memctl memctl;   /* Memory control data; must be the first member */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl;   /* Second memory control block, present only in HEAP_MATCH_RECURSE builds; presumably for match "stack" frames - confirm */
|
||||
#endif
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_jit_callback jit_callback;   /* JIT callback, present only when JIT support is compiled in */
|
||||
|
||||
||||
void *jit_callback_data;   /* Presumably passed through to jit_callback - TODO confirm */
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);   /* Callout function pointer; receives a callout block and a data pointer */
|
||||
void *callout_data;   /* Presumably the data pointer given to callout - TODO confirm */
|
||||
PCRE2_SIZE offset_limit;   /* Presumably the limit used with PCRE2_USE_OFFSET_LIMIT - confirm */
|
||||
uint32_t match_limit;   /* Presumably the limit behind "match limit exceeded" - confirm */
|
||||
uint32_t recursion_limit;   /* Presumably the limit behind "recursion limit exceeded" - confirm */
|
||||
} pcre2_real_match_context;
|
||||
|
||||
/* The real compiled code structure. The type for the blocksize field is
|
||||
defined specially because it is required in pcre2_serialize_decode() when
|
||||
copying the size from possibly unaligned memory into a variable of the same
|
||||
type. Use a macro rather than a typedef to avoid compiler warnings when this
|
||||
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
|
||||
largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
|
||||
argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
|
||||
here.) */
|
||||
|
||||
#undef CODE_BLOCKSIZE_TYPE
|
||||
#define CODE_BLOCKSIZE_TYPE size_t
|
||||
|
||||
#undef LOOKBEHIND_MAX
|
||||
#define LOOKBEHIND_MAX UINT16_MAX
|
||||
|
||||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields; must be first (see NOTE above) */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code; the JIT functions cast it to executable_functions * */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_match; /* Limit set in the pattern; pcre2_jit_match() takes the smaller of this and the context limit */
|
||||
uint32_t limit_recursion; /* Limit set in the pattern */
|
||||
uint32_t first_codeunit; /* Starting code unit */
|
||||
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||
uint16_t bsr_convention; /* What \R matches */
|
||||
uint16_t newline_convention; /* What is a newline? */
|
||||
uint16_t max_lookbehind; /* Longest lookbehind (characters); 16 bits suffice, see LOOKBEHIND_MAX */
|
||||
uint16_t minlength; /* Minimum length of match */
|
||||
uint16_t top_bracket; /* Highest numbered group */
|
||||
uint16_t top_backref; /* Highest numbered back reference */
|
||||
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||
uint16_t name_count; /* Number of name entries in the table */
|
||||
} pcre2_real_code;
|
||||
|
||||
/* The real match data structure. */
|
||||
|
||||
typedef struct pcre2_real_match_data {
|
||||
pcre2_memctl memctl; /* Must be first (see NOTE at the head of this section) */
|
||||
const pcre2_real_code *code; /* The pattern used for the match */
|
||||
PCRE2_SPTR subject; /* The subject that was matched */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||
uint16_t oveccount; /* Number of pairs */
|
||||
int rc; /* The return code from the match */
|
||||
PCRE2_SIZE ovector[1]; /* First element of the offset vector; NOTE(review): presumably the block is over-allocated to hold oveccount pairs - confirm */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
|
||||
|
||||
/* These structures are not needed for pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
/* Structure for checking for mutual recursion when scanning compiled code. */
|
||||
|
||||
typedef struct recurse_check {
|
||||
struct recurse_check *prev; /* Link to another recurse_check (presumably the enclosing entry - confirm) */
|
||||
PCRE2_SPTR group; /* Pointer into the compiled code for the group being checked (presumably - confirm) */
|
||||
} recurse_check;
|
||||
|
||||
/* Structure for building a cache when filling in recursion offsets. */
|
||||
|
||||
typedef struct recurse_cache {
|
||||
PCRE2_SPTR group; /* Cached group pointer */
|
||||
int recno; /* Number associated with the cached group (presumably the group number - confirm) */
|
||||
} recurse_cache;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer; /* Next entry in the chain (presumably the enclosing branch - confirm) */
|
||||
PCRE2_UCHAR *current_branch; /* Pointer to a currently incomplete branch */
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for building a list of named groups during the first pass of
|
||||
compiling. */
|
||||
|
||||
typedef struct named_group {
|
||||
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||
uint32_t number; /* Group number */
|
||||
uint16_t length; /* Length of the name */
|
||||
uint16_t isdup; /* TRUE if a duplicate (a truth value held in a uint16_t) */
|
||||
} named_group;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_block {
|
||||
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *cbits; /* Points to character type table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SPTR start_workspace; /* The start of working space */
|
||||
PCRE2_SPTR start_code; /* The start of the compiled code */
|
||||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_SPTR nestptr[2]; /* Pointer(s) saved for string substitution */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
size_t workspace_size; /* Size of workspace */
|
||||
uint16_t names_found; /* Number of entries so far */
|
||||
uint16_t name_entry_size; /* Size of each entry */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
named_group *named_groups; /* Points to vector of named_group entries in pre-compile */
|
||||
uint32_t named_group_list_size; /* Number of entries in the named group list */
|
||||
uint32_t external_options; /* External (initial) options */
|
||||
uint32_t external_flags; /* External flag bits to be set */
|
||||
uint32_t bracount; /* Count of capturing parens as we compile */
|
||||
uint32_t final_bracount; /* Saved value after first pass (presumably of bracount - confirm) */
|
||||
uint32_t *groupinfo; /* Group info vector */
|
||||
uint32_t top_backref; /* Maximum back reference */
|
||||
uint32_t backref_map; /* Bitmap of low back refs */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||
int parens_depth; /* Depth of nested parentheses */
|
||||
int assert_depth; /* Depth of nested assertions */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL had_recurse; /* Had a recursion or subroutine call */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
BOOL iscondassert; /* Next assert is a condition */
|
||||
} compile_block;
|
||||
|
||||
/* Structure for keeping the properties of the in-memory stack used
|
||||
by the JIT matcher. */
|
||||
|
||||
typedef struct pcre2_real_jit_stack {
|
||||
pcre2_memctl memctl; /* Memory control; its free() member releases this block in pcre2_jit_stack_free() */
|
||||
void* stack; /* Result of sljit_allocate_stack(); cast to struct sljit_stack * when used */
|
||||
} pcre2_real_jit_stack;
|
||||
|
||||
/* Structure for keeping a chain of heap blocks used for saving ovectors
|
||||
during pattern recursion when the ovector is larger than can be saved on
|
||||
the system stack. */
|
||||
|
||||
typedef struct ovecsave_frame {
|
||||
struct ovecsave_frame *next; /* Next frame on free chain */
|
||||
PCRE2_SIZE saved_ovec[1]; /* First vector element; NOTE(review): presumably the block is allocated larger than the struct - confirm */
|
||||
} ovecsave_frame;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern; used by pcre2_match(). */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
unsigned int group_num; /* Number of group that was called */
|
||||
PCRE2_SIZE *ovec_save; /* Pointer to saved ovector frame */
|
||||
uint32_t saved_capture_last; /* Last capture number (presumably a saved copy of match_block.capture_last - confirm) */
|
||||
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
||||
} recursion_info;
|
||||
|
||||
/* A similar structure for pcre2_dfa_match(). */
|
||||
|
||||
typedef struct dfa_recursion_info {
|
||||
struct dfa_recursion_info *prevrec; /* Previous recursion record (cf. recursion_info) */
|
||||
PCRE2_SPTR subject_position; /* Subject position (cf. recursion_info: position at start of recursion) */
|
||||
uint32_t group_num; /* Group number (cf. recursion_info: number of group that was called) */
|
||||
} dfa_recursion_info;
|
||||
|
||||
/* Structure for building a chain of data for holding the values of the subject
|
||||
pointer at the start of each subpattern, so as to detect when an empty string
|
||||
has been matched by a subpattern - to break infinite loops; used by
|
||||
pcre2_match(). */
|
||||
|
||||
typedef struct eptrblock {
|
||||
struct eptrblock *epb_prev; /* Link to the previous block in the chain */
|
||||
PCRE2_SPTR epb_saved_eptr; /* Subject pointer value saved at the start of a subpattern */
|
||||
} eptrblock;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching (pcre2_match() and friends). */
|
||||
|
||||
typedef struct match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl; /* For "stack" frames */
|
||||
#endif
|
||||
uint32_t match_call_count; /* Count of match calls (NOTE(review): inferred from the name - confirm) */
|
||||
uint32_t match_limit; /* Match limit (NOTE(review): presumably bounds match_call_count - confirm) */
|
||||
uint32_t match_limit_recursion; /* Recursion limit (NOTE(review): inferred from the name - confirm) */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SIZE *ovector; /* Pointer to the offset vector */
|
||||
PCRE2_SIZE offset_end; /* One past the end */
|
||||
PCRE2_SIZE offset_max; /* The maximum usable for return data */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
uint16_t name_count; /* Number of names in name table */
|
||||
uint16_t name_entry_size; /* Size of entry in names table */
|
||||
PCRE2_SPTR name_table; /* Table of group names */
|
||||
PCRE2_SPTR start_code; /* For use when recursing */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of the subject string */
|
||||
PCRE2_SPTR start_match_ptr; /* Start of matched string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t capture_last; /* Most recent capture number + overflow flag */
|
||||
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||
uint32_t match_function_type; /* Set for certain special calls of match() */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
void *match_frames_base; /* For remembering malloc'd frames */
|
||||
#endif
|
||||
} match_block;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
functions. */
|
||||
|
||||
typedef struct dfa_match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
dfa_recursion_info *recursive; /* Linked list of recursion data (chain of dfa_recursion_info records) */
|
||||
} dfa_match_block;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* End of pcre2_intmodedep.h */
|
11503
pcre2/src/pcre2_jit_compile.c
Normal file
11503
pcre2/src/pcre2_jit_compile.c
Normal file
File diff suppressed because it is too large
Load diff
189
pcre2/src/pcre2_jit_match.c
Normal file
189
pcre2/src/pcre2_jit_match.c
Normal file
|
@ -0,0 +1,189 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
|
||||
/* Run a JIT-compiled matcher using a scratch stack that lives in this
function's own frame.

Arguments:
  arguments        the JIT argument block; its stack field is overwritten
  executable_func  the compiled function to call

Returns:           whatever executable_func returns
*/

static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
{
sljit_ub scratch[MACHINE_STACK_SIZE];
struct sljit_stack machine_stack;

/* Describe the scratch buffer: top and base both start at its address, and
both limits sit MACHINE_STACK_SIZE beyond the base. */

machine_stack.top = (sljit_sw)&scratch;
machine_stack.base = machine_stack.top;
machine_stack.limit = machine_stack.base + MACHINE_STACK_SIZE;
machine_stack.max_limit = machine_stack.limit;

arguments->stack = &machine_stack;

return executable_func(arguments);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Do a JIT pattern match *
|
||||
*************************************************/
|
||||
|
||||
/* This function runs a JIT pattern match.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
(the JIT stack is not an argument: it is obtained from mcontext)
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
-1 => failed to match (PCRE2_ERROR_NOMATCH)
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)subject;
|
||||
(void)length;
|
||||
(void)start_offset;
|
||||
(void)options;
|
||||
(void)match_data;
|
||||
(void)mcontext;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
executable_functions *functions = (executable_functions *)re->executable_jit;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
uint32_t oveccount = match_data->oveccount;
|
||||
uint32_t max_oveccount;
|
||||
union {
|
||||
void *executable_func;
|
||||
jit_function call_executable_func;
|
||||
} convert_executable_func;
|
||||
jit_arguments arguments;
|
||||
int rc;
|
||||
int index = 0;
|
||||
|
||||
if ((options & PCRE2_PARTIAL_HARD) != 0)
|
||||
index = 2;
|
||||
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
|
||||
index = 1;
|
||||
|
||||
if (functions->executable_funcs[index] == NULL)
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
/* Sanity checks should be handled by pcre_exec. */
|
||||
arguments.str = subject + start_offset;
|
||||
arguments.begin = subject;
|
||||
arguments.end = subject + length;
|
||||
arguments.match_data = match_data;
|
||||
arguments.startchar_ptr = subject;
|
||||
arguments.mark_ptr = NULL;
|
||||
arguments.options = options;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
arguments.callout = mcontext->callout;
|
||||
arguments.callout_data = mcontext->callout_data;
|
||||
arguments.offset_limit = mcontext->offset_limit;
|
||||
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
if (mcontext->jit_callback != NULL)
|
||||
jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
|
||||
else
|
||||
jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
arguments.callout = NULL;
|
||||
arguments.callout_data = NULL;
|
||||
arguments.offset_limit = PCRE2_UNSET;
|
||||
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
|
||||
MATCH_LIMIT : re->limit_match;
|
||||
jit_stack = NULL;
|
||||
}
|
||||
|
||||
/* JIT only need two offsets for each ovector entry. Hence
|
||||
the last 1/3 of the ovector will never be touched. */
|
||||
|
||||
max_oveccount = functions->top_bracket;
|
||||
if (oveccount > max_oveccount)
|
||||
oveccount = max_oveccount;
|
||||
arguments.oveccount = oveccount << 1;
|
||||
|
||||
|
||||
convert_executable_func.executable_func = functions->executable_funcs[index];
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
arguments.stack = (struct sljit_stack *)(jit_stack->stack);
|
||||
rc = convert_executable_func.call_executable_func(&arguments);
|
||||
}
|
||||
else
|
||||
rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
|
||||
|
||||
if (rc > (int)oveccount)
|
||||
rc = 0;
|
||||
match_data->code = re;
|
||||
match_data->subject = subject;
|
||||
match_data->rc = rc;
|
||||
match_data->startchar = arguments.startchar_ptr - subject;
|
||||
match_data->leftchar = 0;
|
||||
match_data->rightchar = 0;
|
||||
match_data->mark = arguments.mark_ptr;
|
||||
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
|
||||
|
||||
return match_data->rc;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_match.c */
|
227
pcre2/src/pcre2_jit_misc.c
Normal file
227
pcre2/src/pcre2_jit_misc.c
Normal file
|
@ -0,0 +1,227 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free JIT read-only data *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free_rodata)(void *current, void *allocator_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)current;
|
||||
(void)allocator_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
void *next;
|
||||
|
||||
SLJIT_UNUSED_ARG(allocator_data);
|
||||
|
||||
while (current != NULL)
|
||||
{
|
||||
next = *(void**)current;
|
||||
SLJIT_FREE(current, allocator_data);
|
||||
current = next;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
* Free JIT compiled code *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
(void)memctl;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
executable_functions *functions = (executable_functions *)executable_jit;
|
||||
void *allocator_data = memctl;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
|
||||
{
|
||||
if (functions->executable_funcs[i] != NULL)
|
||||
sljit_free_code(functions->executable_funcs[i]);
|
||||
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
|
||||
}
|
||||
|
||||
SLJIT_FREE(functions, allocator_data);
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free unused JIT memory *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)gcontext; /* Suppress warning */
|
||||
#else /* SUPPORT_JIT */
|
||||
SLJIT_UNUSED_ARG(gcontext);
|
||||
sljit_free_unused_memory_exec();
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Allocate a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
/* Create a JIT stack object.

Arguments:
  startsize   initial stack size; must be at least 1
  maxsize     maximum stack size; must be at least 1
  gcontext    general context passed to the allocator, or NULL

Returns:      the new JIT stack, or NULL if a size is zero, if memory cannot
              be obtained, or if the library was built without JIT support
*/

PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
  pcre2_general_context *gcontext)
{
#ifndef SUPPORT_JIT

(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;

#else  /* SUPPORT_JIT */

pcre2_jit_stack *jit_stack;

if (startsize < 1 || maxsize < 1)
  return NULL;
if (startsize > maxsize)
  startsize = maxsize;

/* Round both sizes up to a multiple of STACK_GROWTH_RATE. */

startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);

jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (jit_stack == NULL) return NULL;
jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);

/* If the stack itself could not be allocated, release the wrapper block
instead of leaking it and handing back an object with a NULL stack. */

if (jit_stack->stack == NULL)
  {
  jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
  return NULL;
  }
return jit_stack;

#endif  /* SUPPORT_JIT */
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Assign a JIT stack to a pattern *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
|
||||
void *callback_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)mcontext;
|
||||
(void)callback;
|
||||
(void)callback_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
if (mcontext == NULL) return;
|
||||
mcontext->jit_callback = callback;
|
||||
mcontext->jit_callback_data = callback_data;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)jit_stack;
|
||||
#else /* SUPPORT_JIT */
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
|
||||
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get target CPU type *
|
||||
*************************************************/
|
||||
|
||||
const char*
|
||||
PRIV(jit_get_target)(void)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
return "JIT is not supported";
|
||||
#else /* SUPPORT_JIT */
|
||||
return sljit_get_platform_name();
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of JIT code *
|
||||
*************************************************/
|
||||
|
||||
size_t
|
||||
PRIV(jit_get_size)(void *executable_jit)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
return 0;
|
||||
#else /* SUPPORT_JIT */
|
||||
sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
|
||||
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
|
||||
return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_misc.c */
|
1735
pcre2/src/pcre2_jit_test.c
Normal file
1735
pcre2/src/pcre2_jit_test.c
Normal file
File diff suppressed because it is too large
Load diff
157
pcre2/src/pcre2_maketables.c
Normal file
157
pcre2/src/pcre2_maketables.c
Normal file
|
@ -0,0 +1,157 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
#ifndef DFTABLES
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
# include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE2 character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||
program) malloc() is used, and the function has a different name so as not to
|
||||
clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none when DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data
|
||||
*/
|
||||
|
||||
#ifdef DFTABLES /* Included in freestanding dftables.c program */
|
||||
static const uint8_t *maketables(void)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)malloc(tables_length);
|
||||
|
||||
#else /* Not DFTABLES, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
|
||||
malloc(tables_length));
|
||||
#endif /* DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different.
|
||||
|
||||
Note that the table for "space" includes everything "isspace" gives, including
|
||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||
From release 8.34 is is also correct for Perl space, because Perl added VT at
|
||||
release 5.18.
|
||||
|
||||
Note also that it is possible for a character to be alnum or alpha without
|
||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
|
||||
test for alnum specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||
white space chars, because Perl didn't recognize it as such for \s and for
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
|
||||
at release 8.34. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
|
||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||
In this instance, that is ok because we want binary zero to be flagged as a
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of pcre2_maketables.c */
|
7243
pcre2/src/pcre2_match.c
Normal file
7243
pcre2/src/pcre2_match.c
Normal file
File diff suppressed because it is too large
Load diff
147
pcre2/src/pcre2_match_data.c
Normal file
147
pcre2/src/pcre2_match_data.c
Normal file
|
@ -0,0 +1,147 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector triplets. */
|
||||
|
||||
/* Allocate a match data block with room for oveccount ovector triplets.

Arguments:
  oveccount   number of ovector triplets wanted; 0 is treated as 1
  gcontext    general context supplying the allocator; passed on to
              PRIV(memctl_malloc)() cast to a pcre2_memctl pointer

Returns:      the new block, or NULL if the requested size cannot be
              represented in a size_t or the allocation fails
*/

PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
const size_t triplet_size = 3*sizeof(PCRE2_SIZE);
const size_t max_size = ~(size_t)0;

if (oveccount < 1) oveccount = 1;

/* The size must be computed in size_t. Writing 3*oveccount would be evaluated
in 32-bit unsigned arithmetic and wrap for large counts, giving a block that
is too small for the ovector it claims to hold. Refuse counts whose total size
does not fit in a size_t. */

if (oveccount > (max_size - sizeof(pcre2_match_data))/triplet_size)
  return NULL;

yield = PRIV(memctl_malloc)(
  sizeof(pcre2_match_data) + (size_t)oveccount*triplet_size,
  (pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
return yield;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block using pattern data *
|
||||
*************************************************/
|
||||
|
||||
/* If no context is supplied, use the memory allocator from the code. */
|
||||
|
||||
/* Create a match data block sized from a compiled pattern: one ovector slot
more than the pattern's top_bracket value. When gcontext is NULL the compiled
pattern itself is passed on in its place, so that its memory allocator is
used. */

PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
pcre2_general_context *allocator =
  (gcontext != NULL)? gcontext : (pcre2_general_context *)code;

return pcre2_match_data_create(re->top_bracket + 1, allocator);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
{
|
||||
if (match_data != NULL)
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get last mark in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
|
||||
pcre2_get_mark(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->mark;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get pointer to ovector *
|
||||
*************************************************/
|
||||
|
||||
/* Return the address of the ovector held in the match data block. The block
must not be NULL. */

PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
PCRE2_SIZE *offsets = match_data->ovector;
return offsets;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get number of ovector slots *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->oveccount;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get starting code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->startchar;
|
||||
}
|
||||
|
||||
/* End of pcre2_match_data.c */
|
243
pcre2/src/pcre2_newline.c
Normal file
243
pcre2/src/pcre2_newline.c
Normal file
|
@ -0,0 +1,243 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE2 supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr lies before the end of the string.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||
value of ptr is greater than the start of the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_newline.c */
|
120
pcre2/src/pcre2_ord2utf.c
Normal file
120
pcre2/src/pcre2_ord2utf.c
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a function that converts a Unicode character code point
|
||||
into a UTF string. The behaviour is different for each code unit width. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||
Supply a dummy function because some compilers do not like empty source
|
||||
modules. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert code point to UTF *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result
|
||||
|
||||
Returns: number of code units placed in the buffer
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
/* Convert to UTF-8 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
register int i, j;
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
/* Convert to UTF-16 */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (cvalue <= 0xffff)
|
||||
{
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
}
|
||||
cvalue -= 0x10000;
|
||||
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||
return 2;
|
||||
|
||||
/* Convert to UTF-32 */
|
||||
|
||||
#else
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_ord2utf.c */
|
410
pcre2/src/pcre2_pattern_info.c
Normal file
410
pcre2/src/pcre2_pattern_info.c
Normal file
|
@ -0,0 +1,410 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
what what information is required
|
||||
where where to put the information; if NULL, return length
|
||||
|
||||
Returns: 0 when data returned
|
||||
> 0 when length requested
|
||||
< 0 on error or unset value
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
{
|
||||
const pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
|
||||
if (where == NULL) /* Requests field length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
case PCRE2_INFO_BSR:
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
case PCRE2_INFO_RECURSIONLIMIT:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
return sizeof(const uint8_t *);
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
case PCRE2_INFO_SIZE:
|
||||
return sizeof(size_t);
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
return sizeof(PCRE2_SPTR);
|
||||
}
|
||||
}
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
*((uint32_t *)where) = re->overall_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
*((uint32_t *)where) = re->compile_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
*((uint32_t *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BSR:
|
||||
*((uint32_t *)where) = re->bsr_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
*((uint32_t *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
|
||||
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
|
||||
re->first_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
|
||||
&(re->start_bitmap[0]) : NULL;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) = (re->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(re->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
|
||||
re->last_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
*((uint32_t *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
*((uint32_t *)where) = re->minlength;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
*((uint32_t *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
*((uint32_t *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
*((uint32_t *)where) = re->newline_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_RECURSIONLIMIT:
|
||||
*((uint32_t *)where) = re->limit_recursion;
|
||||
if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
*((size_t *)where) = re->blocksize;
|
||||
break;
|
||||
|
||||
default: return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout enumerator *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
callback function called for each callout block
|
||||
callout_data user data passed to the callback
|
||||
|
||||
Returns: 0 when successfully completed
|
||||
< 0 on local error
|
||||
!= 0 for callback error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_callout_enumerate(const pcre2_code *code,
|
||||
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
|
||||
{
|
||||
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
pcre2_callout_enumerate_block cb;
|
||||
PCRE2_SPTR cc;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
cb.version = 0;
|
||||
cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
|
||||
+ re->name_count * re->name_entry_size;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
int rc;
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
case OP_TYPEPOSUPTO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
|
||||
#endif
|
||||
break;
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
case OP_XCLASS:
|
||||
cc += GET(cc, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[*cc] + cc[1];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = cc[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_length = 0;
|
||||
cb.callout_string = NULL;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_length =
|
||||
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
|
||||
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_pattern_info.c */
|
832
pcre2/src/pcre2_printint.c
Normal file
832
pcre2/src/pcre2_printint.c
Normal file
|
@ -0,0 +1,832 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. It can also optionally be included in
|
||||
pcre2_compile.c for detailed debugging in error situations. */
|
||||
|
||||
|
||||
/* Tables of operator names. The same 8-bit table is used for all code unit
|
||||
widths, so it must be defined only once. The list itself is defined in
|
||||
pcre2_internal.h, which is #included by pcre2test before this file. */
|
||||
|
||||
/* Guard: this file is included once per supported code unit width, and only
the first inclusion may define the shared table of opcode names. */
#ifndef OP_LISTS_DEFINED
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
#define OP_LISTS_DEFINED
|
||||
#endif
|
||||
|
||||
/* The functions and tables herein must all have mode-dependent names. */
|
||||
|
||||
/* Each local identifier is mapped through PCRE2_SUFFIX, so the copy of this
file compiled for one code unit width does not clash with the others. */
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
The contents of the table are, however, mode-dependent. */
|
||||
|
||||
/* Indexed by opcode; pcre2_printint() uses it to step from one item to the
next and checks at compile time that it has OP_TABLE_LENGTH entries. */
static const uint8_t OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print one character from a string *
|
||||
*************************************************/
|
||||
|
||||
/* In UTF mode the character may occupy more than one code unit.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr pointer to first code unit of the character
|
||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||
|
||||
Returns: number of additional code units used
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
{
|
||||
uint32_t c = *ptr;
|
||||
BOOL one_code_unit = !utf;
|
||||
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
one_code_unit = c < 0x80;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
one_code_unit = (c & 0xfc00) != 0xd800;
|
||||
#else
|
||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
if (one_code_unit)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
return 0;
|
||||
#else
|
||||
|
||||
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
|
||||
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
|
||||
\X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if ((c & 0xc0) != 0xc0)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & PRIV(utf8_table3)[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
|
||||
return i - 1;
|
||||
}
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
Print it with \X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return 1;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
|
||||
|
||||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||
as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* These take no account of UTF as they always print each individual code unit.
|
||||
The string is zero-terminated for print_custring(); the length is given for
|
||||
print_custring_bylen().
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr points to the string
|
||||
len length for print_custring_bylen()
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||
{
|
||||
while (*ptr != '\0')
|
||||
{
|
||||
register uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||
{
|
||||
while (len-- > 0)
|
||||
{
|
||||
register uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Look up the name of a Unicode property from its type and value. Without
UTF/UCP support the name table does not exist; a pattern that uses Unicode
properties cannot be compiled in that configuration, so this function should
never be called then. The single #ifdef here saves scattering conditionals
through the main code.

Arguments:
  ptype    property type
  pvalue   property value

Returns:   the property name, or "??" if there is no matching table entry
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int idx = (int)PRIV(utt_size);

/* Scan from the last table entry down to the first. */

while (idx-- > 0)
  {
  if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue)
    return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
  }
return "??";
#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print Unicode property value *
|
||||
*************************************************/
|
||||
|
||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||
pseudo-property that contains a pointer to a list of case-equivalent
|
||||
characters.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
||||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
|
||||
code[2]), after);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *not = (*code == OP_PROP)? "" : "not ";
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
fprintf (f, "%s%sclist", before, not);
|
||||
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
|
||||
fprintf(f, "%s", after);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||
printed. Lengths can be turned off from pcre2test so that automatic tests on
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
|
||||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
/* Print the compiled form of a pattern, one item per output line, stopping
after OP_END. */
static void
|
||||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
PCRE2_SPTR codestart, nametable, code;
|
||||
uint32_t nesize = re->name_entry_size;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
/* The name table follows the fixed header; the bytecode follows the table. */
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||
code = codestart = nametable + re->name_count * re->name_entry_size;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR ccode;
|
||||
uint32_t c;
|
||||
int i;
|
||||
const char *flag = " ";
|
||||
/* Code units used by this item beyond its OP_lengths entry; added to code at
the bottom of the loop. */
unsigned int extra = 0;
|
||||
|
||||
/* Offset of this item from the start of the bytecode, or padding. */
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* These cases are never obeyed. This is a fudge that causes a compile-
|
||||
time error if the vectors OP_names or OP_lengths, which are indexed
|
||||
by opcode, are not the correct length. It seems to be the only way to do
|
||||
such a check at compile time, as the sizeof() operator does not work in
|
||||
the C preprocessor. */
|
||||
|
||||
case OP_TABLE_LENGTH:
|
||||
case OP_TABLE_LENGTH +
|
||||
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
|
||||
(sizeof(OP_lengths) == OP_TABLE_LENGTH)):
|
||||
break;
|
||||
/* ========================================================================== */
|
||||
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
/* A run of consecutive OP_CHAR items is printed on one line. The loop advances
code itself, so it ends with continue to skip the common advance below. */
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
/* As for OP_CHAR, but for caseless characters. */
case OP_CHARI:
|
||||
fprintf(f, " /i ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf);
|
||||
}
|
||||
while (*code == OP_CHARI);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRA:
|
||||
case OP_SCBRAPOS:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRA:
|
||||
case OP_SBRAPOS:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRPOS:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CLOSE:
|
||||
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_DNCREF:
|
||||
{
|
||||
/* The name is located in the name table by entry index; it starts IMM2_SIZE
code units into the entry. */
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond ref <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_RREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond recurse any");
|
||||
else
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DNRREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond recurse <", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_FALSE:
|
||||
fprintf(f, " Cond false");
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
fprintf(f, " Cond true");
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_POSSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " %s ", flag);
|
||||
|
||||
/* Type repeats are followed by a character type opcode; the others are
followed by a literal character. */
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + 1, "", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, "%s", OP_names[code[1]]);
|
||||
}
|
||||
else extra = print_char(f, code+1, utf);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
fprintf(f, " %s ", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
|
||||
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
{
|
||||
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_NOTI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_NOT:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]");
|
||||
break;
|
||||
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTPOSUPTO:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
|
||||
fprintf(f, "]{");
|
||||
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
|
||||
else
|
||||
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REFI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_REF:
|
||||
fprintf(f, " %s \\%d", flag, GET2(code,1));
|
||||
/* Point ccode at the next item, then share the repeat-printing code that
follows the class handling below. */
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_DNREFI:
|
||||
flag = "/i";
|
||||
/* Fall through */
|
||||
case OP_DNREF:
|
||||
{
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s \\k<", flag);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
|
||||
GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
/* c is the opening delimiter stored ahead of the string. */
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||
/* Replace c with the matching closing delimiter if the opening one is in the
table; otherwise the same character closes the string. */
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
|
||||
GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
print_prop(f, code, " ", "");
|
||||
break;
|
||||
|
||||
/* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
|
||||
in having this code always here, and it makes it less messy without all
|
||||
those #ifdefs. */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
unsigned int min, max;
|
||||
BOOL printmap;
|
||||
BOOL invertmap = FALSE;
|
||||
uint8_t *map;
|
||||
uint8_t inverted_map[32];
|
||||
|
||||
fprintf(f, " [");
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
/* For an XCLASS, extra starts as the item's total length. */
extra = GET(code, 1);
|
||||
ccode = code + LINK_SIZE + 1;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
if ((*ccode & XCL_NOT) != 0)
|
||||
{
|
||||
invertmap = (*ccode & XCL_HASPROP) == 0;
|
||||
fprintf(f, "^");
|
||||
}
|
||||
ccode++;
|
||||
}
|
||||
else
|
||||
{
|
||||
printmap = TRUE;
|
||||
ccode = code + 1;
|
||||
}
|
||||
|
||||
/* Print a bit map */
|
||||
|
||||
if (printmap)
|
||||
{
|
||||
map = (uint8_t *)ccode;
|
||||
if (invertmap)
|
||||
{
|
||||
for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
|
||||
map = inverted_map;
|
||||
}
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((map[i/8] & (1 << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
/* Find the end of the run of set bits that starts at i. */
for (j = i+1; j < 256; j++)
|
||||
if ((map[j/8] & (1 << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(i)) fprintf(f, "%c", i);
|
||||
else fprintf(f, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||
if (PRINTABLE(j)) fprintf(f, "%c", j);
|
||||
else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
/* Resume scanning after the run just printed. */
i = j;
|
||||
}
|
||||
}
|
||||
/* Step over the 32-byte map, measured in code units. */
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
PCRE2_UCHAR ch;
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
BOOL not = FALSE;
|
||||
const char *notch = "";
|
||||
|
||||
switch(ch)
|
||||
{
|
||||
case XCL_NOTPROP:
|
||||
not = TRUE;
|
||||
notch = "^";
|
||||
/* Fall through */
|
||||
|
||||
case XCL_PROP:
|
||||
{
|
||||
unsigned int ptype = *ccode++;
|
||||
unsigned int pvalue = *ccode++;
|
||||
|
||||
switch(ptype)
|
||||
{
|
||||
case PT_PXGRAPH:
|
||||
fprintf(f, "[:%sgraph:]", notch);
|
||||
break;
|
||||
|
||||
case PT_PXPRINT:
|
||||
fprintf(f, "[:%sprint:]", notch);
|
||||
break;
|
||||
|
||||
case PT_PXPUNCT:
|
||||
fprintf(f, "[:%spunct:]", notch);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
|
||||
get_ucpname(ptype, pvalue));
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Any other value is followed by one character, or by two when it is
XCL_RANGE. */
default:
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, utf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF class which was created by negation */
|
||||
|
||||
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,1 + IMM2_SIZE);
|
||||
if (max == 0) fprintf(f, "{%u,}", min);
|
||||
else fprintf(f, "{%u,%u}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
warning about the lack of a default code path. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
fprintf(f, " %s ", OP_names[*code]);
|
||||
/* code[1] holds the length of the argument, which starts at code + 2. */
print_custring_bylen(f, code + 2, code[1]);
|
||||
extra += code[1];
|
||||
break;
|
||||
|
||||
case OP_THEN:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CIRCM:
|
||||
case OP_DOLLM:
|
||||
flag = "/m";
|
||||
/* Fall through */
|
||||
|
||||
/* Anything else is just an item with no data, but possibly a flag. */
|
||||
|
||||
default:
|
||||
fprintf(f, " %s %s", flag, OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Advance by the opcode's table length plus any extra units counted above. */
code += OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_printint.c */
|
258
pcre2/src/pcre2_serialize.c
Normal file
258
pcre2/src/pcre2_serialize.c
Normal file
|
@ -0,0 +1,258 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions for serializing and deserializing
|
||||
a sequence of compiled codes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
#define SERIALIZED_DATA_MAGIC 0x50523253u
|
||||
|
||||
/* Deserialization is limited to the current PCRE version and
|
||||
character width. */
|
||||
|
||||
#define SERIALIZED_DATA_VERSION \
|
||||
((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
|
||||
|
||||
#define SERIALIZED_DATA_CONFIG \
|
||||
(sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Serialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
|
||||
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *bytes;
|
||||
uint8_t *dst_bytes;
|
||||
int32_t i;
|
||||
PCRE2_SIZE total_size;
|
||||
const pcre2_real_code *re;
|
||||
const uint8_t *tables;
|
||||
pcre2_serialized_data *data;
|
||||
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + tables_length;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
if (tables == NULL)
|
||||
tables = re->tables;
|
||||
else if (tables != re->tables)
|
||||
return PCRE2_ERROR_MIXEDTABLES;
|
||||
total_size += re->blocksize;
|
||||
}
|
||||
|
||||
/* Initialize the byte stream. */
|
||||
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
|
||||
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
/* The controller is stored as a hidden parameter. */
|
||||
memcpy(bytes, memctl, sizeof(pcre2_memctl));
|
||||
bytes += sizeof(pcre2_memctl);
|
||||
|
||||
data = (pcre2_serialized_data *)bytes;
|
||||
data->magic = SERIALIZED_DATA_MAGIC;
|
||||
data->version = SERIALIZED_DATA_VERSION;
|
||||
data->config = SERIALIZED_DATA_CONFIG;
|
||||
data->number_of_codes = number_of_codes;
|
||||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, tables_length);
|
||||
dst_bytes += tables_length;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
memcpy(dst_bytes, (char *)re, re->blocksize);
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
||||
*serialized_bytes = bytes;
|
||||
*serialized_size = total_size;
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Deserialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
|
||||
const uint8_t *bytes, pcre2_general_context *gcontext)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
const uint8_t *src_bytes;
|
||||
pcre2_real_code *dst_re;
|
||||
uint8_t *tables;
|
||||
int32_t i, j;
|
||||
|
||||
/* Sanity checks. */
|
||||
|
||||
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
if (number_of_codes > data->number_of_codes)
|
||||
number_of_codes = data->number_of_codes;
|
||||
|
||||
src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, tables_length);
|
||||
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
|
||||
src_bytes += tables_length;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
as in the block. */
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
CODE_BLOCKSIZE_TYPE blocksize;
|
||||
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
|
||||
sizeof(CODE_BLOCKSIZE_TYPE));
|
||||
|
||||
/* The allocator provided by gcontext replaces the original one. */
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (dst_re == NULL)
|
||||
{
|
||||
memctl->free(tables, memctl->memory_data);
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
memctl->free(codes[j], memctl->memory_data);
|
||||
codes[j] = NULL;
|
||||
}
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
/* The new allocator must be preserved. */
|
||||
|
||||
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
|
||||
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
|
||||
|
||||
/* At the moment only one table is supported. */
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
||||
codes[i] = dst_re;
|
||||
src_bytes += blocksize;
|
||||
}
|
||||
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get the number of serialized patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
|
||||
if (data == NULL) return PCRE2_ERROR_NULL;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
return data->number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free the allocated stream *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_free(uint8_t *bytes)
|
||||
{
|
||||
if (bytes != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_serialize.c */
|
201
pcre2/src/pcre2_string_utils.c
Normal file
201
pcre2/src/pcre2_string_utils.c
Normal file
|
@ -0,0 +1,201 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains internal functions for comparing and finding the length
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
functions work only on 8-bit data. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the length of a PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strlen)(PCRE2_SPTR str)
|
||||
{
|
||||
PCRE2_SIZE c = 0;
|
||||
while (*str++ != 0) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy 8-bit 0-terminated string to PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/* Arguments:
|
||||
str1 buffer to receive the string
|
||||
str2 8-bit string to be copied
|
||||
|
||||
Returns: the number of code units used (excluding trailing zero)
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR *t = str1;
|
||||
while (*str2 != 0) *t++ = *str2++;
|
||||
*t = 0;
|
||||
return t - str1;
|
||||
}
|
||||
|
||||
/* End of pcre2_string_utils.c */
|
1575
pcre2/src/pcre2_study.c
Normal file
1575
pcre2/src/pcre2_study.c
Normal file
File diff suppressed because it is too large
Load diff
850
pcre2/src/pcre2_substitute.c
Normal file
850
pcre2/src/pcre2_substitute.c
Normal file
|
@ -0,0 +1,850 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PTR_STACK_SIZE 20
|
||||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find end of substitute text *
|
||||
*************************************************/
|
||||
|
||||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||
constructions. This requires the identification of unescaped : and }
|
||||
characters. This function scans for such. It must deal with nested ${
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
character, or to where an error was detected.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression (for options)
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
last TRUE if the last expected string (only } recognized)
|
||||
|
||||
Returns: 0 on success
|
||||
negative error code on failure
|
||||
*/
|
||||
|
||||
static int
|
||||
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
|
||||
BOOL last)
|
||||
{
|
||||
int rc = 0;
|
||||
uint32_t nestlevel = 0;
|
||||
BOOL literal = FALSE;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
|
||||
for (; ptr < ptrend; ptr++)
|
||||
{
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
if (nestlevel == 0) goto EXIT;
|
||||
nestlevel--;
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
|
||||
|
||||
else if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
nestlevel++;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int erc;
|
||||
int errorcode = 0;
|
||||
uint32_t ch;
|
||||
|
||||
if (ptr < ptrend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
case CHAR_l:
|
||||
case CHAR_U:
|
||||
case CHAR_u:
|
||||
ptr += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||||
code->overall_options, FALSE, NULL);
|
||||
if (errorcode != 0)
|
||||
{
|
||||
rc = errorcode;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
switch(erc)
|
||||
{
|
||||
case 0: /* Data character */
|
||||
case ESC_E: /* Isolated \E is ignored */
|
||||
break;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
|
||||
|
||||
EXIT:
|
||||
*ptrptr = ptr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match and substitute *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and creates a new
|
||||
string with substitutions. The first 7 arguments are the same as for
|
||||
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block, or is NULL
|
||||
  mcontext        points to a PCRE2 match context
|
||||
replacement points to the replacement string
|
||||
rlength length of replacement string
|
||||
buffer where to put the substituted string
|
||||
blength points to length of buffer; updated to length of string
|
||||
|
||||
Returns: >= 0 number of substitutions made
|
||||
< 0 an error code
|
||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||
*/
|
||||
|
||||
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (by jumping to NOROOM), or keep on,
accumulating in extra_needed the additional length that would have been
required.

The macro relies on these names being in scope where it is used: buffer,
buff_offset, lengthleft, overflowed, extra_needed, suboptions, and the label
NOROOM. The expansion is a single brace-enclosed block. The length argument is
evaluated exactly once, into a block-local variable, so that an argument with
side effects, or one containing low-precedence operators, behaves as a function
argument would. The from argument is used at most once and is parenthesized. */

#define CHECKMEMCPY(from,length) \
  { \
  PCRE2_SIZE checkmemcpy_length = (length); \
  if (!overflowed && lengthleft < checkmemcpy_length) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    overflowed = TRUE; \
    extra_needed = checkmemcpy_length - lengthleft; \
    } \
  else if (overflowed) \
    { \
    extra_needed += checkmemcpy_length; \
    } \
  else \
    { \
    memcpy(buffer + buff_offset, (from), CU2BYTES(checkmemcpy_length)); \
    buff_offset += checkmemcpy_length; \
    lengthleft -= checkmemcpy_length; \
    } \
  }
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||
{
|
||||
int rc;
|
||||
int subs;
|
||||
int forcecase = 0;
|
||||
int forcecasereset = 0;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR repend;
|
||||
PCRE2_SIZE extra_needed = 0;
|
||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
|
||||
/* Partial matching is not valid. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* If no match data block is provided, create one. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context *gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
match_data_created = TRUE;
|
||||
}
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
/* Find lengths of zero-terminated strings and the end of the replacement. */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||
repend = replacement + rlength;
|
||||
|
||||
/* Check UTF replacement string if necessary. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Save the substitute options and remove them from the match options. */
|
||||
|
||||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Copy up to the start offset */
|
||||
|
||||
CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
{
|
||||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||
#endif
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
subject, breaks the global loop. Otherwise, advance the starting point by one
|
||||
character, copying it to the output, and try again. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
PCRE2_SIZE save_start;
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
|
||||
if (goptions == 0 || start_offset >= length) break;
|
||||
|
||||
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
|
||||
we have advanced into the middle of it, advance one more code point. In
|
||||
other words, do not start in the middle of CRLF, even if CR and LF on their
|
||||
own are valid newlines. */
|
||||
|
||||
save_start = start_offset++;
|
||||
if (subject[start_offset-1] == CHAR_CR &&
|
||||
code->newline_convention != PCRE2_NEWLINE_CR &&
|
||||
code->newline_convention != PCRE2_NEWLINE_LF &&
|
||||
start_offset < length &&
|
||||
subject[start_offset] == CHAR_LF)
|
||||
start_offset++;
|
||||
|
||||
/* Otherwise, in UTF mode, advance past any secondary code points. */
|
||||
|
||||
else if ((code->overall_options & PCRE2_UTF) != 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
|
||||
start_offset++;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
while (start_offset < length &&
|
||||
(subject[start_offset] & 0xfc00) == 0xdc00)
|
||||
start_offset++;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past, reset the special global options, and
|
||||
continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0])
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
if (subs == INT_MAX)
|
||||
{
|
||||
rc = PCRE2_ERROR_TOOMANYREPLACE;
|
||||
goto EXIT;
|
||||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
must handle nested substrings that are to be reprocessed. */
|
||||
|
||||
ptr = replacement;
|
||||
for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
||||
/* If at the end of a nested substring, pop the stack. */
|
||||
|
||||
if (ptr >= repend)
|
||||
{
|
||||
if (ptrstackptr <= 0) break; /* End of replacement string */
|
||||
repend = ptrstack[--ptrstackptr];
|
||||
ptr = ptrstack[--ptrstackptr];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle the next character */
|
||||
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
goto LOADLITERAL;
|
||||
}
|
||||
|
||||
/* Not in literal mode. */
|
||||
|
||||
if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
int group, n;
|
||||
uint32_t special = 0;
|
||||
BOOL inparens;
|
||||
BOOL star;
|
||||
PCRE2_SIZE sublength;
|
||||
PCRE2_SPTR text1_start = NULL;
|
||||
PCRE2_SPTR text1_end = NULL;
|
||||
PCRE2_SPTR text2_start = NULL;
|
||||
PCRE2_SPTR text2_end = NULL;
|
||||
PCRE2_UCHAR next;
|
||||
PCRE2_UCHAR name[33];
|
||||
|
||||
if (++ptr >= repend) goto BAD;
|
||||
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
|
||||
|
||||
group = -1;
|
||||
n = 0;
|
||||
inparens = FALSE;
|
||||
star = FALSE;
|
||||
|
||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inparens = TRUE;
|
||||
}
|
||||
|
||||
if (next == CHAR_ASTERISK)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
star = TRUE;
|
||||
}
|
||||
|
||||
if (!star && next >= CHAR_0 && next <= CHAR_9)
|
||||
{
|
||||
group = next - CHAR_0;
|
||||
while (++ptr < repend)
|
||||
{
|
||||
next = *ptr;
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + next - CHAR_0;
|
||||
|
||||
/* A check for a number greater than the highest captured group
|
||||
is sufficient here; no need for a separate overflow check. If unknown
|
||||
groups are to be treated as unset, just skip over any remaining
|
||||
digits and carry on. */
|
||||
|
||||
if (group > code->top_bracket)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint8_t *ctypes = code->tables + ctypes_offset;
|
||||
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
|
||||
{
|
||||
name[n++] = next;
|
||||
if (n > 32) goto BAD;
|
||||
if (++ptr >= repend) break;
|
||||
next = *ptr;
|
||||
}
|
||||
if (n == 0) goto BAD;
|
||||
name[n] = 0;
|
||||
}
|
||||
|
||||
/* In extended mode we recognize ${name:+set text:unset text} and
|
||||
${name:-default text}. */
|
||||
|
||||
if (inparens)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
!star && ptr < repend - 2 && next == CHAR_COLON)
|
||||
{
|
||||
special = *(++ptr);
|
||||
if (special != CHAR_PLUS && special != CHAR_MINUS)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSTITUTION;
|
||||
goto PTREXIT;
|
||||
}
|
||||
|
||||
text1_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text1_end = ptr;
|
||||
|
||||
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
|
||||
{
|
||||
text2_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, TRUE);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text2_end = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Have found a syntactically correct group number or name, or *name.
|
||||
Only *MARK is currently recognized. */
|
||||
|
||||
if (star)
|
||||
{
|
||||
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
|
||||
{
|
||||
PCRE2_SPTR mark = pcre2_get_mark(match_data);
|
||||
if (mark != NULL)
|
||||
{
|
||||
PCRE2_SPTR mark_start = mark;
|
||||
while (*mark != 0) mark++;
|
||||
fraglength = mark - mark_start;
|
||||
CHECKMEMCPY(mark_start, fraglength);
|
||||
}
|
||||
}
|
||||
else goto BAD;
|
||||
}
|
||||
|
||||
/* Substitute the contents of a group. We don't use substring_copy
|
||||
functions any more, in order to support case forcing. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR subptr, subptrend;
|
||||
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
search for the first one that is set. If the name is not found when
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
|
||||
non-existent group. */
|
||||
|
||||
if (group < 0)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
group = code->top_bracket + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc < 0) goto PTREXIT;
|
||||
for (entry = first; entry <= last; entry += rc)
|
||||
{
|
||||
uint32_t ng = GET2(entry, 0);
|
||||
if (ng < ovector_count)
|
||||
{
|
||||
if (group < 0) group = ng; /* First in ovector */
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
{
|
||||
group = ng; /* First that is set */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If group is still negative, it means we did not find a group
|
||||
that is in the ovector. Just set the first group. */
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have a group that is identified by number. Find the length of
|
||||
the captured string. If a group in a non-special substitution is unset
|
||||
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
|
||||
|
||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
rc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
|
||||
if (special == 0) /* Plain substitution */
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
|
||||
goto PTREXIT; /* Else error */
|
||||
}
|
||||
}
|
||||
|
||||
/* If special is '+' we have a 'set' and possibly an 'unset' text,
|
||||
both of which are reprocessed when used. If special is '-' we have a
|
||||
default text for when the group is unset; it must be reprocessed. */
|
||||
|
||||
if (special != 0)
|
||||
{
|
||||
if (special == CHAR_MINUS)
|
||||
{
|
||||
if (rc == 0) goto LITERAL_SUBSTITUTE;
|
||||
text2_start = text1_start;
|
||||
text2_end = text1_end;
|
||||
}
|
||||
|
||||
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
|
||||
ptrstack[ptrstackptr++] = ptr;
|
||||
ptrstack[ptrstackptr++] = repend;
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
ptr = text1_start;
|
||||
repend = text1_end;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = text2_start;
|
||||
repend = text2_end;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise we have a literal substitution of a group's contents. */
|
||||
|
||||
LITERAL_SUBSTITUTE:
|
||||
subptr = subject + ovector[group*2];
|
||||
subptrend = subject + ovector[group*2 + 1];
|
||||
|
||||
/* Substitute a literal string, possibly forcing alphabetic case. */
|
||||
|
||||
while (subptr < subptrend)
|
||||
{
|
||||
GETCHARINCTEST(ch, subptr);
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1 << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
forcecase = forcecasereset;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle an escape sequence in extended mode. We can use check_escape()
|
||||
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
|
||||
the case-forcing escapes are not supported in pcre2_compile() so must be
|
||||
recognized here. */
|
||||
|
||||
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int errorcode = 0;
|
||||
|
||||
if (ptr < repend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
forcecase = forcecasereset = -1;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_l:
|
||||
forcecase = -1;
|
||||
forcecasereset = 0;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_U:
|
||||
forcecase = forcecasereset = 1;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_u:
|
||||
forcecase = 1;
|
||||
forcecasereset = 0;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||||
code->overall_options, FALSE, NULL);
|
||||
if (errorcode != 0) goto BADESCAPE;
|
||||
ptr++;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
case ESC_E:
|
||||
forcecase = forcecasereset = 0;
|
||||
continue;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
goto LITERAL;
|
||||
|
||||
default:
|
||||
goto BADESCAPE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle a literal code unit */
|
||||
|
||||
else
|
||||
{
|
||||
LOADLITERAL:
|
||||
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
|
||||
|
||||
LITERAL:
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1 << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
forcecase = forcecasereset;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output. Update the start offset to
|
||||
point to the rest of the subject string. If we matched an empty string,
|
||||
do the magic for global matches. */
|
||||
|
||||
start_offset = ovector[1];
|
||||
goptions = (ovector[0] != ovector[1])? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject. */
|
||||
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp , 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
needed. Otherwise, an overflow generates an immediate error return. */
|
||||
|
||||
if (overflowed)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
*blength = buff_length + extra_needed;
|
||||
}
|
||||
|
||||
/* After a successful execution, return the number of substitutions and set the
|
||||
length of buffer used, excluding the trailing zero. */
|
||||
|
||||
else
|
||||
{
|
||||
rc = subs;
|
||||
*blength = buff_offset - 1;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (match_data_created) pcre2_match_data_free(match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
NOROOM:
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
goto EXIT;
|
||||
|
||||
BAD:
|
||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||
goto PTREXIT;
|
||||
|
||||
BADESCAPE:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
|
||||
PTREXIT:
|
||||
*blength = (PCRE2_SIZE)(ptr - replacement);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2_substitute.c */
|
536
pcre2/src/pcre2_substring.c
Normal file
536
pcre2/src/pcre2_substring.c
Normal file
|
@ -0,0 +1,536 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative error code:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from copy_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy numbered captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by number.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: buffer too small
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
buffer[size] = 0;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract named captured string *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new memory. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match_data
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer to the new memory
|
||||
sizeptr where to put the length of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract captured string to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
memory.
|
||||
|
||||
Arguments:
|
||||
match_data points to match data
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the new memory
|
||||
sizeptr where to put the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_UCHAR *yield;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
|
||||
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
yield[size] = 0;
|
||||
*stringptr = yield;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_free(PCRE2_UCHAR *string)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a named substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a named captured substring. If the regex
|
||||
permits duplicate names, the first substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringname the name of the required substring
|
||||
sizeptr where to put the length
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a numbered substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
length to zero.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringnumber the number of the required substring
|
||||
sizeptr where to put the length, if not NULL
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector is too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
int count = match_data->rc;
|
||||
if (count == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
|
||||
count = 0;
|
||||
}
|
||||
else if (count < 0) return count; /* Match failed */
|
||||
|
||||
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
{
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
else /* Matched using pcre2_dfa_match() */
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
|
||||
left = match_data->ovector[stringnumber*2];
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract all captured strings to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
handled.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
listptr set to point to the list of pointers
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||
or a match failure code
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
PCRE2_SIZE **lengthsptr)
|
||||
{
|
||||
int i, count, count2;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_SIZE *lensp;
|
||||
pcre2_memctl *memp;
|
||||
PCRE2_UCHAR **listp;
|
||||
PCRE2_UCHAR *sp;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
if ((count = match_data->rc) < 0) return count; /* Match failed */
|
||||
if (count == 0) count = match_data->oveccount; /* Ovector too small */
|
||||
|
||||
count2 = 2*count;
|
||||
ovector = match_data->ovector;
|
||||
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
|
||||
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
|
||||
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||
|
||||
if (lengthsptr == NULL)
|
||||
{
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
lensp = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
|
||||
*listp++ = sp;
|
||||
if (lensp != NULL) *lensp++ = size;
|
||||
sp += size;
|
||||
*sp++ = 0;
|
||||
}
|
||||
|
||||
*listp = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by substring_list_get *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_list_get()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_free(PCRE2_SPTR *list)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a unique group with the given name. If duplicate names are
|
||||
permitted, and the name is not unique, an error is generated.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
|
||||
otherwise, if firstptr and lastptr are NULL:
|
||||
a group number for a unique substring
|
||||
else PCRE2_ERROR_NOUNIQUESUBSTRING
|
||||
otherwise:
|
||||
the length of each entry, having set firstptr and lastptr
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
|
||||
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
|
||||
{
|
||||
uint16_t bot = 0;
|
||||
uint16_t top = code->name_count;
|
||||
uint16_t entrysize = code->name_entry_size;
|
||||
PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
|
||||
|
||||
while (top > bot)
|
||||
{
|
||||
uint16_t mid = (top + bot) / 2;
|
||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||
if (c == 0)
|
||||
{
|
||||
PCRE2_SPTR first;
|
||||
PCRE2_SPTR last;
|
||||
PCRE2_SPTR lastentry;
|
||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||
first = last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||
when it is known that names are unique. If the name is not unique, an error
|
||||
code is returned instead of a group number.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parenthesis, or a negative number
|
||||
PCRE2_ERROR_NOSUBSTRING if not found
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
PCRE2_SPTR stringname)
|
||||
{
|
||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||
}
|
||||
|
||||
/* End of pcre2_substring.c */
|
765
pcre2/src/pcre2_tables.c
Normal file
765
pcre2/src/pcre2_tables.c
Normal file
|
@ -0,0 +1,765 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcre2test program,
|
||||
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
|
||||
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
|
||||
defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
This is mode-dependent, so is skipped when this file is included by pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
/* The initializer comes entirely from the OP_LENGTHS macro, which is not
defined in this file. */
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
#endif
|
||||
|
||||
/* Tables of horizontal and vertical whitespace characters, suitable for
|
||||
adding to classes. */
|
||||
|
||||
/* Horizontal whitespace characters; the contents come from the HSPACE_LIST
macro, which is not defined in this file. */
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
/* Vertical whitespace characters; the contents come from the VSPACE_LIST
macro, which is not defined in this file. */
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
|
||||
/* These tables are the pairs of delimiters that are valid for callout string
|
||||
arguments. For each starting delimiter there must be a matching ending
|
||||
delimiter, which in fact is different only for bracket-like delimiters. */
|
||||
|
||||
const uint32_t PRIV(callout_start_delims)[] = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
|
||||
|
||||
const uint32_t PRIV(callout_end_delims[]) = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
|
||||
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
|
||||
(defined SUPPORT_UNICODE && \
|
||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int PRIV(utf8_table1)[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
/* Number of entries in PRIV(utf8_table1), computed from the element size so
it stays right if the element type ever changes. */
const int PRIV(utf8_table1_size) =
  sizeof(PRIV(utf8_table1)) / sizeof(PRIV(utf8_table1)[0]);
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
/* Indicator bits to set in the first byte; index = number of additional
bytes. */
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
/* Mask for the data bits held in the first byte; index = number of additional
bytes. */
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const uint8_t PRIV(utf8_table4)[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#endif /* UTF-8 support needed */
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const uint32_t PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
/* This table encodes the rules for finding the end of an extended grapheme
|
||||
cluster. Every code point has a grapheme break property which is one of the
|
||||
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
|
||||
the properties of two adjacent code points. The left property selects a word
|
||||
from the table, and the right property selects a bit from that word like this:
|
||||
|
||||
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
|
||||
|
||||
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||
two code points. The breaking rules are as follows:
|
||||
|
||||
1. Break at the start and end of text (pretty obviously).
|
||||
|
||||
2. Do not break between a CR and LF; otherwise, break before and after
|
||||
controls.
|
||||
|
||||
3. Do not break Hangul syllable sequences, the rules for which are:
|
||||
|
||||
L may be followed by L, V, LV or LVT
|
||||
LV or V may be followed by V or T
|
||||
LVT or T may be followed by T
|
||||
|
||||
4. Do not break before extending characters.
|
||||
|
||||
The next two rules are only for extended grapheme clusters (but that's what we
|
||||
are implementing).
|
||||
|
||||
5. Do not break before SpacingMarks.
|
||||
|
||||
6. Do not break after Prepend characters.
|
||||
|
||||
7. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 3 Extend */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|
|
||||
(1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)|
|
||||
(1<<ucp_gbLVT)|(1<<ucp_gbOther),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */
|
||||
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */
|
||||
(1<<ucp_gbT),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 8 T */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 9 LV */
|
||||
(1<<ucp_gbT),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 10 LVT */
|
||||
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark) /* 12 Other */
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
/* This table reverses PRIV(ucp_gentype). We can save the cost
|
||||
of a memory load. */
|
||||
|
||||
const int PRIV(ucp_typerange)[] = {
|
||||
ucp_Cc, ucp_Cs,
|
||||
ucp_Ll, ucp_Lu,
|
||||
ucp_Mc, ucp_Mn,
|
||||
ucp_Nd, ucp_No,
|
||||
ucp_Pc, ucp_Ps,
|
||||
ucp_Sc, ucp_So,
|
||||
ucp_Zl, ucp_Zs,
|
||||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* The PRIV(utt)[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data are unlikely.
|
||||
|
||||
July 2008: There is now a script called maint/GenerateUtt.py that can be used
|
||||
to generate this data automatically instead of maintaining it by hand.
|
||||
|
||||
The script was updated in March 2009 to generate a new EBCDIC-compliant
|
||||
version. Like all other character and string literals that are compared against
|
||||
the regular expression pattern, we must use STR_ macros instead of literal
|
||||
strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
|
||||
#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
|
||||
#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Any0 STR_A STR_n STR_y "\0"
|
||||
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
|
||||
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
|
||||
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
|
||||
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
|
||||
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
|
||||
#define STRING_C0 STR_C "\0"
|
||||
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_Cc0 STR_C STR_c "\0"
|
||||
#define STRING_Cf0 STR_C STR_f "\0"
|
||||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
|
||||
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
|
||||
#define STRING_Cn0 STR_C STR_n "\0"
|
||||
#define STRING_Co0 STR_C STR_o "\0"
|
||||
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
|
||||
#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
|
||||
#define STRING_Cs0 STR_C STR_s "\0"
|
||||
#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
|
||||
#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
|
||||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
||||
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||
#define STRING_Han0 STR_H STR_a STR_n "\0"
|
||||
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
|
||||
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
|
||||
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||
#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
|
||||
#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
|
||||
#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
|
||||
#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
|
||||
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||
#define STRING_L0 STR_L "\0"
|
||||
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
||||
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
||||
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
||||
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
|
||||
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||
#define STRING_Ll0 STR_L STR_l "\0"
|
||||
#define STRING_Lm0 STR_L STR_m "\0"
|
||||
#define STRING_Lo0 STR_L STR_o "\0"
|
||||
#define STRING_Lt0 STR_L STR_t "\0"
|
||||
#define STRING_Lu0 STR_L STR_u "\0"
|
||||
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_M0 STR_M "\0"
|
||||
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
||||
#define STRING_Mn0 STR_M STR_n "\0"
|
||||
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||
#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Nd0 STR_N STR_d "\0"
|
||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||
#define STRING_Nl0 STR_N STR_l "\0"
|
||||
#define STRING_No0 STR_N STR_o "\0"
|
||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_P0 STR_P "\0"
|
||||
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
|
||||
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
|
||||
#define STRING_Pc0 STR_P STR_c "\0"
|
||||
#define STRING_Pd0 STR_P STR_d "\0"
|
||||
#define STRING_Pe0 STR_P STR_e "\0"
|
||||
#define STRING_Pf0 STR_P STR_f "\0"
|
||||
#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
|
||||
#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_Pi0 STR_P STR_i "\0"
|
||||
#define STRING_Po0 STR_P STR_o "\0"
|
||||
#define STRING_Ps0 STR_P STR_s "\0"
|
||||
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||
#define STRING_S0 STR_S "\0"
|
||||
#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
|
||||
#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
|
||||
#define STRING_Sc0 STR_S STR_c "\0"
|
||||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||
#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0"
|
||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
#define STRING_So0 STR_S STR_o "\0"
|
||||
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
|
||||
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
|
||||
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
|
||||
#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
|
||||
#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
|
||||
#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
|
||||
#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
|
||||
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
|
||||
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
|
||||
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
|
||||
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
|
||||
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
|
||||
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
|
||||
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
|
||||
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
|
||||
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
|
||||
#define STRING_Yi0 STR_Y STR_i "\0"
|
||||
#define STRING_Z0 STR_Z "\0"
|
||||
#define STRING_Zl0 STR_Z STR_l "\0"
|
||||
#define STRING_Zp0 STR_Z STR_p "\0"
|
||||
#define STRING_Zs0 STR_Z STR_s "\0"
|
||||
|
||||
const char PRIV(utt_names)[] =
|
||||
STRING_Ahom0
|
||||
STRING_Anatolian_Hieroglyphs0
|
||||
STRING_Any0
|
||||
STRING_Arabic0
|
||||
STRING_Armenian0
|
||||
STRING_Avestan0
|
||||
STRING_Balinese0
|
||||
STRING_Bamum0
|
||||
STRING_Bassa_Vah0
|
||||
STRING_Batak0
|
||||
STRING_Bengali0
|
||||
STRING_Bopomofo0
|
||||
STRING_Brahmi0
|
||||
STRING_Braille0
|
||||
STRING_Buginese0
|
||||
STRING_Buhid0
|
||||
STRING_C0
|
||||
STRING_Canadian_Aboriginal0
|
||||
STRING_Carian0
|
||||
STRING_Caucasian_Albanian0
|
||||
STRING_Cc0
|
||||
STRING_Cf0
|
||||
STRING_Chakma0
|
||||
STRING_Cham0
|
||||
STRING_Cherokee0
|
||||
STRING_Cn0
|
||||
STRING_Co0
|
||||
STRING_Common0
|
||||
STRING_Coptic0
|
||||
STRING_Cs0
|
||||
STRING_Cuneiform0
|
||||
STRING_Cypriot0
|
||||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Elbasan0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
STRING_Gothic0
|
||||
STRING_Grantha0
|
||||
STRING_Greek0
|
||||
STRING_Gujarati0
|
||||
STRING_Gurmukhi0
|
||||
STRING_Han0
|
||||
STRING_Hangul0
|
||||
STRING_Hanunoo0
|
||||
STRING_Hatran0
|
||||
STRING_Hebrew0
|
||||
STRING_Hiragana0
|
||||
STRING_Imperial_Aramaic0
|
||||
STRING_Inherited0
|
||||
STRING_Inscriptional_Pahlavi0
|
||||
STRING_Inscriptional_Parthian0
|
||||
STRING_Javanese0
|
||||
STRING_Kaithi0
|
||||
STRING_Kannada0
|
||||
STRING_Katakana0
|
||||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khmer0
|
||||
STRING_Khojki0
|
||||
STRING_Khudawadi0
|
||||
STRING_L0
|
||||
STRING_L_AMPERSAND0
|
||||
STRING_Lao0
|
||||
STRING_Latin0
|
||||
STRING_Lepcha0
|
||||
STRING_Limbu0
|
||||
STRING_Linear_A0
|
||||
STRING_Linear_B0
|
||||
STRING_Lisu0
|
||||
STRING_Ll0
|
||||
STRING_Lm0
|
||||
STRING_Lo0
|
||||
STRING_Lt0
|
||||
STRING_Lu0
|
||||
STRING_Lycian0
|
||||
STRING_Lydian0
|
||||
STRING_M0
|
||||
STRING_Mahajani0
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Manichaean0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Mende_Kikakui0
|
||||
STRING_Meroitic_Cursive0
|
||||
STRING_Meroitic_Hieroglyphs0
|
||||
STRING_Miao0
|
||||
STRING_Mn0
|
||||
STRING_Modi0
|
||||
STRING_Mongolian0
|
||||
STRING_Mro0
|
||||
STRING_Multani0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nabataean0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Nko0
|
||||
STRING_Nl0
|
||||
STRING_No0
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Hungarian0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_North_Arabian0
|
||||
STRING_Old_Permic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
STRING_Pahawh_Hmong0
|
||||
STRING_Palmyrene0
|
||||
STRING_Pau_Cin_Hau0
|
||||
STRING_Pc0
|
||||
STRING_Pd0
|
||||
STRING_Pe0
|
||||
STRING_Pf0
|
||||
STRING_Phags_Pa0
|
||||
STRING_Phoenician0
|
||||
STRING_Pi0
|
||||
STRING_Po0
|
||||
STRING_Ps0
|
||||
STRING_Psalter_Pahlavi0
|
||||
STRING_Rejang0
|
||||
STRING_Runic0
|
||||
STRING_S0
|
||||
STRING_Samaritan0
|
||||
STRING_Saurashtra0
|
||||
STRING_Sc0
|
||||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Siddham0
|
||||
STRING_SignWriting0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
STRING_So0
|
||||
STRING_Sora_Sompeng0
|
||||
STRING_Sundanese0
|
||||
STRING_Syloti_Nagri0
|
||||
STRING_Syriac0
|
||||
STRING_Tagalog0
|
||||
STRING_Tagbanwa0
|
||||
STRING_Tai_Le0
|
||||
STRING_Tai_Tham0
|
||||
STRING_Tai_Viet0
|
||||
STRING_Takri0
|
||||
STRING_Tamil0
|
||||
STRING_Telugu0
|
||||
STRING_Thaana0
|
||||
STRING_Thai0
|
||||
STRING_Tibetan0
|
||||
STRING_Tifinagh0
|
||||
STRING_Tirhuta0
|
||||
STRING_Ugaritic0
|
||||
STRING_Vai0
|
||||
STRING_Warang_Citi0
|
||||
STRING_Xan0
|
||||
STRING_Xps0
|
||||
STRING_Xsp0
|
||||
STRING_Xuc0
|
||||
STRING_Xwd0
|
||||
STRING_Yi0
|
||||
STRING_Z0
|
||||
STRING_Zl0
|
||||
STRING_Zp0
|
||||
STRING_Zs0;
|
||||
|
||||
const ucp_type_table PRIV(utt)[] = {
|
||||
{ 0, PT_SC, ucp_Ahom },
|
||||
{ 5, PT_SC, ucp_Anatolian_Hieroglyphs },
|
||||
{ 27, PT_ANY, 0 },
|
||||
{ 31, PT_SC, ucp_Arabic },
|
||||
{ 38, PT_SC, ucp_Armenian },
|
||||
{ 47, PT_SC, ucp_Avestan },
|
||||
{ 55, PT_SC, ucp_Balinese },
|
||||
{ 64, PT_SC, ucp_Bamum },
|
||||
{ 70, PT_SC, ucp_Bassa_Vah },
|
||||
{ 80, PT_SC, ucp_Batak },
|
||||
{ 86, PT_SC, ucp_Bengali },
|
||||
{ 94, PT_SC, ucp_Bopomofo },
|
||||
{ 103, PT_SC, ucp_Brahmi },
|
||||
{ 110, PT_SC, ucp_Braille },
|
||||
{ 118, PT_SC, ucp_Buginese },
|
||||
{ 127, PT_SC, ucp_Buhid },
|
||||
{ 133, PT_GC, ucp_C },
|
||||
{ 135, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 155, PT_SC, ucp_Carian },
|
||||
{ 162, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 181, PT_PC, ucp_Cc },
|
||||
{ 184, PT_PC, ucp_Cf },
|
||||
{ 187, PT_SC, ucp_Chakma },
|
||||
{ 194, PT_SC, ucp_Cham },
|
||||
{ 199, PT_SC, ucp_Cherokee },
|
||||
{ 208, PT_PC, ucp_Cn },
|
||||
{ 211, PT_PC, ucp_Co },
|
||||
{ 214, PT_SC, ucp_Common },
|
||||
{ 221, PT_SC, ucp_Coptic },
|
||||
{ 228, PT_PC, ucp_Cs },
|
||||
{ 231, PT_SC, ucp_Cuneiform },
|
||||
{ 241, PT_SC, ucp_Cypriot },
|
||||
{ 249, PT_SC, ucp_Cyrillic },
|
||||
{ 258, PT_SC, ucp_Deseret },
|
||||
{ 266, PT_SC, ucp_Devanagari },
|
||||
{ 277, PT_SC, ucp_Duployan },
|
||||
{ 286, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 307, PT_SC, ucp_Elbasan },
|
||||
{ 315, PT_SC, ucp_Ethiopic },
|
||||
{ 324, PT_SC, ucp_Georgian },
|
||||
{ 333, PT_SC, ucp_Glagolitic },
|
||||
{ 344, PT_SC, ucp_Gothic },
|
||||
{ 351, PT_SC, ucp_Grantha },
|
||||
{ 359, PT_SC, ucp_Greek },
|
||||
{ 365, PT_SC, ucp_Gujarati },
|
||||
{ 374, PT_SC, ucp_Gurmukhi },
|
||||
{ 383, PT_SC, ucp_Han },
|
||||
{ 387, PT_SC, ucp_Hangul },
|
||||
{ 394, PT_SC, ucp_Hanunoo },
|
||||
{ 402, PT_SC, ucp_Hatran },
|
||||
{ 409, PT_SC, ucp_Hebrew },
|
||||
{ 416, PT_SC, ucp_Hiragana },
|
||||
{ 425, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 442, PT_SC, ucp_Inherited },
|
||||
{ 452, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 474, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 497, PT_SC, ucp_Javanese },
|
||||
{ 506, PT_SC, ucp_Kaithi },
|
||||
{ 513, PT_SC, ucp_Kannada },
|
||||
{ 521, PT_SC, ucp_Katakana },
|
||||
{ 530, PT_SC, ucp_Kayah_Li },
|
||||
{ 539, PT_SC, ucp_Kharoshthi },
|
||||
{ 550, PT_SC, ucp_Khmer },
|
||||
{ 556, PT_SC, ucp_Khojki },
|
||||
{ 563, PT_SC, ucp_Khudawadi },
|
||||
{ 573, PT_GC, ucp_L },
|
||||
{ 575, PT_LAMP, 0 },
|
||||
{ 578, PT_SC, ucp_Lao },
|
||||
{ 582, PT_SC, ucp_Latin },
|
||||
{ 588, PT_SC, ucp_Lepcha },
|
||||
{ 595, PT_SC, ucp_Limbu },
|
||||
{ 601, PT_SC, ucp_Linear_A },
|
||||
{ 610, PT_SC, ucp_Linear_B },
|
||||
{ 619, PT_SC, ucp_Lisu },
|
||||
{ 624, PT_PC, ucp_Ll },
|
||||
{ 627, PT_PC, ucp_Lm },
|
||||
{ 630, PT_PC, ucp_Lo },
|
||||
{ 633, PT_PC, ucp_Lt },
|
||||
{ 636, PT_PC, ucp_Lu },
|
||||
{ 639, PT_SC, ucp_Lycian },
|
||||
{ 646, PT_SC, ucp_Lydian },
|
||||
{ 653, PT_GC, ucp_M },
|
||||
{ 655, PT_SC, ucp_Mahajani },
|
||||
{ 664, PT_SC, ucp_Malayalam },
|
||||
{ 674, PT_SC, ucp_Mandaic },
|
||||
{ 682, PT_SC, ucp_Manichaean },
|
||||
{ 693, PT_PC, ucp_Mc },
|
||||
{ 696, PT_PC, ucp_Me },
|
||||
{ 699, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 712, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 726, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 743, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 764, PT_SC, ucp_Miao },
|
||||
{ 769, PT_PC, ucp_Mn },
|
||||
{ 772, PT_SC, ucp_Modi },
|
||||
{ 777, PT_SC, ucp_Mongolian },
|
||||
{ 787, PT_SC, ucp_Mro },
|
||||
{ 791, PT_SC, ucp_Multani },
|
||||
{ 799, PT_SC, ucp_Myanmar },
|
||||
{ 807, PT_GC, ucp_N },
|
||||
{ 809, PT_SC, ucp_Nabataean },
|
||||
{ 819, PT_PC, ucp_Nd },
|
||||
{ 822, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 834, PT_SC, ucp_Nko },
|
||||
{ 838, PT_PC, ucp_Nl },
|
||||
{ 841, PT_PC, ucp_No },
|
||||
{ 844, PT_SC, ucp_Ogham },
|
||||
{ 850, PT_SC, ucp_Ol_Chiki },
|
||||
{ 859, PT_SC, ucp_Old_Hungarian },
|
||||
{ 873, PT_SC, ucp_Old_Italic },
|
||||
{ 884, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 902, PT_SC, ucp_Old_Permic },
|
||||
{ 913, PT_SC, ucp_Old_Persian },
|
||||
{ 925, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 943, PT_SC, ucp_Old_Turkic },
|
||||
{ 954, PT_SC, ucp_Oriya },
|
||||
{ 960, PT_SC, ucp_Osmanya },
|
||||
{ 968, PT_GC, ucp_P },
|
||||
{ 970, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 983, PT_SC, ucp_Palmyrene },
|
||||
{ 993, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1005, PT_PC, ucp_Pc },
|
||||
{ 1008, PT_PC, ucp_Pd },
|
||||
{ 1011, PT_PC, ucp_Pe },
|
||||
{ 1014, PT_PC, ucp_Pf },
|
||||
{ 1017, PT_SC, ucp_Phags_Pa },
|
||||
{ 1026, PT_SC, ucp_Phoenician },
|
||||
{ 1037, PT_PC, ucp_Pi },
|
||||
{ 1040, PT_PC, ucp_Po },
|
||||
{ 1043, PT_PC, ucp_Ps },
|
||||
{ 1046, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1062, PT_SC, ucp_Rejang },
|
||||
{ 1069, PT_SC, ucp_Runic },
|
||||
{ 1075, PT_GC, ucp_S },
|
||||
{ 1077, PT_SC, ucp_Samaritan },
|
||||
{ 1087, PT_SC, ucp_Saurashtra },
|
||||
{ 1098, PT_PC, ucp_Sc },
|
||||
{ 1101, PT_SC, ucp_Sharada },
|
||||
{ 1109, PT_SC, ucp_Shavian },
|
||||
{ 1117, PT_SC, ucp_Siddham },
|
||||
{ 1125, PT_SC, ucp_SignWriting },
|
||||
{ 1137, PT_SC, ucp_Sinhala },
|
||||
{ 1145, PT_PC, ucp_Sk },
|
||||
{ 1148, PT_PC, ucp_Sm },
|
||||
{ 1151, PT_PC, ucp_So },
|
||||
{ 1154, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1167, PT_SC, ucp_Sundanese },
|
||||
{ 1177, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1190, PT_SC, ucp_Syriac },
|
||||
{ 1197, PT_SC, ucp_Tagalog },
|
||||
{ 1205, PT_SC, ucp_Tagbanwa },
|
||||
{ 1214, PT_SC, ucp_Tai_Le },
|
||||
{ 1221, PT_SC, ucp_Tai_Tham },
|
||||
{ 1230, PT_SC, ucp_Tai_Viet },
|
||||
{ 1239, PT_SC, ucp_Takri },
|
||||
{ 1245, PT_SC, ucp_Tamil },
|
||||
{ 1251, PT_SC, ucp_Telugu },
|
||||
{ 1258, PT_SC, ucp_Thaana },
|
||||
{ 1265, PT_SC, ucp_Thai },
|
||||
{ 1270, PT_SC, ucp_Tibetan },
|
||||
{ 1278, PT_SC, ucp_Tifinagh },
|
||||
{ 1287, PT_SC, ucp_Tirhuta },
|
||||
{ 1295, PT_SC, ucp_Ugaritic },
|
||||
{ 1304, PT_SC, ucp_Vai },
|
||||
{ 1308, PT_SC, ucp_Warang_Citi },
|
||||
{ 1320, PT_ALNUM, 0 },
|
||||
{ 1324, PT_PXSPACE, 0 },
|
||||
{ 1328, PT_SPACE, 0 },
|
||||
{ 1332, PT_UCNC, 0 },
|
||||
{ 1336, PT_WORD, 0 },
|
||||
{ 1340, PT_SC, ucp_Yi },
|
||||
{ 1343, PT_GC, ucp_Z },
|
||||
{ 1345, PT_PC, ucp_Zl },
|
||||
{ 1348, PT_PC, ucp_Zp },
|
||||
{ 1351, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
/* Number of entries in PRIV(utt), derived from the size of one element. */
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(PRIV(utt)[0]);
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_tables.c */
|
3747
pcre2/src/pcre2_ucd.c
Normal file
3747
pcre2/src/pcre2_ucd.c
Normal file
File diff suppressed because it is too large
Load diff
268
pcre2/src/pcre2_ucp.h
Normal file
268
pcre2/src/pcre2_ucp.h
Normal file
|
@ -0,0 +1,268 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _PCRE2_UCP_H
|
||||
#define _PCRE2_UCP_H
|
||||
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the UCD access macros. New values that are added for new releases of Unicode
|
||||
should always be at the end of each enum, for backwards compatibility.
|
||||
|
||||
IMPORTANT: Note also that the specific numeric values of the enums have to be
|
||||
the same as the values that are generated by the maint/MultiStage2.py script,
|
||||
where the equivalent property descriptive names are listed in vectors.
|
||||
|
||||
ALSO: The specific values of the first two enums are assumed for the table
|
||||
called catposstab in pcre2_compile.c. */
|
||||
|
||||
/* General character categories (the one-letter Unicode classes). The numeric
values have to agree with the generated Unicode tables, so each one is written
out explicitly rather than left to positional numbering. */

enum {
  ucp_C = 0,     /* Other */
  ucp_L = 1,     /* Letter */
  ucp_M = 2,     /* Mark */
  ucp_N = 3,     /* Number */
  ucp_P = 4,     /* Punctuation */
  ucp_S = 5,     /* Symbol */
  ucp_Z = 6      /* Separator */
};
|
||||
|
||||
/* Particular (two-letter) character categories. As with the general
categories, the numeric values must agree with the generated Unicode tables,
so they are written out explicitly. */

enum {
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};
|
||||
|
||||
/* Grapheme break properties. The code that processes these assumes that every
value is less than 16; if additions ever take the count to 16 or more, that
code will have to be rewritten. */

enum {
  ucp_gbCR                = 0,
  ucp_gbLF                = 1,
  ucp_gbControl           = 2,
  ucp_gbExtend            = 3,
  ucp_gbPrepend           = 4,
  ucp_gbSpacingMark       = 5,
  ucp_gbL                 = 6,    /* Hangul syllable type L */
  ucp_gbV                 = 7,    /* Hangul syllable type V */
  ucp_gbT                 = 8,    /* Hangul syllable type T */
  ucp_gbLV                = 9,    /* Hangul syllable type LV */
  ucp_gbLVT               = 10,   /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12
};
|
||||
|
||||
/* These are the script identifications. The values are purely positional
(no enumerator has an explicit initializer), so the order below is significant:
new scripts must only ever be appended at the end, grouped under a comment
naming the Unicode release that introduced them. */

enum {
  /* Initial set (everything listed before the "New for Unicode 5.0" group): */
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0: */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0: */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of pcre2_ucp.h */
|
398
pcre2/src/pcre2_valid_utf.c
Normal file
398
pcre2/src/pcre2_valid_utf.c
Normal file
|
@ -0,0 +1,398 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF character
|
||||
strings. This file is also #included by the pcre2test program, which uses
|
||||
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
|
||||
with the library. In this case, PCRE2_PCRE2TEST is defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Dummy function when Unicode is not supported *
|
||||
*************************************************/
|
||||
|
||||
/* This function should never be called when Unicode is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
(void)string;
|
||||
(void)length;
|
||||
(void)erroroffset;
|
||||
return 0;
|
||||
}
|
||||
#else /* UTF is supported */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string
|
||||
errp pointer to an error position offset variable
|
||||
|
||||
Returns: == 0 if the string is a valid UTF string
|
||||
!= 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
register PCRE2_SPTR p;
|
||||
register uint32_t c;
|
||||
|
||||
/* ----------------- Check a UTF-8 string ----------------- */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
/* Originally, this function checked according to RFC 2279, allowing for values
|
||||
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
|
||||
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked. Error returns are as follows:
|
||||
|
||||
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
register uint32_t ab, d;
|
||||
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
|
||||
if (length < ab) /* Missing bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
switch(ab - length)
|
||||
{
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
}
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||
for for xx00 000x (overlong sequence). */
|
||||
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||
rejected by the length test below. However, we do the appropriate tests
|
||||
here so that overlong sequences get diagnosed, and also in case there is
|
||||
ever an option for handling these larger code points. */
|
||||
|
||||
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||
1111 1000, xx00 0xxx */
|
||||
|
||||
case 4:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||
1111 1100, xx00 00xx. */
|
||||
|
||||
case 5:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
|
||||
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||
character. */
|
||||
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - ab;
|
||||
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-16 string ----------------- */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* There's not so much work, nor so many errors, for UTF-16.
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
|
||||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if ((c & 0xf800) != 0xd800)
|
||||
{
|
||||
/* Normal UTF-16 code point. Neither high nor low surrogate. */
|
||||
}
|
||||
else if ((c & 0x0400) == 0)
|
||||
{
|
||||
/* High surrogate. Must be a followed by a low surrogate. */
|
||||
if (length == 0)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR1;
|
||||
}
|
||||
p++;
|
||||
length--;
|
||||
if ((*p & 0xfc00) != 0xdc00)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Isolated low surrogate. Always an error. */
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR3;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-32 string ----------------- */
|
||||
|
||||
#else
|
||||
|
||||
/* There is very little to do for a UTF-32 string.
|
||||
PCRE2_ERROR_UTF32_ERR1 Surrogate character
|
||||
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; length--, p++)
|
||||
{
|
||||
c = *p;
|
||||
if ((c & 0xfffff800u) != 0xd800u)
|
||||
{
|
||||
/* Normal UTF-32 code point. Neither high nor low surrogate. */
|
||||
if (c > 0x10ffffu)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF32_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A surrogate */
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF32_ERR1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre2_valid_utf.c */
|
271
pcre2/src/pcre2_xclass.c
Normal file
271
pcre2/src/pcre2_xclass.c
Normal file
|
@ -0,0 +1,271 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class. It is used by pcre2_auto_possessify() and by both pcre2_match() and
|
||||
pcre2_dfa_match(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain codepoints above 255 and/or Unicode properties.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag code unit of the XCLASS data
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
|
||||
{
|
||||
PCRE2_UCHAR t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||
utf = TRUE;
|
||||
#endif
|
||||
|
||||
/* Code points < 256 are matched against a bitmap, if one is present. If not,
|
||||
we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
{
|
||||
if ((*data & XCL_HASPROP) == 0)
|
||||
{
|
||||
if ((*data & XCL_MAP) == 0) return negated;
|
||||
return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
|
||||
}
|
||||
if ((*data & XCL_MAP) != 0 &&
|
||||
(((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
uint32_t x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
x = *data++;
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
GETCHARINC(y, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
x = *data++;
|
||||
y = *data++;
|
||||
}
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
BOOL isprop = t == XCL_PROP;
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
case PT_ANY:
|
||||
if (isprop) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||
prop->chartype == ucp_Lt) == isprop) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == prop->chartype) == isprop) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == prop->script) == isprop) return !negated;
|
||||
break;
|
||||
|
||||
case PT_ALNUM:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (isprop) return !negated;
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||
== isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
if (c < 0xa0)
|
||||
{
|
||||
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||
c == CHAR_GRAVE_ACCENT) == isprop)
|
||||
return !negated;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((c < 0xd800 || c > 0xdfff) == isprop)
|
||||
return !negated;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The following three properties can occur only in an XCLASS, as there
|
||||
is no \p or \P coding for them. */
|
||||
|
||||
/* Graphic character. Implement this as not Z (space or separator) and
|
||||
not C (other), except for Cf (format) with a few exceptions. This seems
|
||||
to be what Perl does. The exceptional characters are:
|
||||
|
||||
U+061C Arabic Letter Mark
|
||||
U+180E Mongolian Vowel Separator
|
||||
U+2066 - U+2069 Various "isolate"s
|
||||
*/
|
||||
|
||||
case PT_PXGRAPH:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
|
||||
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
|
||||
(prop->chartype == ucp_Cf &&
|
||||
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
|
||||
)) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* Printable character: same as graphic, with the addition of Zs, i.e.
|
||||
not Zl and not Zp, and U+180E. */
|
||||
|
||||
case PT_PXPRINT:
|
||||
if ((prop->chartype != ucp_Zl &&
|
||||
prop->chartype != ucp_Zp &&
|
||||
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
|
||||
(prop->chartype == ucp_Cf &&
|
||||
c != 0x061c && (c < 0x2066 || c > 0x2069))
|
||||
)) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* Punctuation: all Unicode punctuation, plus ASCII characters that
|
||||
Unicode treats as symbols rather than punctuation, for Perl
|
||||
compatibility (these are $+<=>^`|~). */
|
||||
|
||||
case PT_PXPUNCT:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
||||
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcre2_xclass.c */
|
423
pcre2/src/pcre2demo.c
Normal file
423
pcre2/src/pcre2demo.c
Normal file
|
@ -0,0 +1,423 @@
|
|||
/*************************************************
|
||||
* PCRE2 DEMONSTRATION PROGRAM *
|
||||
*************************************************/
|
||||
|
||||
/* This is a demonstration program to illustrate a straightforward way of
|
||||
calling the PCRE2 regular expression library from a C program. See the
|
||||
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
libraries, you should be able to compile this program using this command:
|
||||
|
||||
gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
|
||||
|
||||
If PCRE2 is not installed in a standard place, it is likely to be installed
|
||||
with support for the pkg-config mechanism. If you have pkg-config, you can
|
||||
compile this program using this command:
|
||||
|
||||
gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
|
||||
|
||||
If you do not have pkg-config, you may have to use this:
|
||||
|
||||
gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
|
||||
-R/usr/local/lib -lpcre2-8 -o pcre2demo
|
||||
|
||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||
library files for PCRE2 are installed on your system. Only some operating
|
||||
systems (Solaris is one) use the -R option.
|
||||
|
||||
Building under Windows:
|
||||
|
||||
If you want to statically link this program against a non-dll .a file, you must
|
||||
define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
|
||||
the following line. */
|
||||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
such as pcre2_compile(). */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre2.h>
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* Here is the program. The API includes the concept of "contexts" for *
|
||||
* setting up unusual interface requirements for compiling and matching, *
|
||||
* such as custom memory managers and non-standard newline definitions. *
|
||||
* This program does not do any of this, so it makes no use of contexts, *
|
||||
* always passing NULL where a context could be given. *
|
||||
**************************************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pcre2_code *re;
|
||||
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
|
||||
PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */
|
||||
PCRE2_SPTR name_table;
|
||||
|
||||
int crlf_is_newline;
|
||||
int errornumber;
|
||||
int find_all;
|
||||
int i;
|
||||
int namecount;
|
||||
int name_entry_size;
|
||||
int rc;
|
||||
int utf8;
|
||||
|
||||
uint32_t option_bits;
|
||||
uint32_t newline;
|
||||
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
size_t subject_length;
|
||||
pcre2_match_data *match_data;
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* First, sort out the command line. There is only one possible option at *
|
||||
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||
* if the -g option is present. Apart from that, there must be exactly two *
|
||||
* arguments. *
|
||||
**************************************************************************/
|
||||
|
||||
find_all = 0;
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||
else break;
|
||||
}
|
||||
|
||||
/* After the options, we require exactly two arguments, which are the pattern,
|
||||
and the subject string. */
|
||||
|
||||
if (argc - i != 2)
|
||||
{
|
||||
printf("Two arguments required: a regex and a subject string\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* As pattern and subject are char arguments, they can be straightforwardly
|
||||
cast to PCRE2_SPTR as we are working in 8-bit code units. */
|
||||
|
||||
pattern = (PCRE2_SPTR)argv[i];
|
||||
subject = (PCRE2_SPTR)argv[i+1];
|
||||
subject_length = strlen((char *)subject);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Now we are going to compile the regular expression pattern, and handle *
|
||||
* any errors that are detected. *
|
||||
*************************************************************************/
|
||||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
NULL); /* use default compile context */
|
||||
|
||||
/* Compilation failed: print the error message and exit. */
|
||||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the compilation succeeded, we call PCRE again, in order to do a *
|
||||
* pattern match against the subject string. This does just ONE match. If *
|
||||
* further matching is needed, it will be done below. Before running the *
|
||||
* match we must set up a match_data block for holding the result. *
|
||||
*************************************************************************/
|
||||
|
||||
/* Using this function ensures that the block is exactly the right size for
|
||||
the number of capturing parentheses in the pattern. */
|
||||
|
||||
match_data = pcre2_match_data_create_from_pattern(re, NULL);
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* Matching failed: handle error cases */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
|
||||
/*
|
||||
Handle other special cases if you like
|
||||
*/
|
||||
default: printf("Matching error %d\n", rc); break;
|
||||
}
|
||||
pcre2_match_data_free(match_data); /* Release memory used for the match */
|
||||
pcre2_code_free(re); /* data and the compiled pattern. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded. Get a pointer to the output vector, where string offsets are
|
||||
stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* We have found the first match within the subject string. If the output *
|
||||
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||
application you might want to do things other than print them. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
size_t substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* That concludes the basic part of this demonstration program. We have *
|
||||
* compiled a pattern, and performed a single match. The code that follows *
|
||||
* shows first how to access named substrings, and then how to code for *
|
||||
* repeated matches on the same subject. *
|
||||
**************************************************************************/
|
||||
|
||||
/* See if there are any named substrings, and if so, show them by name. First
|
||||
we have to extract the count of named parentheses from the pattern. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
|
||||
&namecount); /* where to put the answer */
|
||||
|
||||
if (namecount <= 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr;
|
||||
printf("Named substrings\n");
|
||||
|
||||
/* Before we can access the substrings, we must extract the table for
|
||||
translating names to numbers, and the size of each entry in the table. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMETABLE, /* address of the table */
|
||||
&name_table); /* where to put the answer */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the "-g" option was given on the command line, we want to continue *
|
||||
* to search for additional matches in the subject string, in a similar *
|
||||
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||
* might think because of the possibility of matching an empty string. *
|
||||
* What happens is as follows: *
|
||||
* *
|
||||
* If the previous match was NOT for an empty string, we can just start *
|
||||
* the next match at the end of the previous one. *
|
||||
* *
|
||||
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||
* would lead to an infinite loop. Instead, a call of pcre2_match() is *
|
||||
* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
|
||||
* first of these tells PCRE2 that an empty string at the start of the *
|
||||
* subject is not a valid match; other possibilities must be tried. The *
|
||||
* second flag restricts PCRE2 to one match attempt at the initial string *
|
||||
* position. If this match succeeds, an alternative to the empty string *
|
||||
* match has been found, and we can print it and proceed round the loop, *
|
||||
* advancing by the length of whatever was found. If this match does not *
|
||||
* succeed, we still stay in the loop, advancing by just one character. *
|
||||
* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
|
||||
* more than one byte. *
|
||||
* *
|
||||
* However, there is a complication concerned with newlines. When the *
|
||||
* newline convention is such that CRLF is a valid newline, we must *
|
||||
* advance by two characters rather than one. The newline convention can *
|
||||
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||
*************************************************************************/
|
||||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
||||
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||
sequence. First, find the options with which the regex was compiled and extract
|
||||
the UTF state. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
|
||||
utf8 = (option_bits & PCRE2_UTF) != 0;
|
||||
|
||||
/* Now find the newline convention and see whether CRLF is a valid newline
|
||||
sequence. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
uint32_t options = 0; /* Normally no options */
|
||||
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are
|
||||
at the end of the subject. Otherwise, arrange to run another match at the
|
||||
same point to see if a non-empty match can be found. */
|
||||
|
||||
if (ovector[0] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == subject_length) break;
|
||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
}
|
||||
|
||||
/* Run the next matching operation */
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
start_offset, /* starting offset in the subject */
|
||||
options, /* options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||
is zero, it just means we have found all possible matches, so the loop ends.
|
||||
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||
point where there was a previous empty-string match. In this case, we do what
|
||||
Perl does: advance the matching position by one character, and continue. We
|
||||
do this by setting the "end of previous match" offset, because that is picked
|
||||
up at the top of the loop as the point at which to start again.
|
||||
|
||||
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||
the current position is just before it, advance by an extra byte. (b)
|
||||
Otherwise we must ensure that we skip an entire UTF character if we are in
|
||||
UTF mode. */
|
||||
|
||||
if (rc == PCRE2_ERROR_NOMATCH)
|
||||
{
|
||||
if (options == 0) break; /* All matches found */
|
||||
ovector[1] = start_offset + 1; /* Advance one code unit */
|
||||
if (crlf_is_newline && /* If CRLF is newline & */
|
||||
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||
subject[start_offset] == '\r' &&
|
||||
subject[start_offset + 1] == '\n')
|
||||
ovector[1] += 1; /* Advance by one more. */
|
||||
else if (utf8) /* Otherwise, ensure we */
|
||||
{ /* advance a whole UTF-8 */
|
||||
while (ovector[1] < subject_length) /* character. */
|
||||
{
|
||||
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||
ovector[1] += 1;
|
||||
}
|
||||
}
|
||||
continue; /* Go round the loop again */
|
||||
}
|
||||
|
||||
/* Other matching errors are not recoverable. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
printf("Matching error %d\n", rc);
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
|
||||
|
||||
/* The match succeeded, but the output vector wasn't big enough. This
|
||||
should not happen. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* As before, show substrings stored in the output vector by number, and then
|
||||
also any named substrings. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
size_t substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
if (namecount <= 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr = name_table;
|
||||
printf("Named substrings\n");
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
} /* End of loop to find second and subsequent matches */
|
||||
|
||||
printf("\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2demo.c */
|
3270
pcre2/src/pcre2grep.c
Normal file
3270
pcre2/src/pcre2grep.c
Normal file
File diff suppressed because it is too large
Load diff
335
pcre2/src/pcre2posix.c
Normal file
335
pcre2/src/pcre2posix.c
Normal file
|
@ -0,0 +1,335 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module is a wrapper that provides a POSIX API to the underlying PCRE2
|
||||
functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for
|
||||
compiling these functions. This must come before including pcre2posix.h, where
|
||||
they are set for an application (using these functions) if they have not
|
||||
previously been set. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# define PCRE2POSIX_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE2POSIX_EXP_DEFN __declspec(dllexport)
|
||||
#endif
|
||||
|
||||
/* We include pcre2.h before pcre2_internal.h so that the PCRE2 library
|
||||
functions are declared as "import" for Windows by defining PCRE2_EXP_DECL as
|
||||
"import". This is needed even though pcre2_internal.h itself includes pcre2.h,
|
||||
because it does so after it has set PCRE2_EXP_DECL to "export" if it is not
|
||||
already set. */
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_internal.h"
|
||||
#include "pcre2posix.h"
|
||||
|
||||
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
||||
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
|
||||
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
|
||||
don't. */
|
||||
|
||||
static const int eint1[] = {
|
||||
0, /* No error */
|
||||
REG_EESCAPE, /* \ at end of pattern */
|
||||
REG_EESCAPE, /* \c at end of pattern */
|
||||
REG_EESCAPE, /* unrecognized character follows \ */
|
||||
REG_BADBR, /* numbers out of order in {} quantifier */
|
||||
/* 5 */
|
||||
REG_BADBR, /* number too big in {} quantifier */
|
||||
REG_EBRACK, /* missing terminating ] for character class */
|
||||
REG_ECTYPE, /* invalid escape sequence in character class */
|
||||
REG_ERANGE, /* range out of order in character class */
|
||||
REG_BADRPT, /* nothing to repeat */
|
||||
/* 10 */
|
||||
REG_ASSERT, /* internal error: unexpected repeat */
|
||||
REG_BADPAT, /* unrecognized character after (? or (?- */
|
||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_EPAREN, /* missing ) */
|
||||
/* 15 */
|
||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
||||
REG_INVARG, /* pattern passed as NULL */
|
||||
REG_INVARG, /* unknown compile-time option bit(s) */
|
||||
REG_EPAREN, /* missing ) after (?# comment */
|
||||
REG_ESIZE, /* parentheses nested too deeply */
|
||||
/* 20 */
|
||||
REG_ESIZE, /* regular expression too large */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_EPAREN, /* unmatched closing parenthesis */
|
||||
REG_ASSERT /* internal error: code overflow */
|
||||
};
|
||||
|
||||
static const int eint2[] = {
|
||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
|
||||
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
|
||||
56, REG_INVARG, /* internal error: unknown newline setting */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
||||
/* Indexed by POSIX error code in regerror(); entry 0 is a placeholder because
zero means "no error". */

static const char *const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT */
  "invalid repeat counts in {}",     /* BADBR      */
  "pattern error",                   /* BADPAT     */
  "? * + invalid",                   /* BADRPT     */
  "unbalanced {}",                   /* EBRACE     */
  "unbalanced []",                   /* EBRACK     */
  "collation error - not relevant",  /* ECOLLATE   */
  "bad class",                       /* ECTYPE     */
  "bad escape sequence",             /* EESCAPE    */
  "empty expression",                /* EMPTY      */
  "unbalanced ()",                   /* EPAREN     */
  "bad range inside []",             /* ERANGE     */
  "expression too big",              /* ESIZE      */
  "failed to get memory",            /* ESPACE     */
  "bad back reference",              /* ESUBREG    */
  "bad argument",                    /* INVARG     */
  "match failed"                     /* NOMATCH    */
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
int used;
|
||||
const char *message;
|
||||
|
||||
message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
|
||||
if (preg != NULL && (int)preg->re_erroffset != -1)
|
||||
{
|
||||
used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message,
|
||||
(int)preg->re_erroffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
used = snprintf(errbuf, errbuf_size, "%s", message);
|
||||
}
|
||||
|
||||
return used + 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
regfree(regex_t *preg)
|
||||
{
|
||||
pcre2_match_data_free(preg->re_match_data);
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compile a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
preg points to a structure for recording the compiled expression
|
||||
pattern the pattern to compile
|
||||
cflags compilation flags
|
||||
|
||||
Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
PCRE2_SIZE erroffset;
|
||||
int errorcode;
|
||||
int options = 0;
|
||||
int re_nsub = 0;
|
||||
|
||||
if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS;
|
||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE;
|
||||
if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL;
|
||||
if ((cflags & REG_NOSUB) != 0) options |= PCRE2_NO_AUTO_CAPTURE;
|
||||
if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
|
||||
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
|
||||
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
|
||||
|
||||
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
|
||||
options, &errorcode, &erroffset, NULL);
|
||||
preg->re_erroffset = erroffset;
|
||||
|
||||
if (preg->re_pcre2_code == NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* A negative value is a UTF error; otherwise all error codes are greater
|
||||
than COMPILE_ERROR_BASE, but check, just in case. */
|
||||
|
||||
if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
|
||||
errorcode -= COMPILE_ERROR_BASE;
|
||||
|
||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||
return eint1[errorcode];
|
||||
for (i = 0; i < sizeof(eint2)/(2*sizeof(const int)); i += 2)
|
||||
if (errorcode == eint2[i]) return eint2[i+1];
|
||||
return REG_BADPAT;
|
||||
}
|
||||
|
||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
||||
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
|
||||
if (preg->re_match_data == NULL)
|
||||
{
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
return REG_ESPACE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/* A suitable match_data block, large enough to hold all possible captures, was
|
||||
obtained when the pattern was compiled, to save having to allocate and free it
|
||||
for each match. If REG_NOSUB was specified at compile time, the
|
||||
PCRE2_NO_AUTO_CAPTURE flag will be set. When this is the case, the nmatch and
|
||||
pmatch arguments are ignored, and the only result is yes/no/error. */
|
||||
|
||||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
int rc, so, eo;
|
||||
int options = 0;
|
||||
pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
|
||||
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
|
||||
|
||||
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
||||
|
||||
/* When no string data is being returned, or no vector has been passed in which
|
||||
to put it, ensure that nmatch is zero. */
|
||||
|
||||
if ((((pcre2_real_code *)(preg->re_pcre2_code))->compile_options &
|
||||
PCRE2_NO_AUTO_CAPTURE) != 0 || pmatch == NULL) nmatch = 0;
|
||||
|
||||
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
|
||||
The man page from OS X says "REG_STARTEND affects only the location of the
|
||||
string, not how it is matched". That is why the "so" value is used to bump the
|
||||
start location rather than being passed as a PCRE2 "starting offset". */
|
||||
|
||||
if ((eflags & REG_STARTEND) != 0)
|
||||
{
|
||||
if (pmatch == NULL) return REG_INVARG;
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
else
|
||||
{
|
||||
so = 0;
|
||||
eo = (int)strlen(string);
|
||||
}
|
||||
|
||||
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
||||
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
|
||||
|
||||
/* Successful match */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
if ((size_t)rc > nmatch) rc = (int)nmatch;
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = md->ovector[i*2];
|
||||
pmatch[i].rm_eo = md->ovector[i*2+1];
|
||||
}
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unsuccessful match */
|
||||
|
||||
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
|
||||
return REG_INVARG;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
default: return REG_ASSERT;
|
||||
case PCRE2_ERROR_BADMODE: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADOPTION: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG;
|
||||
case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
case PCRE2_ERROR_NOMEMORY: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NULL: return REG_INVARG;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2posix.c */
|
146
pcre2/src/pcre2posix.h
Normal file
146
pcre2/src/pcre2posix.h
Normal file
|
@ -0,0 +1,146 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* Include guard. This header defines an anonymous enum and two struct
typedefs, so without a guard a second inclusion in the same translation unit
is a redefinition error. */

#ifndef PCRE2POSIX_H_IDEMPOTENT_GUARD
#define PCRE2POSIX_H_IDEMPOTENT_GUARD

/* Have to include stdlib.h in order to ensure that size_t is defined. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options, mostly defined by POSIX, but with some extras. Each option is a
distinct bit so that they can be OR-ed together in the cflags/eflags
arguments of regcomp() and regexec(). */

#define REG_ICASE     0x0001  /* Maps to PCRE2_CASELESS */
#define REG_NEWLINE   0x0002  /* Maps to PCRE2_MULTILINE */
#define REG_NOTBOL    0x0004  /* Maps to PCRE2_NOTBOL */
#define REG_NOTEOL    0x0008  /* Maps to PCRE2_NOTEOL */
#define REG_DOTALL    0x0010  /* NOT defined by POSIX; maps to PCRE2_DOTALL */
#define REG_NOSUB     0x0020  /* Maps to PCRE2_NO_AUTO_CAPTURE */
#define REG_UTF       0x0040  /* NOT defined by POSIX; maps to PCRE2_UTF */
#define REG_STARTEND  0x0080  /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY  0x0100  /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
#define REG_UNGREEDY  0x0200  /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP       0x0400  /* NOT defined by POSIX; maps to PCRE2_UCP */

/* This is not used by PCRE2, but by defining it we make it easier
to slot PCRE2 into existing programs that make POSIX calls. */

#define REG_EXTENDED  0

/* Error values. Not all these are relevant or used by the wrapper. The
numbering starts at 1, so zero is never an error code. */

enum {
  REG_ASSERT = 1,  /* internal error ? */
  REG_BADBR,       /* invalid repeat counts in {} */
  REG_BADPAT,      /* pattern error */
  REG_BADRPT,      /* ? * + invalid */
  REG_EBRACE,      /* unbalanced {} */
  REG_EBRACK,      /* unbalanced [] */
  REG_ECOLLATE,    /* collation error - not relevant */
  REG_ECTYPE,      /* bad class */
  REG_EESCAPE,     /* bad escape sequence */
  REG_EMPTY,       /* empty expression */
  REG_EPAREN,      /* unbalanced () */
  REG_ERANGE,      /* bad range inside [] */
  REG_ESIZE,       /* expression too big */
  REG_ESPACE,      /* failed to get memory */
  REG_ESUBREG,     /* bad back reference */
  REG_INVARG,      /* bad argument */
  REG_NOMATCH      /* match failed */
};


/* The structure representing a compiled regular expression. */

typedef struct {
  void *re_pcre2_code;
  void *re_match_data;
  size_t re_nsub;
  size_t re_erroffset;
} regex_t;

/* The structure in which a captured offset is returned. */

typedef int regoff_t;

typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;

/* When an application links to a PCRE2 DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export settings are needed, and are set in pcre2posix.c before including this
file. */

#if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL)
#  define PCRE2POSIX_EXP_DECL  extern __declspec(dllimport)
#  define PCRE2POSIX_EXP_DEFN  __declspec(dllimport)
#endif

/* By default, we use the standard "extern" declarations. */

#ifndef PCRE2POSIX_EXP_DECL
#  ifdef __cplusplus
#    define PCRE2POSIX_EXP_DECL  extern "C"
#    define PCRE2POSIX_EXP_DEFN  extern "C"
#  else
#    define PCRE2POSIX_EXP_DECL  extern
#    define PCRE2POSIX_EXP_DEFN  extern
#  endif
#endif

/* The functions */

PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
                     regmatch_t *, int);
PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCRE2POSIX_EXP_DECL void regfree(regex_t *);

#ifdef __cplusplus
}   /* extern "C" */
#endif

#endif  /* PCRE2POSIX_H_IDEMPOTENT_GUARD */

/* End of pcre2posix.h */
|
7426
pcre2/src/pcre2test.c
Normal file
7426
pcre2/src/pcre2test.c
Normal file
File diff suppressed because it is too large
Load diff
135
pcre2/src/sljit/sljitConfig.h
Normal file
135
pcre2/src/sljit/sljitConfig.h
Normal file
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Stack-less Just-In-Time compiler
|
||||
*
|
||||
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are
|
||||
* permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if !defined(_SLJIT_CONFIG_H_)
#define _SLJIT_CONFIG_H_

/* --------------------------------------------------------------------- */
/*  Custom defines                                                       */
/* --------------------------------------------------------------------- */

/* Local overrides belong in this section. It is intentionally left empty
   and is not expected to change, which keeps locally maintained patches
   (diff / patch) easy to re-apply. */

/* --------------------------------------------------------------------- */
/*  Architecture                                                         */
/* --------------------------------------------------------------------- */

/* Architecture selection. Every choice is commented out in this file. */
/* #define SLJIT_CONFIG_X86_32 1 */
/* #define SLJIT_CONFIG_X86_64 1 */
/* #define SLJIT_CONFIG_ARM_V5 1 */
/* #define SLJIT_CONFIG_ARM_V7 1 */
/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
/* #define SLJIT_CONFIG_ARM_64 1 */
/* #define SLJIT_CONFIG_PPC_32 1 */
/* #define SLJIT_CONFIG_PPC_64 1 */
/* #define SLJIT_CONFIG_MIPS_32 1 */
/* #define SLJIT_CONFIG_MIPS_64 1 */
/* #define SLJIT_CONFIG_SPARC_32 1 */
/* #define SLJIT_CONFIG_TILEGX 1 */

/* #define SLJIT_CONFIG_AUTO 1 */
/* #define SLJIT_CONFIG_UNSUPPORTED 1 */

/* --------------------------------------------------------------------- */
/*  Utilities                                                            */
/* --------------------------------------------------------------------- */

/* Every option below only supplies a default: a value defined earlier
   (for instance on the compiler command line) is left untouched. */

/* Global lock, useful for thread-safe compiling of global functions.
   Default: enabled. */
#if !defined(SLJIT_UTIL_GLOBAL_LOCK)
#define SLJIT_UTIL_GLOBAL_LOCK 1
#endif

/* Stack-like data structure built on mmap / VirtualAlloc.
   Default: enabled. */
#if !defined(SLJIT_UTIL_STACK)
#define SLJIT_UTIL_STACK 1
#endif

/* Single threaded application; no locks are required.
   Default: disabled. */
#if !defined(SLJIT_SINGLE_THREADED)
#define SLJIT_SINGLE_THREADED 0
#endif

/* --------------------------------------------------------------------- */
/*  Configuration                                                        */
/* --------------------------------------------------------------------- */

/* When SLJIT_STD_MACROS_DEFINED is not defined, the application should
   define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL.
   Default: disabled. */
#if !defined(SLJIT_STD_MACROS_DEFINED)
#define SLJIT_STD_MACROS_DEFINED 0
#endif

/* Executable code allocation. When SLJIT_EXECUTABLE_ALLOCATOR is not
   defined, the application should define both SLJIT_MALLOC_EXEC and
   SLJIT_FREE_EXEC.
   Default: enabled. */
#if !defined(SLJIT_EXECUTABLE_ALLOCATOR)
#define SLJIT_EXECUTABLE_ALLOCATOR 1
#endif

/* Force the cdecl calling convention even if the C compiler supports a
   better one (e.g. fastcall). With this option enabled, C functions
   declared without SLJIT_CALL can also be called from JIT code.
   Default: disabled. */
#if !defined(SLJIT_USE_CDECL_CALLING_CONVENTION)
#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
#endif

/* Return with an error when an invalid argument is passed.
   Default: disabled. */
#if !defined(SLJIT_ARGUMENT_CHECKS)
#define SLJIT_ARGUMENT_CHECKS 0
#endif

/* Debug checks (assertions, etc.).
   Default: enabled. */
#if !defined(SLJIT_DEBUG)
#define SLJIT_DEBUG 1
#endif

/* Verbose operations.
   Default: enabled. */
#if !defined(SLJIT_VERBOSE)
#define SLJIT_VERBOSE 1
#endif

/*
  SLJIT_IS_FPU_AVAILABLE
    The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE.
      zero value - FPU is NOT present.
      nonzero value - FPU is present.
*/

/* For further configurations, see the beginning of sljitConfigInternal.h */

#endif
|
713
pcre2/src/sljit/sljitConfigInternal.h
Normal file
713
pcre2/src/sljit/sljitConfigInternal.h
Normal file
|
@ -0,0 +1,713 @@
|
|||
/*
|
||||
* Stack-less Just-In-Time compiler
|
||||
*
|
||||
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are
|
||||
* permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SLJIT_CONFIG_INTERNAL_H_
|
||||
#define _SLJIT_CONFIG_INTERNAL_H_
|
||||
|
||||
/*
|
||||
SLJIT defines the following architecture dependent types and macros:
|
||||
|
||||
Types:
|
||||
sljit_sb, sljit_ub : signed and unsigned 8 bit byte
|
||||
sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type
|
||||
sljit_si, sljit_ui : signed and unsigned 32 bit integer type
|
||||
sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer
|
||||
sljit_p : unsigned pointer value (usually the same as sljit_uw, but
|
||||
some 64 bit ABIs may use 32 bit pointers)
|
||||
sljit_s : single precision floating point value
|
||||
sljit_d : double precision floating point value
|
||||
|
||||
Macros for feature detection (boolean):
|
||||
SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
|
||||
SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
|
||||
SLJIT_LITTLE_ENDIAN : little endian architecture
|
||||
SLJIT_BIG_ENDIAN : big endian architecture
|
||||
SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
|
||||
SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
|
||||
|
||||
Constants:
|
||||
SLJIT_NUMBER_OF_REGISTERS : number of available registers
|
||||
SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers
|
||||
SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers
|
||||
SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
|
||||
SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
|
||||
SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
|
||||
SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
|
||||
SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing
|
||||
a double precision floating point array by index
|
||||
SLJIT_SINGLE_SHIFT : the shift required to apply when accessing
|
||||
a single precision floating point array by index
|
||||
SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
|
||||
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
|
||||
|
||||
Other macros:
|
||||
SLJIT_CALL : C calling convention define for both calling JIT from C and C callbacks for JIT
|
||||
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
|
||||
*/
|
||||
|
||||
/*****************/
|
||||
/* Sanity check. */
|
||||
/*****************/
|
||||
|
||||
#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
|
||||
|| (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
|
||||
|| (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
|
||||
|| (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
|
||||
|| (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
|
||||
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|
||||
|| (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
|
||||
|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
|
||||
|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
|
||||
|| (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
|
||||
|| (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
|
||||
|| (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
|
||||
|| (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
|
||||
|| (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED))
|
||||
#error "An architecture must be selected"
|
||||
#endif
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
|
||||
+ (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
|
||||
+ (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
|
||||
+ (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
|
||||
+ (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
|
||||
+ (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|
||||
+ (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
|
||||
+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
|
||||
+ (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
|
||||
+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
|
||||
+ (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
|
||||
+ (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
|
||||
+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
|
||||
+ (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
|
||||
#error "Multiple architectures are selected"
|
||||
#endif
|
||||
|
||||
/********************************************************/
|
||||
/* Automatic CPU detection (requires compiler support). */
|
||||
/********************************************************/
|
||||
|
||||
#if defined(SLJIT_CONFIG_AUTO) && SLJIT_CONFIG_AUTO

#if defined(_WIN32)

/* Windows: only x86-64 and ARM are told apart; anything else is taken to
   be 32 bit x86. */
#if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
#elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1
#else
#define SLJIT_CONFIG_X86_32 1
#endif

#else /* !_WIN32 */

/* Everything else: decide from the compiler's predefined target macros.
   The order of the tests matters (e.g. the 64 bit PowerPC test must come
   before the 32 bit one, which also accepts _POWER). */
#if defined(__i386__) || defined(__i386)
#define SLJIT_CONFIG_X86_32 1
#elif defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
#elif defined(__arm__) || defined(__ARM__)
/* 32 bit ARM: pick the most specific instruction set variant. */
#if defined(__thumb2__)
#define SLJIT_CONFIG_ARM_THUMB2 1
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__)
#define SLJIT_CONFIG_ARM_V7 1
#else
#define SLJIT_CONFIG_ARM_V5 1
#endif
#elif defined(__aarch64__)
#define SLJIT_CONFIG_ARM_64 1
#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__))
#define SLJIT_CONFIG_PPC_64 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
#define SLJIT_CONFIG_PPC_32 1
#elif defined(__mips__) && !defined(_LP64)
#define SLJIT_CONFIG_MIPS_32 1
#elif defined(__mips64)
#define SLJIT_CONFIG_MIPS_64 1
#elif defined(__sparc__) || defined(__sparc)
#define SLJIT_CONFIG_SPARC_32 1
#elif defined(__tilegx__)
#define SLJIT_CONFIG_TILEGX 1
#else
/* Unsupported architecture */
#define SLJIT_CONFIG_UNSUPPORTED 1
#endif

#endif /* _WIN32 */

#endif /* SLJIT_CONFIG_AUTO */
|
||||
|
||||
/* No executable allocator on an unsupported target. */
#if defined(SLJIT_CONFIG_UNSUPPORTED) && SLJIT_CONFIG_UNSUPPORTED
#undef SLJIT_EXECUTABLE_ALLOCATOR
#endif

/******************************/
/* CPU family type detection. */
/******************************/

/* Any of the three 32 bit ARM variants implies SLJIT_CONFIG_ARM_32. */
#if (defined(SLJIT_CONFIG_ARM_V5) && SLJIT_CONFIG_ARM_V5) \
	|| (defined(SLJIT_CONFIG_ARM_V7) && SLJIT_CONFIG_ARM_V7) \
	|| (defined(SLJIT_CONFIG_ARM_THUMB2) && SLJIT_CONFIG_ARM_THUMB2)
#define SLJIT_CONFIG_ARM_32 1
#endif

/* Collapse the 32 / 64 bit variants into one family macro. At most one of
   the family macros below gets defined. */
#if (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) \
	|| (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64)
#define SLJIT_CONFIG_X86 1
#elif (defined(SLJIT_CONFIG_ARM_32) && SLJIT_CONFIG_ARM_32) \
	|| (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64)
#define SLJIT_CONFIG_ARM 1
#elif (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)
#define SLJIT_CONFIG_PPC 1
#elif (defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32) \
	|| (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64)
#define SLJIT_CONFIG_MIPS 1
#elif (defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32) \
	|| (defined(SLJIT_CONFIG_SPARC_64) && SLJIT_CONFIG_SPARC_64)
#define SLJIT_CONFIG_SPARC 1
#endif
|
||||
|
||||
/**********************************/
|
||||
/* External function definitions. */
|
||||
/**********************************/
|
||||
|
||||
/* Unless the application declares that it supplies the standard macros
   itself, pull in the C library headers the default macros rely on. */
#if !(defined(SLJIT_STD_MACROS_DEFINED) && SLJIT_STD_MACROS_DEFINED)

#include <stdlib.h>
#include <string.h>

#endif /* SLJIT_STD_MACROS_DEFINED */

/* General macros:
   Note: SLJIT is designed to be independent from them as possible.

   In release mode (SLJIT_DEBUG is not defined) only the following
   external functions are needed. Each macro is a default only and can be
   overridden by defining it beforehand.
*/

/* Allocate `size` bytes; `allocator_data` is ignored by the default. */
#if !defined(SLJIT_MALLOC)
#define SLJIT_MALLOC(size, allocator_data) malloc(size)
#endif

/* Release `ptr`; `allocator_data` is ignored by the default. */
#if !defined(SLJIT_FREE)
#define SLJIT_FREE(ptr, allocator_data) free(ptr)
#endif

/* Copy `len` bytes from `src` to `dest` via memmove. */
#if !defined(SLJIT_MEMMOVE)
#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
#endif

/* Set `len` bytes at `dest` to zero. */
#if !defined(SLJIT_ZEROMEM)
#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
#endif
|
||||
|
||||
/***************************/
|
||||
/* Compiler helper macros. */
|
||||
/***************************/
|
||||
|
||||
/* Branch prediction hints. Only defined here when the application has
   supplied neither of the two macros. */
#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY)

#if defined(__GNUC__) && (__GNUC__ >= 3)
#define SLJIT_LIKELY(x)		__builtin_expect((x), 1)
#define SLJIT_UNLIKELY(x)	__builtin_expect((x), 0)
#else
/* No hint available: the macros reduce to the bare condition. */
#define SLJIT_LIKELY(x)		(x)
#define SLJIT_UNLIKELY(x)	(x)
#endif

#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */

#ifndef SLJIT_INLINE
/* Inline functions. Some old compilers do not support them. */
#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
#define SLJIT_INLINE
#else
#define SLJIT_INLINE __inline
#endif
#endif /* !SLJIT_INLINE */

#ifndef SLJIT_NOINLINE
/* Not inline functions. */
#if defined(__GNUC__)
#define SLJIT_NOINLINE __attribute__ ((noinline))
#else
#define SLJIT_NOINLINE
#endif
#endif /* !SLJIT_NOINLINE */

#ifndef SLJIT_CONST
/* Const variables. */
#define SLJIT_CONST const
#endif

#ifndef SLJIT_UNUSED_ARG
/* Unused arguments. The argument is parenthesized so that the cast applies
   to the whole expression: without the parentheses SLJIT_UNUSED_ARG(a + b)
   would expand to (void)a + b, which does not compile. */
#define SLJIT_UNUSED_ARG(arg) (void)(arg)
#endif
|
||||
|
||||
/*********************************/
|
||||
/* Type of public API functions. */
|
||||
/*********************************/
|
||||
|
||||
#if !(defined(SLJIT_CONFIG_STATIC) && SLJIT_CONFIG_STATIC)

/* Default build: public API functions carry no extra attribute. */
#define SLJIT_API_FUNC_ATTRIBUTE

#elif defined(__GNUC__)

/* Static ABI functions (for all-in-one programs). On gcc the unused
   attribute additionally silences unused-function warnings. */
#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused))

#else

/* Static ABI functions (for all-in-one programs). */
#define SLJIT_API_FUNC_ATTRIBUTE static

#endif /* !(defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */
|
||||
|
||||
/****************************/
|
||||
/* Instruction cache flush. */
|
||||
/****************************/
|
||||
|
||||
#if !defined(SLJIT_CACHE_FLUSH)

/* SLJIT_CACHE_FLUSH(from, to) is given the start and end address of a
   memory range. The first matching case below provides the definition. */

#if defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86

/* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to)

#elif defined(__APPLE__)

/* Supported by all macs since Mac OS 10.5.
   However, it does not work on non-jailbroken iOS devices,
   although the compilation is successful. The second argument is the
   length of the range in bytes. */
#define SLJIT_CACHE_FLUSH(from, to) sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))

#elif defined(__ANDROID__)

/* Android lacks __clear_cache; instead, cacheflush should be used. */
#define SLJIT_CACHE_FLUSH(from, to) cacheflush((long)(from), (long)(to), 0)

#elif defined(SLJIT_CONFIG_PPC) && SLJIT_CONFIG_PPC

/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
#define SLJIT_CACHE_FLUSH(from, to) ppc_cache_flush((from), (to))

#elif defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32

/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
#define SLJIT_CACHE_FLUSH(from, to) sparc_cache_flush((from), (to))

#else

/* Calls __ARM_NR_cacheflush on ARM-Linux. */
#define SLJIT_CACHE_FLUSH(from, to) __clear_cache((char*)(from), (char*)(to))

#endif

#endif /* !SLJIT_CACHE_FLUSH */
|
||||
|
||||
/******************************************************/
|
||||
/* Byte/half/int/word/single/double type definitions. */
|
||||
/******************************************************/
|
||||
|
||||
/* Fixed width integer types: 8 bit byte, 16 bit half-word, 32 bit integer
   (unsigned variant first, signed variant second). */
typedef unsigned char sljit_ub;
typedef signed char sljit_sb;

typedef unsigned short int sljit_uh;
typedef signed short int sljit_sh;

typedef unsigned int sljit_ui;
typedef signed int sljit_si;

/* Machine word type. Enough for storing a pointer.
     32 bit for 32 bit machines.
     64 bit for 64 bit machines.
   SLJIT_WORD_SHIFT is the matching array-index shift. */
#if defined(SLJIT_CONFIG_UNSUPPORTED) && SLJIT_CONFIG_UNSUPPORTED

/* Just to have something. */
#define SLJIT_WORD_SHIFT 0
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;

#elif (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64) \
	|| (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) \
	|| (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \
	|| (defined(SLJIT_CONFIG_TILEGX) && SLJIT_CONFIG_TILEGX)

/* One of the 64 bit targets. */
#define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3
#if defined(_WIN32)
typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw;
#else
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;
#endif

#else

/* Every remaining target is handled as a 32 bit architecture. */
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
typedef unsigned int sljit_uw;
typedef int sljit_sw;

#endif

/* Pointer value type; here always the unsigned machine word. */
typedef sljit_uw sljit_p;

/* Floating point types: single and double precision. */
typedef float sljit_s;
typedef double sljit_d;

/* Shift for pointer sized data. */
#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT

/* Shifts for double and single precision sized data. */
#define SLJIT_DOUBLE_SHIFT 3
#define SLJIT_SINGLE_SHIFT 2
|
||||
|
||||
#if !defined(SLJIT_W)

/* Defining long constants: on 64 bit architectures the literal gets an
   "ll" suffix pasted on, otherwise it is passed through unchanged. */
#if defined(SLJIT_64BIT_ARCHITECTURE) && SLJIT_64BIT_ARCHITECTURE
#define SLJIT_W(w)	(w##ll)
#else
#define SLJIT_W(w)	(w)
#endif

#endif /* !SLJIT_W */
|
||||
|
||||
/*************************/
|
||||
/* Endianness detection. */
|
||||
/*************************/
|
||||
|
||||
#if !(defined(SLJIT_BIG_ENDIAN) || defined(SLJIT_LITTLE_ENDIAN))

/* These macros are mostly useful for the applications. */

#if (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)

/* PowerPC: big endian unless the compiler defines __LITTLE_ENDIAN__. */
#if defined(__LITTLE_ENDIAN__)
#define SLJIT_LITTLE_ENDIAN 1
#else
#define SLJIT_BIG_ENDIAN 1
#endif

#elif (defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32) \
	|| (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64)

/* MIPS: big endian unless the compiler defines __MIPSEL__. */
#if defined(__MIPSEL__)
#define SLJIT_LITTLE_ENDIAN 1
#else
#define SLJIT_BIG_ENDIAN 1
#endif

#elif defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32

#define SLJIT_BIG_ENDIAN 1

#else

/* All remaining configurations are treated as little endian. */
#define SLJIT_LITTLE_ENDIAN 1

#endif

#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */

/* Sanity check: reject both "both set" and "neither set". */
#if ((defined(SLJIT_BIG_ENDIAN) && SLJIT_BIG_ENDIAN) && (defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN)) \
	|| (!(defined(SLJIT_BIG_ENDIAN) && SLJIT_BIG_ENDIAN) && !(defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN))
#error "Exactly one endianness must be selected"
#endif
|
||||
|
||||
#if !defined(SLJIT_UNALIGNED)

/* Targets on which SLJIT_UNALIGNED is switched on by default. */
#if (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) \
	|| (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64) \
	|| (defined(SLJIT_CONFIG_ARM_V7) && SLJIT_CONFIG_ARM_V7) \
	|| (defined(SLJIT_CONFIG_ARM_THUMB2) && SLJIT_CONFIG_ARM_THUMB2) \
	|| (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64) \
	|| (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)
#define SLJIT_UNALIGNED 1
#endif

#endif /* !SLJIT_UNALIGNED */

#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
/* Auto detect SSE2 support using CPUID.
   On 64 bit x86 cpus, sse2 must be present. */
#define SLJIT_DETECT_SSE2 1
#endif
|
||||
|
||||
/*****************************************************************************************/
|
||||
/* Calling convention of functions generated by SLJIT or called from the generated code. */
|
||||
/*****************************************************************************************/
|
||||
|
||||
#if !defined(SLJIT_CALL)

#if defined(SLJIT_USE_CDECL_CALLING_CONVENTION) && SLJIT_USE_CDECL_CALLING_CONVENTION

/* Force cdecl. */
#define SLJIT_CALL

#elif defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32

/* 32 bit x86: use the compiler's fastcall spelling when one is known and
   record that choice in SLJIT_X86_32_FASTCALL. */
#if defined(__GNUC__) && !defined(__APPLE__)

#define SLJIT_CALL __attribute__ ((fastcall))
#define SLJIT_X86_32_FASTCALL 1

#elif defined(_MSC_VER)

#define SLJIT_CALL __fastcall
#define SLJIT_X86_32_FASTCALL 1

#elif defined(__BORLANDC__)

#define SLJIT_CALL __msfastcall
#define SLJIT_X86_32_FASTCALL 1

#else /* Unknown compiler. */

/* The cdecl attribute is the default. */
#define SLJIT_CALL

#endif

#else /* Non x86-32 architectures. */

#define SLJIT_CALL

#endif /* SLJIT_CONFIG_X86_32 */

#endif /* !SLJIT_CALL */
|
||||
|
||||
#ifndef SLJIT_INDIRECT_CALL
|
||||
#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \
|
||||
|| ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
|
||||
/* It seems certain ppc compilers use an indirect addressing for functions
|
||||
which makes things complicated. */
|
||||
#define SLJIT_INDIRECT_CALL 1
|
||||
#endif
|
||||
#endif /* SLJIT_INDIRECT_CALL */
|
||||
|
||||
/* The offset which needs to be subtracted from the return address to
|
||||
determine the next executed instruction after return. */
|
||||
#ifndef SLJIT_RETURN_ADDRESS_OFFSET
|
||||
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
|
||||
#define SLJIT_RETURN_ADDRESS_OFFSET 8
|
||||
#else
|
||||
#define SLJIT_RETURN_ADDRESS_OFFSET 0
|
||||
#endif
|
||||
#endif /* SLJIT_RETURN_ADDRESS_OFFSET */
|
||||
|
||||
/***************************************************/
|
||||
/* Functions of the built-in executable allocator. */
|
||||
/***************************************************/
|
||||
|
||||
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
|
||||
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
|
||||
#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
|
||||
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
|
||||
#endif
|
||||
|
||||
/**********************************************/
|
||||
/* Registers and locals offset determination. */
|
||||
/**********************************************/
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 10
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
|
||||
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
|
||||
#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw))
|
||||
#else
|
||||
/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */
|
||||
#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw))
|
||||
#endif /* SLJIT_X86_32_FASTCALL */
|
||||
|
||||
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
|
||||
#ifndef _WIN64
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 12
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
|
||||
#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw))
|
||||
#else
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 12
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
|
||||
#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw))
|
||||
#endif /* _WIN64 */
|
||||
|
||||
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 11
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
|
||||
#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 11
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
|
||||
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 25
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
|
||||
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
|
||||
|
||||
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 22
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
|
||||
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
|
||||
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw))
|
||||
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
|
||||
/* Add +1 for double alignment. */
|
||||
#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw))
|
||||
#else
|
||||
#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw))
|
||||
#endif /* SLJIT_CONFIG_PPC_64 || _AIX */
|
||||
|
||||
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 17
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
|
||||
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
|
||||
#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
|
||||
#else
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
#endif
|
||||
|
||||
#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 18
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
|
||||
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
|
||||
/* Add +1 for double alignment. */
|
||||
#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw))
|
||||
#endif
|
||||
|
||||
#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 10
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 5
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
|
||||
#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 0
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
|
||||
#endif
|
||||
|
||||
#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE)
|
||||
|
||||
#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
|
||||
(SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS)
|
||||
|
||||
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64)
|
||||
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1
|
||||
#else
|
||||
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
|
||||
#endif
|
||||
|
||||
#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
|
||||
(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
|
||||
|
||||
/*************************************/
|
||||
/* Debug and verbose related macros. */
|
||||
/*************************************/
|
||||
|
||||
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
|
||||
|
||||
#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP)
|
||||
|
||||
/* SLJIT_HALT_PROCESS must halt the process. */
|
||||
#ifndef SLJIT_HALT_PROCESS
#include <stdlib.h>

/* Default halt hook: terminate the process through abort().
   The expansion deliberately carries no trailing semicolon; every call
   site writes "SLJIT_HALT_PROCESS();" itself, and a semicolon baked into
   the macro would produce an extra empty statement that breaks
   "if (x) SLJIT_HALT_PROCESS(); else ..." constructs. */
#define SLJIT_HALT_PROCESS() \
	abort()
#endif /* !SLJIT_HALT_PROCESS */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */
|
||||
|
||||
/* Feel free to redefine these two macros. */
|
||||
#ifndef SLJIT_ASSERT
|
||||
|
||||
#define SLJIT_ASSERT(x) \
|
||||
do { \
|
||||
if (SLJIT_UNLIKELY(!(x))) { \
|
||||
printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \
|
||||
SLJIT_HALT_PROCESS(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif /* !SLJIT_ASSERT */
|
||||
|
||||
#ifndef SLJIT_ASSERT_STOP
|
||||
|
||||
#define SLJIT_ASSERT_STOP() \
|
||||
do { \
|
||||
printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \
|
||||
SLJIT_HALT_PROCESS(); \
|
||||
} while (0)
|
||||
|
||||
#endif /* !SLJIT_ASSERT_STOP */
|
||||
|
||||
#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
|
||||
|
||||
/* Forcing empty, but valid statements. */
|
||||
#undef SLJIT_ASSERT
|
||||
#undef SLJIT_ASSERT_STOP
|
||||
|
||||
#define SLJIT_ASSERT(x) \
|
||||
do { } while (0)
|
||||
#define SLJIT_ASSERT_STOP() \
|
||||
do { } while (0)
|
||||
|
||||
#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
|
||||
|
||||
#ifndef SLJIT_COMPILE_ASSERT
|
||||
|
||||
/* Should be improved eventually. */
|
||||
#define SLJIT_COMPILE_ASSERT(x, description) \
|
||||
SLJIT_ASSERT(x)
|
||||
|
||||
#endif /* !SLJIT_COMPILE_ASSERT */
|
||||
|
||||
#endif
|
312
pcre2/src/sljit/sljitExecAllocator.c
Normal file
312
pcre2/src/sljit/sljitExecAllocator.c
Normal file
|
@ -0,0 +1,312 @@
|
|||
/*
|
||||
* Stack-less Just-In-Time compiler
|
||||
*
|
||||
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are
|
||||
* permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This file contains a simple executable memory allocator
|
||||
|
||||
It is assumed, that executable code blocks are usually medium (or sometimes
|
||||
large) memory blocks, and the allocator is not too frequently called (less
|
||||
optimized than other allocators). Thus, using it as a generic allocator is
|
||||
not suggested.
|
||||
|
||||
How does it work:
|
||||
Memory is allocated in continuous memory areas called chunks by alloc_chunk()
|
||||
Chunk format:
|
||||
[ block ][ block ] ... [ block ][ block terminator ]
|
||||
|
||||
All blocks and the block terminator start with a block_header. The block
|
||||
header contains the size of the block itself and of the previous block. These sizes
|
||||
can also contain special values.
|
||||
Block size:
|
||||
0 - The block is a free_block, with a different size member.
|
||||
1 - The block is a block terminator.
|
||||
n - The block is used at the moment, and the value contains its size.
|
||||
Previous block size:
|
||||
0 - This is the first block of the memory chunk.
|
||||
n - The size of the previous block.
|
||||
|
||||
Using these size values we can go forward or backward on the block chain.
|
||||
The unused blocks are stored in a chain list pointed by free_blocks. This
|
||||
list is useful if we need to find a suitable memory area when the allocator
|
||||
is called.
|
||||
|
||||
When a block is freed, the new free block is connected to its adjacent free
|
||||
blocks if possible.
|
||||
|
||||
[ free block ][ used block ][ free block ]
|
||||
and "used block" is freed, the three blocks are connected together:
|
||||
[ one big free block ]
|
||||
*/
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* System (OS) functions */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* 64 KByte. */
|
||||
#define CHUNK_SIZE 0x10000
|
||||
|
||||
/*
|
||||
alloc_chunk / free_chunk :
|
||||
* allocate executable system memory chunks
|
||||
* the size is always divisible by CHUNK_SIZE
|
||||
allocator_grab_lock / allocator_release_lock :
|
||||
* make the allocator thread safe
|
||||
* can be empty if the OS (or the application) does not support threading
|
||||
* only the allocator requires this lock, sljit is fully thread safe
|
||||
as it only uses local variables
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
|
||||
{
|
||||
return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
}
|
||||
|
||||
static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
|
||||
{
|
||||
SLJIT_UNUSED_ARG(size);
|
||||
VirtualFree(chunk, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
|
||||
{
|
||||
void* retval;
|
||||
|
||||
#ifdef MAP_ANON
|
||||
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
#else
|
||||
if (dev_zero < 0) {
|
||||
if (open_dev_zero())
|
||||
return NULL;
|
||||
}
|
||||
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
|
||||
#endif
|
||||
|
||||
return (retval != MAP_FAILED) ? retval : NULL;
|
||||
}
|
||||
|
||||
static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
|
||||
{
|
||||
munmap(chunk, size);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* Common functions */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#define CHUNK_MASK (~(CHUNK_SIZE - 1))
|
||||
|
||||
/* Header stored at the start of every block and of the block terminator. */
struct block_header {
|
||||
/* 0: free block, 1: block terminator, otherwise the size of this used block. */
sljit_uw size;
|
||||
/* 0: first block of the chunk, otherwise the size of the previous block. */
sljit_uw prev_size;
|
||||
};
|
||||
|
||||
/* Layout of an unused block; such blocks are linked into the list headed by free_blocks. */
struct free_block {
|
||||
/* Common header; header.size is set to 0 while the block is free. */
struct block_header header;
|
||||
/* Neighbours in the doubly linked free list. */
struct free_block *next;
|
||||
struct free_block *prev;
|
||||
/* Actual size of the free block (header.size cannot hold it, see above). */
sljit_uw size;
|
||||
};
|
||||
|
||||
/* Pointer helpers for walking the blocks of a chunk. Every macro argument
   is parenthesized so that compound expressions (for example "p + 1") are
   cast and offset as a whole rather than piecewise. */

/* The address "base + offset bytes", viewed as a block header. */
#define AS_BLOCK_HEADER(base, offset) \
	((struct block_header*)(((sljit_ub*)(base)) + (offset)))
/* The address "base + offset bytes", viewed as a free block. */
#define AS_FREE_BLOCK(base, offset) \
	((struct free_block*)(((sljit_ub*)(base)) + (offset)))
/* First byte following the block header located at base. */
#define MEM_START(base)		((void*)(((sljit_ub*)(base)) + sizeof(struct block_header)))
/* Adds room for one block header to size and rounds the sum up to a multiple of 8. */
#define ALIGN_SIZE(size)	(((size) + sizeof(struct block_header) + 7) & ~7)
|
||||
|
||||
static struct free_block* free_blocks;
|
||||
static sljit_uw allocated_size;
|
||||
static sljit_uw total_size;
|
||||
|
||||
/* Marks free_block as free with the given size and pushes it onto the
   front of the free_blocks list. */
static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
{
	struct free_block *old_head = free_blocks;

	/* A zero header size identifies a free block; the real size is kept
	   in the separate size member. */
	free_block->header.size = 0;
	free_block->size = size;

	/* Link in as the new list head. */
	free_block->prev = 0;
	free_block->next = old_head;
	if (old_head)
		old_head->prev = free_block;
	free_blocks = free_block;
}
|
||||
|
||||
static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
|
||||
{
|
||||
if (free_block->next)
|
||||
free_block->next->prev = free_block->prev;
|
||||
|
||||
if (free_block->prev)
|
||||
free_block->prev->next = free_block->next;
|
||||
else {
|
||||
SLJIT_ASSERT(free_blocks == free_block);
|
||||
free_blocks = free_block->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocates at least "size" bytes of executable memory.
   The free list is searched first-fit; when nothing fits, a new chunk is
   obtained from alloc_chunk(). Returns a pointer to the usable area just
   behind the block header, or NULL when no chunk could be allocated.
   The allocator lock is held for the whole operation. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
{
	struct block_header *used;
	struct block_header *terminator;
	struct free_block *candidate;
	struct free_block *remainder;
	sljit_uw avail;

	allocator_grab_lock();

	/* Never hand out less than a free_block record, then add the header
	   and round up (see ALIGN_SIZE). */
	if (size < sizeof(struct free_block))
		size = sizeof(struct free_block);
	size = ALIGN_SIZE(size);

	for (candidate = free_blocks; candidate; candidate = candidate->next) {
		if (candidate->size < size)
			continue;

		avail = candidate->size;
		if (avail > size + 64) {
			/* Carve the new block off the tail of the free block. */
			avail -= size;
			candidate->size = avail;
			used = AS_BLOCK_HEADER(candidate, avail);
			used->prev_size = avail;
			AS_BLOCK_HEADER(used, size)->prev_size = size;
		}
		else {
			/* The leftover would be at most 64 bytes: hand out the whole
			   free block instead of splitting it. */
			sljit_remove_free_block(candidate);
			used = (struct block_header*)candidate;
			size = avail;
		}
		used->size = size;
		allocated_size += size;
		allocator_release_lock();
		return MEM_START(used);
	}

	/* Nothing suitable in the free list: get a fresh chunk whose size is
	   rounded up to a multiple of CHUNK_SIZE, including one extra header
	   for the block terminator. */
	avail = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK;
	used = (struct block_header*)alloc_chunk(avail);
	if (!used) {
		allocator_release_lock();
		return NULL;
	}

	avail -= sizeof(struct block_header);
	total_size += avail;
	used->prev_size = 0;

	if (avail > size + 64) {
		/* Split the chunk: the used block comes first, the rest becomes
		   a free block. */
		used->size = size;
		allocated_size += size;
		avail -= size;

		remainder = AS_FREE_BLOCK(used, size);
		remainder->header.prev_size = size;
		sljit_insert_free_block(remainder, avail);
		terminator = AS_BLOCK_HEADER(remainder, avail);
	}
	else {
		/* The whole chunk is consumed by this allocation. */
		used->size = avail;
		allocated_size += avail;
		terminator = AS_BLOCK_HEADER(used, avail);
	}

	/* Size 1 marks the block terminator at the end of the chunk. */
	terminator->size = 1;
	terminator->prev_size = avail;
	allocator_release_lock();
	return MEM_START(used);
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
|
||||
{
|
||||
struct block_header *header;
|
||||
struct free_block* free_block;
|
||||
|
||||
allocator_grab_lock();
|
||||
header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
|
||||
allocated_size -= header->size;
|
||||
|
||||
/* Connecting free blocks together if possible. */
|
||||
|
||||
/* If header->prev_size == 0, free_block will equal to header.
|
||||
In this case, free_block->header.size will be > 0. */
|
||||
free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
|
||||
if (SLJIT_UNLIKELY(!free_block->header.size)) {
|
||||
free_block->size += header->size;
|
||||
header = AS_BLOCK_HEADER(free_block, free_block->size);
|
||||
header->prev_size = free_block->size;
|
||||
}
|
||||
else {
|
||||
free_block = (struct free_block*)header;
|
||||
sljit_insert_free_block(free_block, header->size);
|
||||
}
|
||||
|
||||
header = AS_BLOCK_HEADER(free_block, free_block->size);
|
||||
if (SLJIT_UNLIKELY(!header->size)) {
|
||||
free_block->size += ((struct free_block*)header)->size;
|
||||
sljit_remove_free_block((struct free_block*)header);
|
||||
header = AS_BLOCK_HEADER(free_block, free_block->size);
|
||||
header->prev_size = free_block->size;
|
||||
}
|
||||
|
||||
/* The whole chunk is free. */
|
||||
if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
|
||||
/* If this block is freed, we still have (allocated_size / 2) free space. */
|
||||
if (total_size - free_block->size > (allocated_size * 3 / 2)) {
|
||||
total_size -= free_block->size;
|
||||
sljit_remove_free_block(free_block);
|
||||
free_chunk(free_block, free_block->size + sizeof(struct block_header));
|
||||
}
|
||||
}
|
||||
|
||||
allocator_release_lock();
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
|
||||
{
|
||||
struct free_block* free_block;
|
||||
struct free_block* next_free_block;
|
||||
|
||||
allocator_grab_lock();
|
||||
|
||||
free_block = free_blocks;
|
||||
while (free_block) {
|
||||
next_free_block = free_block->next;
|
||||
if (!free_block->header.prev_size &&
|
||||
AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
|
||||
total_size -= free_block->size;
|
||||
sljit_remove_free_block(free_block);
|
||||
free_chunk(free_block, free_block->size + sizeof(struct block_header));
|
||||
}
|
||||
free_block = next_free_block;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
|
||||
allocator_release_lock();
|
||||
}
|
2029
pcre2/src/sljit/sljitLir.c
Normal file
2029
pcre2/src/sljit/sljitLir.c
Normal file
File diff suppressed because it is too large
Load diff
1249
pcre2/src/sljit/sljitLir.h
Normal file
1249
pcre2/src/sljit/sljitLir.h
Normal file
File diff suppressed because it is too large
Load diff
2566
pcre2/src/sljit/sljitNativeARM_32.c
Normal file
2566
pcre2/src/sljit/sljitNativeARM_32.c
Normal file
File diff suppressed because it is too large
Load diff
2050
pcre2/src/sljit/sljitNativeARM_64.c
Normal file
2050
pcre2/src/sljit/sljitNativeARM_64.c
Normal file
File diff suppressed because it is too large
Load diff
2090
pcre2/src/sljit/sljitNativeARM_T2_32.c
Normal file
2090
pcre2/src/sljit/sljitNativeARM_T2_32.c
Normal file
File diff suppressed because it is too large
Load diff
366
pcre2/src/sljit/sljitNativeMIPS_32.c
Normal file
366
pcre2/src/sljit/sljitNativeMIPS_32.c
Normal file
|
@ -0,0 +1,366 @@
|
|||
/*
|
||||
* Stack-less Just-In-Time compiler
|
||||
*
|
||||
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are
|
||||
* permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* mips 32-bit arch dependent functions. */
|
||||
|
||||
/* Emits the instruction(s) that load the constant imm into the register
   selected by dst_ar, using one instruction when the value permits it.
   Returns the result of push_inst, or SLJIT_SUCCESS when the trailing ORI
   can be omitted. */
static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
{
	/* No bits above the low 16: a single ORI is enough. */
	if ((imm & ~0xffff) == 0)
		return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);

	/* Negative and not below SIMM_MIN (presumably the smallest signed
	   16-bit immediate): a single ADDIU is enough. */
	if (imm >= SIMM_MIN && imm < 0)
		return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);

	/* General case: LUI with the upper half, then ORI with the lower half
	   unless the lower half is zero. */
	FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
	if ((imm & 0xffff) == 0)
		return SLJIT_SUCCESS;
	return push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
}
|
||||
|
||||
/* Statement macro for bitwise logical operations; it expands in a scope that
   provides compiler, op, flags, dst, src1 and src2.
   op_imm is the opcode used when SRC2_IMM is set in flags (src2 is then
   encoded through IMM()), op_norm the opcode used for a register src2.
   When op carries SLJIT_SET_E, the operation is emitted once more with
   EQUAL_FLAG as its destination. The write to dst is guarded by
   CHECK_FLAGS(SLJIT_SET_E), which is defined outside this view.
   Each FAIL_IF may return from the enclosing function. */
#define EMIT_LOGICAL(op_imm, op_norm) \
|
||||
if (flags & SRC2_IMM) { \
|
||||
if (op & SLJIT_SET_E) \
|
||||
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
|
||||
if (CHECK_FLAGS(SLJIT_SET_E)) \
|
||||
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
|
||||
} \
|
||||
else { \
|
||||
if (op & SLJIT_SET_E) \
|
||||
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
|
||||
if (CHECK_FLAGS(SLJIT_SET_E)) \
|
||||
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
|
||||
}
|
||||
|
||||
/* Statement macro for shift operations; it expands in a scope that provides
   compiler, op, flags, dst, src1 and src2.
   op_imm is the opcode used when SRC2_IMM is set in flags (the shift amount
   src2 is then encoded through SH_IMM()), op_v the opcode used when the
   shift amount is in register src2.
   When op carries SLJIT_SET_E, the shift is emitted once more with
   EQUAL_FLAG as its destination. The write to dst is guarded by
   CHECK_FLAGS(SLJIT_SET_E), which is defined outside this view.
   Each FAIL_IF may return from the enclosing function. */
#define EMIT_SHIFT(op_imm, op_v) \
|
||||
if (flags & SRC2_IMM) { \
|
||||
if (op & SLJIT_SET_E) \
|
||||
FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
|
||||
if (CHECK_FLAGS(SLJIT_SET_E)) \
|
||||
FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
|
||||
} \
|
||||
else { \
|
||||
if (op & SLJIT_SET_E) \
|
||||
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
|
||||
if (CHECK_FLAGS(SLJIT_SET_E)) \
|
||||
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
|
||||
}
|
||||
|
||||
/* Emits the MIPS instruction sequence for a single operation.
   The operation is selected by GET_OPCODE(op). When SRC2_IMM is set in
   flags, src2 holds an immediate value instead of a register. The
   SLJIT_SET_E / _C / _O / _U / _S bits of op additionally request writes to
   the EQUAL_FLAG, ULESS_FLAG, OVERFLOW_FLAG, UGREATER_FLAG, LESS_FLAG and
   GREATER_FLAG destinations, as coded in each case below.
   Returns SLJIT_SUCCESS or the result of the last push_inst; FAIL_IF may
   return early (presumably with an error code - defined outside this view).
   An opcode that is not handled reaches SLJIT_ASSERT_STOP(). */
static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
|
||||
sljit_si dst, sljit_si src1, sljit_sw src2)
|
||||
{
|
||||
switch (GET_OPCODE(op)) {
|
||||
/* Word-sized moves: copy src2 to dst with ADDU unless both are the same. */
case SLJIT_MOV:
|
||||
case SLJIT_MOV_UI:
|
||||
case SLJIT_MOV_SI:
|
||||
case SLJIT_MOV_P:
|
||||
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
|
||||
if (dst != src2)
|
||||
return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
/* Byte moves: when both REG_DEST and REG2_SOURCE are set, sign-extend
   (SEB, or SLL/SRA by 24) for SLJIT_MOV_SB, otherwise mask with 0xff. */
case SLJIT_MOV_UB:
|
||||
case SLJIT_MOV_SB:
|
||||
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
|
||||
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
|
||||
if (op == SLJIT_MOV_SB) {
|
||||
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
|
||||
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
|
||||
#else
|
||||
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
|
||||
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
|
||||
#endif
|
||||
}
|
||||
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
|
||||
}
|
||||
else if (dst != src2)
|
||||
SLJIT_ASSERT_STOP();
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
/* Half-word moves: same scheme as the byte moves, with SEH or shifts by 16,
   and a 0xffff mask for the unsigned form. */
case SLJIT_MOV_UH:
|
||||
case SLJIT_MOV_SH:
|
||||
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
|
||||
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
|
||||
if (op == SLJIT_MOV_SH) {
|
||||
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
|
||||
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
|
||||
#else
|
||||
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
|
||||
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
|
||||
#endif
|
||||
}
|
||||
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
|
||||
}
|
||||
else if (dst != src2)
|
||||
SLJIT_ASSERT_STOP();
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
/* Bitwise NOT, emitted as NOR of src2 with itself. */
case SLJIT_NOT:
|
||||
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
|
||||
if (op & SLJIT_SET_E)
|
||||
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
|
||||
if (CHECK_FLAGS(SLJIT_SET_E))
|
||||
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
/* Count leading zeros: the CLZ instruction when SLJIT_MIPS_R1 is enabled,
   otherwise an emitted shift-and-count loop. */
case SLJIT_CLZ:
|
||||
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
|
||||
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
|
||||
if (op & SLJIT_SET_E)
|
||||
FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
|
||||
if (CHECK_FLAGS(SLJIT_SET_E))
|
||||
FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
|
||||
#else
|
||||
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
|
||||
FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
|
||||
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
|
||||
}
|
||||
/* Nearly all instructions are unmovable in the following sequence. */
|
||||
FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
|
||||
/* Check zero. */
|
||||
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
|
||||
FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
|
||||
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
|
||||
/* Loop for searching the highest bit. */
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
|
||||
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
|
||||
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
|
||||
if (op & SLJIT_SET_E)
|
||||
return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
|
||||
#endif
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
/* Addition; the carry is derived by comparing the sum against (src1 | src2). */
case SLJIT_ADD:
|
||||
if (flags & SRC2_IMM) {
|
||||
if (op & SLJIT_SET_O) {
|
||||
if (src2 >= 0)
|
||||
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
else
|
||||
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
}
|
||||
if (op & SLJIT_SET_E)
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
|
||||
if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
|
||||
if (src2 >= 0)
|
||||
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
|
||||
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
|
||||
}
|
||||
}
|
||||
/* dst may be the same as src1 or src2. */
|
||||
if (CHECK_FLAGS(SLJIT_SET_E))
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
|
||||
}
|
||||
else {
|
||||
if (op & SLJIT_SET_O)
|
||||
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
if (op & SLJIT_SET_E)
|
||||
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
|
||||
if (op & (SLJIT_SET_C | SLJIT_SET_O))
|
||||
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
|
||||
/* dst may be the same as src1 or src2. */
|
||||
if (CHECK_FLAGS(SLJIT_SET_E))
|
||||
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
|
||||
}
|
||||
|
||||
/* a + b >= a | b (otherwise, the carry should be set to 1). */
|
||||
if (op & (SLJIT_SET_C | SLJIT_SET_O))
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
|
||||
if (!(op & SLJIT_SET_O))
|
||||
return SLJIT_SUCCESS;
|
||||
FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
|
||||
|
||||
/* Addition that also adds ULESS_FLAG to the sum (see the ADDU with
   TA(ULESS_FLAG) below). */
case SLJIT_ADDC:
|
||||
if (flags & SRC2_IMM) {
|
||||
if (op & SLJIT_SET_C) {
|
||||
if (src2 >= 0)
|
||||
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
|
||||
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
}
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
|
||||
} else {
|
||||
if (op & SLJIT_SET_C)
|
||||
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
/* dst may be the same as src1 or src2. */
|
||||
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
|
||||
}
|
||||
if (op & SLJIT_SET_C)
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
|
||||
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
|
||||
if (!(op & SLJIT_SET_C))
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
/* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
|
||||
/* Set carry flag. */
|
||||
return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
|
||||
|
||||
/* Subtraction. An immediate src2 is first moved into TMP_REG2 when the
   SLJIT_SET_U / SLJIT_SET_S flags are requested or when it equals SIMM_MIN;
   the remaining immediate path adds -src2. */
case SLJIT_SUB:
|
||||
if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
|
||||
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
|
||||
src2 = TMP_REG2;
|
||||
flags &= ~SRC2_IMM;
|
||||
}
|
||||
|
||||
if (flags & SRC2_IMM) {
|
||||
if (op & SLJIT_SET_O) {
|
||||
if (src2 >= 0)
|
||||
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
else
|
||||
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
}
|
||||
if (op & SLJIT_SET_E)
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
|
||||
if (op & (SLJIT_SET_C | SLJIT_SET_O))
|
||||
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
|
||||
/* dst may be the same as src1 or src2. */
|
||||
if (CHECK_FLAGS(SLJIT_SET_E))
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
|
||||
}
|
||||
else {
|
||||
if (op & SLJIT_SET_O)
|
||||
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
if (op & SLJIT_SET_E)
|
||||
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
|
||||
if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
|
||||
if (op & SLJIT_SET_U)
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
|
||||
if (op & SLJIT_SET_S) {
|
||||
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
|
||||
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
|
||||
}
|
||||
/* dst may be the same as src1 or src2. */
|
||||
if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
|
||||
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
|
||||
}
|
||||
|
||||
if (!(op & SLJIT_SET_O))
|
||||
return SLJIT_SUCCESS;
|
||||
FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
|
||||
|
||||
/* Subtraction that also subtracts ULESS_FLAG from the difference (see the
   SUBU with TA(ULESS_FLAG) below). */
case SLJIT_SUBC:
|
||||
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
|
||||
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
|
||||
src2 = TMP_REG2;
|
||||
flags &= ~SRC2_IMM;
|
||||
}
|
||||
|
||||
if (flags & SRC2_IMM) {
|
||||
if (op & SLJIT_SET_C)
|
||||
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
|
||||
/* dst may be the same as src1 or src2. */
|
||||
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
|
||||
}
|
||||
else {
|
||||
if (op & SLJIT_SET_C)
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
|
||||
/* dst may be the same as src1 or src2. */
|
||||
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
|
||||
}
|
||||
|
||||
if (op & SLJIT_SET_C)
|
||||
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
|
||||
|
||||
FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
|
||||
return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
|
||||
|
||||
/* Multiplication. With SLJIT_SET_O the MFHI result is compared against the
   sign (SRA by 31) of the MFLO result, and the difference is written to
   OVERFLOW_FLAG. */
case SLJIT_MUL:
|
||||
SLJIT_ASSERT(!(flags & SRC2_IMM));
|
||||
if (!(op & SLJIT_SET_O)) {
|
||||
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
|
||||
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
|
||||
#else
|
||||
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
|
||||
return push_inst(compiler, MFLO | D(dst), DR(dst));
|
||||
#endif
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
|
||||
FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
|
||||
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
|
||||
FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
|
||||
return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
|
||||
|
||||
/* Logical and shift operations are expanded from the EMIT_LOGICAL and
   EMIT_SHIFT statement macros. */
case SLJIT_AND:
|
||||
EMIT_LOGICAL(ANDI, AND);
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
case SLJIT_OR:
|
||||
EMIT_LOGICAL(ORI, OR);
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
case SLJIT_XOR:
|
||||
EMIT_LOGICAL(XORI, XOR);
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
case SLJIT_SHL:
|
||||
EMIT_SHIFT(SLL, SLLV);
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
case SLJIT_LSHR:
|
||||
EMIT_SHIFT(SRL, SRLV);
|
||||
return SLJIT_SUCCESS;
|
||||
|
||||
case SLJIT_ASHR:
|
||||
EMIT_SHIFT(SRA, SRAV);
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
/* No case matched the opcode. */
SLJIT_ASSERT_STOP();
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
/* Emits a fixed two-instruction sequence that loads init_value into dst:
   a LUI carrying bits 16..31 followed by an ORI carrying bits 0..15.
   Both instructions are always pushed, whatever the value is.
   NOTE(review): presumably this is the pair that sljit_set_const later
   patches in place - confirm against the callers.
   Returns the result of the last push_inst, or leaves early through FAIL_IF. */
static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
{
	const sljit_ins load_upper = LUI | T(dst) | IMM(init_value >> 16);
	const sljit_ins merge_lower = ORI | S(dst) | T(dst) | IMM(init_value);

	FAIL_IF(push_inst(compiler, load_upper, DR(dst)));
	return push_inst(compiler, merge_lower, DR(dst));
}
|
||||
|
||||
/* Rewrites the 16-bit immediate fields of the two instructions located at
   addr so that together they encode new_addr: bits 16..31 go into the first
   instruction, bits 0..15 into the second. The upper 16 bits of each
   instruction word are preserved. The two patched words are then passed to
   SLJIT_CACHE_FLUSH. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
	sljit_ins *patch = (sljit_ins*)addr;
	const sljit_uw upper_half = (new_addr >> 16) & 0xffff;
	const sljit_uw lower_half = new_addr & 0xffff;

	patch[0] = (patch[0] & 0xffff0000) | upper_half;
	patch[1] = (patch[1] & 0xffff0000) | lower_half;
	SLJIT_CACHE_FLUSH(patch, patch + 2);
}
|
||||
|
||||
/* Rewrites the 16-bit immediate fields of the two instructions located at
   addr so that together they encode new_constant: bits 16..31 go into the
   first instruction, bits 0..15 into the second. The upper 16 bits of each
   instruction word are preserved. The two patched words are then passed to
   SLJIT_CACHE_FLUSH. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	sljit_ins *patch = (sljit_ins*)addr;
	const sljit_sw upper_half = (new_constant >> 16) & 0xffff;
	const sljit_sw lower_half = new_constant & 0xffff;

	patch[0] = (patch[0] & 0xffff0000) | upper_half;
	patch[1] = (patch[1] & 0xffff0000) | lower_half;
	SLJIT_CACHE_FLUSH(patch, patch + 2);
}
|
469
pcre2/src/sljit/sljitNativeMIPS_64.c
Normal file
469
pcre2/src/sljit/sljitNativeMIPS_64.c
Normal file
|
@ -0,0 +1,469 @@
|
|||
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
||||
|
||||
/* MIPS 64-bit architecture-dependent functions. */
|
||||
|
||||
/* Emits the instructions that place the value imm into dst_ar.

   The cases are tried in this order:
     - no bit set above bit 15: a single ORI from register 0;
     - negative and not below SIMM_MIN: a single ADDIU from register 0;
     - representable in 32 signed bits: LUI, plus ORI when the low half is
       non-zero;
     - anything else: the leading part of the value is loaded with LUI
       (+ optional ORI), shifted left, and the remaining low bits are merged
       in with ORI; when more than 16 low bits remain a second shift/ORI
       round is emitted.

   Returns SLJIT_SUCCESS or the result of the last push_inst; FAIL_IF leaves
   early when an intermediate push_inst fails. */
static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
{
	sljit_si shamt = 32;
	sljit_si shamt2;
	sljit_si negated = 0;
	sljit_ins shift_ins;
	sljit_uw bits;

	if ((imm & ~0xffff) == 0)
		return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);

	if (imm < 0 && imm >= SIMM_MIN)
		return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);

	if (imm >= -0x80000000l && imm <= 0x7fffffffl) {
		FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
		if (imm & 0xffff)
			return push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
		return SLJIT_SUCCESS;
	}

	/* Work on a non-negative bit pattern: negative values are complemented
	   here and complemented back after normalization. */
	if (imm < 0) {
		bits = ~imm;
		negated = 1;
	}
	else
		bits = imm;

	/* Normalize: move the highest set bit up to bit 62, tracking in shamt
	   how far the loaded prefix must later be shifted back. */
	while ((bits & 0xff00000000000000l) == 0) {
		shamt -= 8;
		bits <<= 8;
	}

	if ((bits & 0xf000000000000000l) == 0) {
		shamt -= 4;
		bits <<= 4;
	}

	if ((bits & 0xc000000000000000l) == 0) {
		shamt -= 2;
		bits <<= 2;
	}

	if ((sljit_sw)bits < 0) {
		/* Bit 63 got set: step back by one position. */
		bits >>= 1;
		shamt += 1;
	}
	SLJIT_ASSERT(((bits & 0xc000000000000000l) == 0x4000000000000000l) && (shamt > 0) && (shamt <= 32));

	if (negated)
		bits = ~bits;

	/* Load the top 32 bits of the normalized pattern. */
	FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(bits >> 48), dst_ar));
	if (bits & 0x0000ffff00000000l) {
		FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(bits >> 32), dst_ar));
	}

	/* Keep only the low shamt bits of the original value. */
	imm &= (1l << shamt) - 1;
	if ((imm & ~0xffff) == 0) {
		/* The remainder fits in 16 bits: one shift and at most one ORI. */
		if (shamt == 32)
			shift_ins = DSLL32;
		else {
			shift_ins = DSLL;
			if (shamt < 32)
				shift_ins |= SH_IMM(shamt);
		}
		FAIL_IF(push_inst(compiler, shift_ins | TA(dst_ar) | DA(dst_ar), dst_ar));
		if (imm & 0xffff)
			return push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
		return SLJIT_SUCCESS;
	}

	/* Two shifts are needed: normalize the next part of the pattern so that
	   its highest set bit sits at bit 63, tracking the second shift amount. */
	bits <<= 32;
	shamt2 = shamt - 16;

	while ((bits & 0xf000000000000000l) == 0) {
		shamt2 -= 4;
		bits <<= 4;
	}

	if ((bits & 0xc000000000000000l) == 0) {
		shamt2 -= 2;
		bits <<= 2;
	}

	if ((bits & 0x8000000000000000l) == 0) {
		shamt2--;
		bits <<= 1;
	}

	SLJIT_ASSERT((bits & 0x8000000000000000l) && (shamt2 > 0) && (shamt2 <= 16));

	FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shamt - shamt2), dst_ar));
	FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(bits >> 48), dst_ar));
	FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shamt2), dst_ar));

	imm &= (1l << shamt2) - 1;
	if (imm & 0xffff)
		return push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
	return SLJIT_SUCCESS;
}
|
||||
|
||||
/* Yields b when the enclosing function's `op` has SLJIT_INT_OP set and a
   otherwise. Call sites in this file pass the D-prefixed instruction as a
   and its non-prefixed counterpart as b. Requires `op` to be in scope. */
#define SELECT_OP(a, b) \
	((op & SLJIT_INT_OP) ? b : a)
|
||||
|
||||
/* Emits a bitwise logical operation.
     op_imm  - instruction used when src2 is an immediate (flags & SRC2_IMM)
     op_norm - instruction used when src2 is a register
   When op has SLJIT_SET_E the operation is first emitted with EQUAL_FLAG as
   its target; it is emitted again with dst as target when
   CHECK_FLAGS(SLJIT_SET_E) holds (CHECK_FLAGS is defined outside this file
   section).
   Expects compiler, op, flags, dst, src1 and src2 in the enclosing scope and
   may return from the enclosing function through FAIL_IF.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement: `EMIT_LOGICAL(...);` is safe inside an unbraced if/else
   and leaves no stray empty statement behind. */
#define EMIT_LOGICAL(op_imm, op_norm) \
	do { \
		if (flags & SRC2_IMM) { \
			if (op & SLJIT_SET_E) \
				FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
			if (CHECK_FLAGS(SLJIT_SET_E)) \
				FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
		} \
		else { \
			if (op & SLJIT_SET_E) \
				FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
			if (CHECK_FLAGS(SLJIT_SET_E)) \
				FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
		} \
	} while (0)
|
||||
|
||||
/* Emits a shift operation.
     op_dimm   - immediate shift used when SLJIT_INT_OP is clear and src2 < 32
     op_dimm32 - immediate shift used when src2 >= 32 (src2 is reduced by 32
                 before being encoded; asserted to be a non-SLJIT_INT_OP case)
     op_imm    - immediate shift used when SLJIT_INT_OP is set
     op_dv     - register (variable) shift used when SLJIT_INT_OP is clear
     op_v      - register (variable) shift used when SLJIT_INT_OP is set
   When op has SLJIT_SET_E the shift is first emitted with EQUAL_FLAG as its
   target; it is emitted again with dst as target when
   CHECK_FLAGS(SLJIT_SET_E) holds (CHECK_FLAGS is defined outside this file
   section).
   Expects compiler, op, flags, dst, src1, src2 and a scratch variable `ins`
   in the enclosing scope; it modifies src2 and ins and may return from the
   enclosing function through FAIL_IF.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement: `EMIT_SHIFT(...);` is safe inside an unbraced if/else
   and leaves no stray empty statement behind. */
#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
	do { \
		if (flags & SRC2_IMM) { \
			if (src2 >= 32) { \
				SLJIT_ASSERT(!(op & SLJIT_INT_OP)); \
				ins = op_dimm32; \
				src2 -= 32; \
			} \
			else \
				ins = (op & SLJIT_INT_OP) ? op_imm : op_dimm; \
			if (op & SLJIT_SET_E) \
				FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
			if (CHECK_FLAGS(SLJIT_SET_E)) \
				FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
		} \
		else { \
			ins = (op & SLJIT_INT_OP) ? op_v : op_dv; \
			if (op & SLJIT_SET_E) \
				FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
			if (CHECK_FLAGS(SLJIT_SET_E)) \
				FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
		} \
	} while (0)
|
||||
|
||||
/* Emits the MIPS64 instruction(s) for one operation.

     compiler - compiler instance the instructions are pushed to
     op       - opcode (extracted with GET_OPCODE) combined with modifier bits
                tested below: SLJIT_INT_OP and SLJIT_SET_E / _U / _S / _C / _O
     flags    - operand description bits tested below: SRC2_IMM, REG_DEST,
                REG2_SOURCE, UNUSED_DEST
     dst      - destination register
     src1     - first source register
     src2     - second source: a register, or an immediate when SRC2_IMM is set

   SELECT_OP(a, b) picks the first instruction unless SLJIT_INT_OP is set.
   Requested status flags are materialized in the EQUAL_FLAG, ULESS_FLAG,
   UGREATER_FLAG, LESS_FLAG, GREATER_FLAG and OVERFLOW_FLAG registers.

   Returns SLJIT_SUCCESS or the result of the last push_inst; FAIL_IF returns
   early when an intermediate push_inst fails. An opcode not handled by the
   switch reaches SLJIT_ASSERT_STOP(). */
static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
	sljit_si dst, sljit_si src1, sljit_sw src2)
{
	/* Scratch instruction word used by the EMIT_SHIFT macro. */
	sljit_ins ins;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		/* Register copy expressed as an add with register 0; nothing to do when dst == src2. */
		if (dst != src2)
			return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
		return SLJIT_SUCCESS;

	case SLJIT_MOV_UB:
	case SLJIT_MOV_SB:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
			if (op == SLJIT_MOV_SB) {
				/* Sign extension: shift left, then arithmetic shift right by the same amount. */
				FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
				return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
			}
			/* Zero extension: keep the low 8 bits. */
			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
		}
		else if (dst != src2)
			SLJIT_ASSERT_STOP();
		return SLJIT_SUCCESS;

	case SLJIT_MOV_UH:
	case SLJIT_MOV_SH:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
			if (op == SLJIT_MOV_SH) {
				/* Sign extension: shift left, then arithmetic shift right by the same amount. */
				FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
				return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
			}
			/* Zero extension: keep the low 16 bits. */
			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
		}
		else if (dst != src2)
			SLJIT_ASSERT_STOP();
		return SLJIT_SUCCESS;

	case SLJIT_MOV_UI:
		SLJIT_ASSERT(!(op & SLJIT_INT_OP));
		/* Zero extension: shift left, then logical shift right by the same amount. */
		FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
		return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));

	case SLJIT_MOV_SI:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		/* SLL by 0: on MIPS64 a 32-bit shift sign-extends its 32-bit result. */
		return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));

	case SLJIT_NOT:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		/* NOR of a register with itself is its bitwise complement. */
		if (op & SLJIT_SET_E)
			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
		if (CHECK_FLAGS(SLJIT_SET_E))
			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
		return SLJIT_SUCCESS;

	case SLJIT_CLZ:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
		if (op & SLJIT_SET_E)
			FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
		if (CHECK_FLAGS(SLJIT_SET_E))
			FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
#else
		if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
			/* Only the flag is wanted: take the top bit of src2 and invert it. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
			return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
		}
		/* Nearly all instructions are unmovable in the following sequence. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
		/* Check zero. */
		FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
		FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_INT_OP) ? 32 : 64), UNMOVABLE_INS));
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
		/* Loop for searching the highest bit. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
		FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
		if (op & SLJIT_SET_E)
			return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
#endif
		return SLJIT_SUCCESS;

	case SLJIT_ADD:
		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_O) {
				/* Seed OVERFLOW_FLAG with src1, or with its complement when the immediate is negative. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
				else
					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			}
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
			if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
				/* ULESS_FLAG = src1 | src2, used by the carry test after the addition. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
				else {
					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
				}
			}
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
		}
		else {
			if (op & SLJIT_SET_O)
				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
			if (op & (SLJIT_SET_C | SLJIT_SET_O))
				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}

		/* a + b >= a | b (otherwise, the carry should be set to 1). */
		if (op & (SLJIT_SET_C | SLJIT_SET_O))
			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
		if (!(op & SLJIT_SET_O))
			return SLJIT_SUCCESS;
		/* Move the carry into the sign-bit position and fold it, together with
		   the result, into OVERFLOW_FLAG. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		/* Bring the sign-bit position down to bit 0. The 32-bit form must be a
		   logical RIGHT shift (SRL), exactly as in the SLJIT_SUB case below;
		   a left shift (SLL) by 31 would discard the computed overflow bit. */
		return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);

	case SLJIT_ADDC:
		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_C) {
				/* OVERFLOW_FLAG temporarily holds src1 | src2 for the carry test. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
				else {
					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
				}
			}
			FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
		} else {
			if (op & SLJIT_SET_C)
				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			/* dst may be the same as src1 or src2. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}
		if (op & SLJIT_SET_C)
			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));

		/* Add the incoming carry, which is read from ULESS_FLAG. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
		if (!(op & SLJIT_SET_C))
			return SLJIT_SUCCESS;

		/* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
		FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
		/* Set carry flag. */
		return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);

	case SLJIT_SUB:
		/* Comparison flags (and the SIMM_MIN immediate) use the register form:
		   load the immediate into TMP_REG2 first. */
		if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_O) {
				/* Seed OVERFLOW_FLAG with src1, or with its complement when the immediate is negative. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
				else
					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			}
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
			if (op & (SLJIT_SET_C | SLJIT_SET_O))
				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
		}
		else {
			if (op & SLJIT_SET_O)
				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
			if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
			if (op & SLJIT_SET_U)
				FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
			if (op & SLJIT_SET_S) {
				FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
				FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
			}
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
				FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}

		if (!(op & SLJIT_SET_O))
			return SLJIT_SUCCESS;
		/* Same overflow folding as in SLJIT_ADD: borrow into the sign-bit
		   position, XOR with the seed and the result, then shift down to bit 0. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);

	case SLJIT_SUBC:
		/* The immediate form negates src2, so SIMM_MIN goes through a register. */
		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_C)
				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
			/* dst may be the same as src1 or src2. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
		}
		else {
			if (op & SLJIT_SET_C)
				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			/* dst may be the same as src1 or src2. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}

		/* Borrow produced by subtracting the incoming carry (read from ULESS_FLAG). */
		if (op & SLJIT_SET_C)
			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));

		FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
		/* New carry = borrow of the first subtraction OR borrow of the second. */
		return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & SRC2_IMM));
		if (!(op & SLJIT_SET_O)) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
			if (op & SLJIT_INT_OP)
				return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
			FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
			return push_inst(compiler, MFLO | D(dst), DR(dst));
#else
			FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
			return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif
		}
		/* Overflow check: OVERFLOW_FLAG = HI - (dst >> sign position), which is
		   zero when HI equals the sign extension of the low result. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
		FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
		FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
		FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
		return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);

	case SLJIT_AND:
		EMIT_LOGICAL(ANDI, AND);
		return SLJIT_SUCCESS;

	case SLJIT_OR:
		EMIT_LOGICAL(ORI, OR);
		return SLJIT_SUCCESS;

	case SLJIT_XOR:
		EMIT_LOGICAL(XORI, XOR);
		return SLJIT_SUCCESS;

	case SLJIT_SHL:
		EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
		return SLJIT_SUCCESS;

	case SLJIT_LSHR:
		EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
		return SLJIT_SUCCESS;

	case SLJIT_ASHR:
		EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
		return SLJIT_SUCCESS;
	}

	SLJIT_ASSERT_STOP();
	return SLJIT_SUCCESS;
}
|
||||
|
||||
/* Emits a fixed six-instruction sequence that builds init_value in dst,
   16 bits at a time from the most significant chunk down:
     LUI (bits 48..63), ORI (bits 32..47), DSLL 16, ORI (bits 16..31),
     DSLL 16, ORI (bits 0..15).
   All six instructions are always pushed, whatever the value is.
   NOTE(review): presumably this is the sequence that sljit_set_const later
   patches in place - confirm against the callers.
   Returns the result of the last push_inst, or leaves early through FAIL_IF. */
static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
{
	const sljit_ins or_into_dst = ORI | S(dst) | T(dst);
	const sljit_ins shift_left_16 = DSLL | T(dst) | D(dst) | SH_IMM(16);

	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst)));
	FAIL_IF(push_inst(compiler, or_into_dst | IMM(init_value >> 32), DR(dst)));
	FAIL_IF(push_inst(compiler, shift_left_16, DR(dst)));
	FAIL_IF(push_inst(compiler, or_into_dst | IMM(init_value >> 16), DR(dst)));
	FAIL_IF(push_inst(compiler, shift_left_16, DR(dst)));
	return push_inst(compiler, or_into_dst | IMM(init_value), DR(dst));
}
|
||||
|
||||
/* Rewrites the 16-bit immediate fields of the instruction words 0, 1, 3 and 5
   located at addr so that they carry bits 48..63, 32..47, 16..31 and 0..15
   of new_addr respectively. The upper 16 bits of each patched word are
   preserved; words 2 and 4 are left untouched. All six words are then
   passed to SLJIT_CACHE_FLUSH.
   NOTE(review): the word positions match the LUI/ORI slots of the sequence
   built by emit_const in this file - presumably the jump address load uses
   the same layout; confirm against the emitter. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
	static const sljit_si slot[4] = { 0, 1, 3, 5 };
	sljit_ins *inst = (sljit_ins*)addr;
	sljit_si i;

	/* Chunk i is taken from bit position 48, 32, 16, 0. */
	for (i = 0; i < 4; i++)
		inst[slot[i]] = (inst[slot[i]] & 0xffff0000) | ((new_addr >> (48 - 16 * i)) & 0xffff);

	SLJIT_CACHE_FLUSH(inst, inst + 6);
}
|
||||
|
||||
/* Rewrites the 16-bit immediate fields of the instruction words 0, 1, 3 and 5
   located at addr so that they carry bits 48..63, 32..47, 16..31 and 0..15
   of new_constant respectively. The upper 16 bits of each patched word are
   preserved; words 2 and 4 are left untouched. All six words are then
   passed to SLJIT_CACHE_FLUSH.
   NOTE(review): the word positions match the LUI/ORI slots of the sequence
   built by emit_const in this file - presumably that is the sequence being
   patched; confirm against the callers. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	static const sljit_si slot[4] = { 0, 1, 3, 5 };
	sljit_ins *inst = (sljit_ins*)addr;
	sljit_si i;

	/* Chunk i is taken from bit position 48, 32, 16, 0. */
	for (i = 0; i < 4; i++)
		inst[slot[i]] = (inst[slot[i]] & 0xffff0000) | ((new_constant >> (48 - 16 * i)) & 0xffff);

	SLJIT_CACHE_FLUSH(inst, inst + 6);
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue