pcre2: import version 10.35

Files removed as described in b418e36f2
2024-12-27 05:13:10 +00:00 · 2020-05-10 22:27:46 +08:00 · 2020-05-10 22:27:46 +08:00 · 73ecf1576b
commit 73ecf1576b
parent 3252d0fd03
21 changed files with 3039 additions and 2598 deletions
--- a/pcre2/CMakeLists.txt
+++ b/pcre2/CMakeLists.txt
@ -1,6 +1,5 @@
 # CMakeLists.txt
 #
-#
 # This file enables PCRE2 to be built with the CMake configuration and build
 # tool. Download CMake in source or binary form from http://www.cmake.org/
 # Converted to support PCRE2 from the original PCRE file, August 2014.
@ -85,6 +84,14 @@
 # 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
 # 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
 # 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
+# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
+# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
+# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
+# 2020-04-16 enh added check for __attribute__((uninitialized))
+# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
+#            library versioning.
+# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
+# 2020-04-28 PH added function check for memfd_create based on Carlo's patch

 PROJECT(PCRE2 C)

@ -95,14 +102,26 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
 # GET_TARGET_PROPERTY. This should no longer be required.
 # CMAKE_POLICY(SET CMP0026 OLD)

-SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
+# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
+# on the command line.
+# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

 SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")

+# external packages
+FIND_PACKAGE( BZip2 )
+FIND_PACKAGE( ZLIB )
+FIND_PACKAGE( Readline )
+FIND_PACKAGE( Editline )
+
 # Configuration checks

-INCLUDE(CheckIncludeFile)
+INCLUDE(CheckCSourceCompiles)
 INCLUDE(CheckFunctionExists)
+INCLUDE(CheckSymbolExists)
+INCLUDE(CheckIncludeFile)
 INCLUDE(CheckTypeSize)

 CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
@ -114,9 +133,19 @@ CHECK_INCLUDE_FILE(unistd.h     HAVE_UNISTD_H)
 CHECK_INCLUDE_FILE(windows.h    HAVE_WINDOWS_H)

 CHECK_FUNCTION_EXISTS(bcopy         HAVE_BCOPY)
+CHECK_FUNCTION_EXISTS(memfd_create  HAVE_MEMFD_CREATE)
 CHECK_FUNCTION_EXISTS(memmove       HAVE_MEMMOVE)
+CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV)
 CHECK_FUNCTION_EXISTS(strerror      HAVE_STRERROR)

+# Probe for __attribute__((uninitialized)). -Werror is appended to
+# CMAKE_REQUIRED_FLAGS only for the duration of this check, and the previous
+# value is restored afterwards so later checks are unaffected.
+# NOTE(review): presumably -Werror is needed because a compiler that does not
+# know the attribute would only warn and the probe would still compile -
+# confirm.
+set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+CHECK_C_SOURCE_COMPILES(
+  "int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
+  HAVE_ATTRIBUTE_UNINITIALIZED
+)
+set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
+
 # User-configurable options
 #
 # Note: CMakeSetup displays these in alphabetical order, regardless of
@ -171,8 +200,12 @@ SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
 SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
    "Enable support for Just-in-time compiling.")

-SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
+# Offer the SELinux-compatible allocator as a cache option only when the
+# system name matches Linux or NetBSD. On any other system the variable is
+# set to IGNORE, which CMake's if() evaluates as false.
+IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+    SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
         "Enable SELinux compatible execmem allocator in JIT (experimental).")
+ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+    SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
+ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)

 SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
    "Enable use of Just-in-time compiling in pcre2grep.")
@ -298,7 +331,19 @@ IF(PCRE2_SUPPORT_JIT)
 ENDIF(PCRE2_SUPPORT_JIT)

 IF(PCRE2_SUPPORT_JIT_SEALLOC)
+        # Look for mkostemp in stdlib.h with _GNU_SOURCE defined for the
+        # check only; the result is stored in the variable REQUIRED.
+        SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
+	CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED)
+        UNSET(CMAKE_REQUIRED_DEFINITIONS)
+        IF(${REQUIRED})
+                IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+                        # Supported system: define _GNU_SOURCE for the build
+                        # and set SLJIT_PROT_EXECUTABLE_ALLOCATOR.
+                        ADD_DEFINITIONS(-D_GNU_SOURCE)
                         SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+                ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+                        MESSAGE(FATAL_ERROR "Your configuration is not supported")
+                ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+        ELSE(${REQUIRED})
+                # mkostemp was not found: set the option variable to OFF
+                # rather than failing the configure step.
+                SET(PCRE2_SUPPORT_JIT_SEALLOC OFF)
+        ENDIF(${REQUIRED})
 ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)

 IF(PCRE2GREP_SUPPORT_JIT)
@ -394,12 +439,13 @@ file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
  LIMIT_COUNT 50 # Read only the first 50 lines of the file
 )

-set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
+set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date"
+  "libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version")
 foreach(configure_line ${configure_lines})
    foreach(_substitution_variable ${SEARCHED_VARIABLES})
        string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
        if (NOT ${_substitution_variable_upper})
-            string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
+            string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MACTHED_STRING ${configure_line})
            if (CMAKE_MATCH_1)
                set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
            endif()
@ -407,21 +453,74 @@ foreach(configure_line ${configure_lines})
    endforeach()
 endforeach()

+# PARSE_LIB_VERSION(<prefix>)
+#
+# Splits the libtool-style "current:revision:age" string held in
+# <prefix>_VERSION (extracted from configure.ac by the loop above) and
+# derives the values used for the shared-library target properties:
+#   <prefix>_VERSION_CURRENT, <prefix>_VERSION_REVISION, <prefix>_VERSION_AGE
+#   <prefix>_SOVERSION                   = current - age
+#   <prefix>_MACHO_COMPATIBILITY_VERSION = current + 1
+#   <prefix>_MACHO_CURRENT_VERSION       = (current + 1).revision
+#   <prefix>_VERSION (overwritten)       = soversion.age.revision
+# This is a macro, not a function, so all results land in the caller's scope.
+macro(PARSE_LIB_VERSION VARIABLE_PREFIX)
+  # Fail with a readable message if configure.ac did not yield a usable value,
+  # instead of an obscure error from string()/list() below.
+  if(NOT "${${VARIABLE_PREFIX}_VERSION}" MATCHES "^[0-9]+:[0-9]+:[0-9]+$")
+    message(FATAL_ERROR "${VARIABLE_PREFIX}_VERSION (\"${${VARIABLE_PREFIX}_VERSION}\") is not of the form current:revision:age; check the m4_define lines in configure.ac")
+  endif()
+
+  string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST "${${VARIABLE_PREFIX}_VERSION}")
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 ${VARIABLE_PREFIX}_VERSION_CURRENT)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE)
+
+  math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}")
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  # Append the revision as the minor component. (A stray closing brace used to
+  # be appended here, producing values such as "11.0}".)
+  set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}")
+  set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}")
+endmacro()
+
+PARSE_LIB_VERSION(LIBPCRE2_POSIX)
+PARSE_LIB_VERSION(LIBPCRE2_8)
+PARSE_LIB_VERSION(LIBPCRE2_16)
+PARSE_LIB_VERSION(LIBPCRE2_32)
+
 CONFIGURE_FILE(src/pcre2.h.in
               ${PROJECT_BINARY_DIR}/pcre2.h
               @ONLY)

-# What about pcre2-config and libpcre2.pc?
+# pkg-config (.pc) files and the pcre2-config script
+#
+# All templates are expanded with @ONLY substitution. prefix is taken from
+# CMAKE_INSTALL_PREFIX; exec_prefix, libdir and includedir are written as
+# literal ${...} references so they stay relative to prefix in the output.
+
+SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
+SET(prefix ${CMAKE_INSTALL_PREFIX})
+
+SET(exec_prefix "\${prefix}")
+SET(libdir "\${exec_prefix}/lib")
+SET(includedir "\${prefix}/include")
+
+# The .pc file for the POSIX wrapper is generated unconditionally.
+CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
+LIST(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
+
+# One .pc file per enabled code unit width. enable_pcre2_<width> records
+# "yes" or "no" for each width.
+FOREACH(_pcre2_width 8 16 32)
+  IF(PCRE2_BUILD_PCRE2_${_pcre2_width})
+    CONFIGURE_FILE(libpcre2-${_pcre2_width}.pc.in libpcre2-${_pcre2_width}.pc @ONLY)
+    LIST(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-${_pcre2_width}.pc")
+    SET(enable_pcre2_${_pcre2_width} "yes")
+  ELSE()
+    SET(enable_pcre2_${_pcre2_width} "no")
+  ENDIF()
+ENDFOREACH()
+
+# Configured last, after the enable_pcre2_* values above have been set.
+CONFIGURE_FILE(pcre2-config.in pcre2-config @ONLY)

 # Character table generation

 OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
 IF(PCRE2_REBUILD_CHARTABLES)
-  ADD_EXECUTABLE(dftables src/dftables.c)
+  ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
  ADD_CUSTOM_COMMAND(
    COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
-    DEPENDS dftables
-    COMMAND dftables
+    DEPENDS pcre2_dftables
+    COMMAND pcre2_dftables
    ARGS        ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    OUTPUT      ${PROJECT_BINARY_DIR}/pcre2_chartables.c
  )
@ -446,6 +545,7 @@ SET(PCRE2_SOURCES
  src/pcre2_error.c
  src/pcre2_extuni.c
  src/pcre2_find_bracket.c
+  src/pcre2_jit_compile.c
  src/pcre2_maketables.c
  src/pcre2_match.c
  src/pcre2_match_data.c
@ -464,6 +564,9 @@ SET(PCRE2_SOURCES
  src/pcre2_xclass.c
 )

+SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
+SET(PCRE2POSIX_SOURCES src/pcre2posix.c)
+
 IF(MINGW AND NOT PCRE2_STATIC)
 IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
 ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
@ -531,12 +634,20 @@ SET(targets)

 IF(PCRE2_BUILD_PCRE2_8)
 ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-8
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
+SET_TARGET_PROPERTIES(pcre2-8 PROPERTIES
+  COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+  MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
+  MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
+  VERSION ${LIBPCRE2_8_VERSION}
+  SOVERSION ${LIBPCRE2_8_SOVERSION})
 SET(targets ${targets} pcre2-8)
 ADD_LIBRARY(pcre2-posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
-SET_PROPERTY(TARGET pcre2-posix
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
+SET_TARGET_PROPERTIES(pcre2-posix PROPERTIES
+  COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+  MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
+  MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
+  VERSION ${LIBPCRE2_POSIX_VERSION}
+  SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
 SET(targets ${targets} pcre2-posix)
 TARGET_LINK_LIBRARIES(pcre2-posix pcre2-8)

@ -554,8 +665,12 @@ ENDIF(PCRE2_BUILD_PCRE2_8)

 IF(PCRE2_BUILD_PCRE2_16)
 ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-16
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
+# Version metadata for the 16-bit library comes from libpcre2_16_version in
+# configure.ac (parsed into the LIBPCRE2_16_* variables), not from the 32-bit
+# library's values.
+SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES
+  COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
+  MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
+  MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
+  VERSION ${LIBPCRE2_16_VERSION}
+  SOVERSION ${LIBPCRE2_16_SOVERSION})
 SET(targets ${targets} pcre2-16)

 IF(MINGW AND NOT PCRE2_STATIC)
@ -572,8 +687,12 @@ ENDIF(PCRE2_BUILD_PCRE2_16)

 IF(PCRE2_BUILD_PCRE2_32)
 ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-32
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
+SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES
+  COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
+  MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+  MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+  VERSION ${LIBPCRE2_32_VERSION}
+  SOVERSION ${LIBPCRE2_32_SOVERSION})
 SET(targets ${targets} pcre2-32)

 IF(MINGW AND NOT PCRE2_STATIC)
@ -746,6 +865,11 @@ INSTALL(TARGETS ${targets}
        RUNTIME DESTINATION bin
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib)
+INSTALL(FILES ${pkg_config_files} DESTINATION lib/pkgconfig)
+INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
+  DESTINATION bin
+  # Set 0755 permissions
+  PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)

 INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)

--- a/pcre2/LICENCE
+++ b/pcre2/LICENCE
@ -26,7 +26,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2020 University of Cambridge
 All rights reserved.


@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2020 Zoltan Herczeg
 All rights reserved.


@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2020 Zoltan Herczeg
 All rights reserved.


--- a/pcre2/config-cmake.h.in
+++ b/pcre2/config-cmake.h.in
@ -1,5 +1,6 @@
 /* config.h for CMake builds */

+#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
 #cmakedefine HAVE_DIRENT_H 1
 #cmakedefine HAVE_INTTYPES_H 1    
 #cmakedefine HAVE_STDINT_H 1                                                   
@ -10,7 +11,10 @@
 #cmakedefine HAVE_WINDOWS_H 1

 #cmakedefine HAVE_BCOPY 1
+#cmakedefine HAVE_MEMFD_CREATE 1
 #cmakedefine HAVE_MEMMOVE 1
+#cmakedefine HAVE_SECURE_GETENV 1
+#cmakedefine HAVE_STRERROR 1

 #cmakedefine PCRE2_STATIC 1

--- a/pcre2/configure.ac
+++ b/pcre2/configure.ac
@ -1,7 +1,13 @@
 m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [34])
+m4_define(pcre2_minor, [35])
 m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2019-11-21])
+m4_define(pcre2_date, [2020-05-09])
+
+# Libtool shared library interface versions (current:revision:age)
+m4_define(libpcre2_8_version,     [10:0:10])
+m4_define(libpcre2_16_version,    [10:0:10])
+m4_define(libpcre2_32_version,    [10:0:10])
+m4_define(libpcre2_posix_version, [2:3:0])

 # NOTE: The CMakeLists.txt file searches for the above variables in the first
 # 50 lines of this file. Please update that if the variables above are moved.
--- a/pcre2/src/pcre2.h.in
+++ b/pcre2/src/pcre2.h.in
@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, second API, to be
 #included by applications that call PCRE2 functions.

-           Copyright (c) 2016-2019 University of Cambridge
+           Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
 #define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  0x00001000u  /* pcre2_substitute() only */
 #define PCRE2_NO_JIT                      0x00002000u  /* Not for pcre2_dfa_match() */
 #define PCRE2_COPY_MATCHED_SUBJECT        0x00004000u
+#define PCRE2_SUBSTITUTE_LITERAL          0x00008000u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_MATCHED          0x00010000u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u  /* pcre2_substitute() only */

 /* Options for pcre2_pattern_convert(). */

@ -445,6 +448,7 @@ released, the numbers must not be changed. */
 #define PCRE2_CONFIG_HEAPLIMIT              12
 #define PCRE2_CONFIG_NEVER_BACKSLASH_C      13
 #define PCRE2_CONFIG_COMPILED_WIDTHS        14
+#define PCRE2_CONFIG_TABLES_LENGTH          15


 /* Types for code units in patterns and subject strings. */
--- a/pcre2/src/pcre2_auto_possess.c
+++ b/pcre2/src/pcre2_auto_possess.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -292,6 +292,7 @@ possessification, and if so, fills a list with its properties.
 Arguments:
  code        points to start of expression
  utf         TRUE if in UTF mode
+  ucp         TRUE if in UCP mode
  fcc         points to the case-flipping table
  list        points to output list
              list[0] will be filled with the opcode
@ -304,7 +305,7 @@ Returns:      points to the start of the next opcode if *code is accepted
 */

 static PCRE2_SPTR
-get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
+get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
  uint32_t *list)
 {
 PCRE2_UCHAR c = *code;
@ -316,7 +317,8 @@ uint32_t chr;
 uint32_t *clist_dest;
 const uint32_t *clist_src;
 #else
-(void)utf;    /* Suppress "unused parameter" compiler warning */
+(void)utf;    /* Suppress "unused parameter" compiler warnings */
+(void)ucp;
 #endif

 list[0] = c;
@ -396,7 +398,7 @@ switch(c)
  list[2] = chr;

 #ifdef SUPPORT_UNICODE
-  if (chr < 128 || (chr < 256 && !utf))
+  if (chr < 128 || (chr < 256 && !utf && !ucp))
    list[3] = fcc[chr];
  else
    list[3] = UCD_OTHERCASE(chr);
@ -503,6 +505,7 @@ which case the base cannot be possessified.
 Arguments:
  code        points to the byte code
  utf         TRUE in UTF mode
+  ucp         TRUE in UCP mode
  cb          compile data block
  base_list   the data list of the base opcode
  base_end    the end of the base opcode
@ -512,7 +515,7 @@ Returns:      TRUE if the auto-possessification is possible
 */

 static BOOL
-compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
+compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
  const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
 {
 PCRE2_UCHAR c;
@ -651,7 +654,7 @@ for(;;)

    while (*next_code == OP_ALT)
      {
-      if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
+      if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
        return FALSE;
      code = next_code + 1 + LINK_SIZE;
      next_code += GET(next_code, 1);
@ -672,7 +675,8 @@ for(;;)
    /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */

    next_code += 1 + LINK_SIZE;
-    if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
+    if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
+         rec_limit))
      return FALSE;

    code += PRIV(OP_lengths)[c];
@ -688,7 +692,7 @@ for(;;)
  /* We now have the next appropriate opcode to compare with the base. Check
  for a supported opcode, and load its properties. */

-  code = get_chr_property_list(code, utf, cb->fcc, list);
+  code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
  if (code == NULL) return FALSE;    /* Unsupported */

  /* If either opcode is a small character list, set pointers for comparing
@ -1100,7 +1104,6 @@ leaving the remainder of the pattern unpossessified.

 Arguments:
  code        points to start of the byte code
-  utf         TRUE in UTF mode
  cb          compile data block

 Returns:      0 for success
@ -1108,13 +1111,15 @@ Returns:      0 for success
 */

 int
-PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
+PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
 {
 PCRE2_UCHAR c;
 PCRE2_SPTR end;
 PCRE2_UCHAR *repeat_opcode;
 uint32_t list[8];
 int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
+BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
+BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;

 for (;;)
  {
@ -1126,10 +1131,11 @@ for (;;)
    {
    c -= get_repeat_base(c) - OP_STAR;
    end = (c <= OP_MINUPTO) ?
-      get_chr_property_list(code, utf, cb->fcc, list) : NULL;
+      get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;

-    if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
+    if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
+        &rec_limit))
      {
      switch(c)
        {
@ -1181,11 +1187,11 @@ for (;;)
    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
      {
      /* end must not be NULL. */
-      end = get_chr_property_list(code, utf, cb->fcc, list);
+      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);

      list[1] = (c & 1) == 0;

-      if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
+      if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
        {
        switch (c)
          {
--- a/pcre2/src/pcre2_chartables.c.dist
+++ b/pcre2/src/pcre2_chartables.c.dist
@ -2,17 +2,21 @@
 *      Perl-Compatible Regular Expressions       *
 *************************************************/

-/* This file was automatically written by the dftables auxiliary
+/* This file was automatically written by the pcre2_dftables auxiliary
 program. It contains character tables that are used when no external
 tables are passed to PCRE2 by the application that calls it. The tables
 are used only for characters whose code values are less than 256. */

-/*The dftables program (which is distributed with PCRE2) can be used to
-build alternative versions of this file. This is necessary if you are
+/* This set of tables was written in the C locale. */
+
+/* The pcre2_dftables program (which is distributed with PCRE2) can be used
+to build alternative versions of this file. This is necessary if you are
 running in an EBCDIC environment, or if you want to default to a different
-encoding, for example ISO-8859-1. When dftables is run, it creates these
-tables in the current locale. This happens automatically if PCRE2 is
-configured with --enable-rebuild-chartables. */
+encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
+these tables in the "C" locale by default. This happens automatically if
+PCRE2 is configured with --enable-rebuild-chartables. However, you can run
+pcre2_dftables manually with the -L option to build tables using the LC_ALL
+locale. */

 /* The following #include is present because without it gcc 4.x may remove
 the array definition from the final binary if PCRE2 is built into a static
@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
 /* This table contains bit maps for various character classes. Each map is 32
 bytes long and the bits run from the least significant end of each byte. The
 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph print, punct, and cntrl. Other classes are built from combinations. */
+graph, print, punct, and cntrl. Other classes are built from combinations. */

-  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
+  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit */
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper */
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word */
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
+  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
+  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
+  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct */
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

-  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
+  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
--- a/pcre2/src/pcre2_compile.c
+++ b/pcre2/src/pcre2_compile.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -1195,13 +1195,14 @@ if (code == NULL) return NULL;
 newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
 if (newcode == NULL) return NULL;
 memcpy(newcode, code, code->blocksize);
+newcode->executable_jit = NULL;

 /* If the code is one that has been deserialized, increment the reference count
 in the decoded tables. */

 if ((code->flags & PCRE2_DEREF_TABLES) != 0)
  {
-  ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+  ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
  (*ref_count)++;
  }

@ -1229,16 +1230,17 @@ if (code == NULL) return NULL;
 newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
 if (newcode == NULL) return NULL;
 memcpy(newcode, code, code->blocksize);
+newcode->executable_jit = NULL;

-newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
+newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
  code->memctl.memory_data);
 if (newtables == NULL)
  {
  code->memctl.free((void *)newcode, code->memctl.memory_data);
  return NULL;
  }
-memcpy(newtables, code->tables, tables_length);
-ref_count = (PCRE2_SIZE *)(newtables + tables_length);
+memcpy(newtables, code->tables, TABLES_LENGTH);
+ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
 *ref_count = 1;

 newcode->tables = newtables;
@ -1259,6 +1261,8 @@ PCRE2_SIZE* ref_count;

 if (code != NULL)
  {
+  if (code->executable_jit != NULL)
+    PRIV(jit_free)(code->executable_jit, &code->memctl);

  if ((code->flags & PCRE2_DEREF_TABLES) != 0)
    {
@ -1266,7 +1270,7 @@ if (code != NULL)
    be freed when there are no more references to them. The *ref_count should
    always be > 0. */

-    ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+    ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
    if (*ref_count > 0)
      {
      (*ref_count)--;
@ -3649,7 +3653,7 @@ while (ptr < ptrend)
    if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;

    /* If ( is not followed by ? it is either a capture or a special verb or an
-    alpha assertion. */
+    alpha assertion or a positive non-atomic lookahead. */

    if (*ptr != CHAR_QUESTION_MARK)
      {
@ -3681,10 +3685,10 @@ while (ptr < ptrend)
        break;

      /* Handle "alpha assertions" such as (*pla:...). Most of these are
-      synonyms for the historical symbolic assertions, but the script run ones
-      are new. They are distinguished by starting with a lower case letter.
-      Checking both ends of the alphabet makes this work in all character
-      codes. */
+      synonyms for the historical symbolic assertions, but the script run and
+      non-atomic lookaround ones are new. They are distinguished by starting
+      with a lower case letter. Checking both ends of the alphabet makes this
+      work in all character codes. */

      else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
        {
@ -3743,9 +3747,7 @@ while (ptr < ptrend)
          goto POSITIVE_LOOK_AHEAD;

          case META_LOOKAHEAD_NA:
-          *parsed_pattern++ = meta;
-          ptr++;
-          goto POST_ASSERTION;
+          goto POSITIVE_NONATOMIC_LOOK_AHEAD;

          case META_LOOKAHEADNOT:
          goto NEGATIVE_LOOK_AHEAD;
@ -4434,6 +4436,12 @@ while (ptr < ptrend)
      ptr++;
      goto POST_ASSERTION;

+      case CHAR_ASTERISK:
+      POSITIVE_NONATOMIC_LOOK_AHEAD:         /* Come from (?* */
+      *parsed_pattern++ = META_LOOKAHEAD_NA;
+      ptr++;
+      goto POST_ASSERTION;
+
      case CHAR_EXCLAMATION_MARK:
      NEGATIVE_LOOK_AHEAD:                   /* Come from (*nla: */
      *parsed_pattern++ = META_LOOKAHEADNOT;
@ -4443,18 +4451,21 @@ while (ptr < ptrend)

      /* ---- Lookbehind assertions ---- */

-      /* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
-      start of the name of a capturing group. */
+      /* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?<
+      is the start of the name of a capturing group. */

      case CHAR_LESS_THAN_SIGN:
      if (ptrend - ptr <= 1 ||
-         (ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
+         (ptr[1] != CHAR_EQUALS_SIGN &&
+          ptr[1] != CHAR_EXCLAMATION_MARK &&
+          ptr[1] != CHAR_ASTERISK))
        {
        terminator = CHAR_GREATER_THAN_SIGN;
        goto DEFINE_NAME;
        }
      *parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
-        META_LOOKBEHIND : META_LOOKBEHINDNOT;
+        META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)?
+        META_LOOKBEHINDNOT : META_LOOKBEHIND_NA;

      POST_LOOKBEHIND:           /* Come from (*plb: (*naplb: and (*nlb: */
      *has_lookbehind = TRUE;
@ -4629,8 +4640,6 @@ while (ptr < ptrend)
        *parsed_pattern++ = META_KET;
        }

-
-
      if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
        else top_nest--;
      }
@ -4895,7 +4904,7 @@ range. */
 if ((options & PCRE2_CASELESS) != 0)
  {
 #ifdef SUPPORT_UNICODE
-  if ((options & PCRE2_UTF) != 0)
+  if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
    {
    int rc;
    uint32_t oc, od;
@ -5310,7 +5319,8 @@ dynamically as we process the pattern. */

 #ifdef SUPPORT_UNICODE
 BOOL utf = (options & PCRE2_UTF) != 0;
-#else  /* No UTF support */
+BOOL ucp = (options & PCRE2_UCP) != 0;
+#else  /* No Unicode support */
 BOOL utf = FALSE;
 #endif

@ -5555,12 +5565,12 @@ for (;; pptr++)
      zerofirstcu = firstcu;
      zerofirstcuflags = firstcuflags;

-      /* For caseless UTF mode, check whether this character has more than
-      one other case. If so, generate a special OP_NOTPROP item instead of
+      /* For caseless UTF or UCP mode, check whether this character has more
+      than one other case. If so, generate a special OP_NOTPROP item instead of
      OP_NOTI. */

 #ifdef SUPPORT_UNICODE
-      if (utf && (options & PCRE2_CASELESS) != 0 &&
+      if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 &&
          (d = UCD_CASESET(c)) != 0)
        {
        *code++ = OP_NOTPROP;
@ -5593,7 +5603,7 @@ for (;; pptr++)
        uint32_t d;

 #ifdef SUPPORT_UNICODE
-        if (utf && c > 127) d = UCD_OTHERCASE(c); else
+        if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else
 #endif
          {
 #if PCRE2_CODE_UNIT_WIDTH != 8
@ -6667,23 +6677,11 @@ for (;; pptr++)
            }

          /* For a back reference, update the back reference map and the
-          maximum back reference. Then, for each group, we must check to
-          see if it is recursive, that is, it is inside the group that it
-          references. A flag is set so that the group can be made atomic.
-          */
+          maximum back reference. */

          cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
          if (groupnumber > cb->top_backref)
            cb->top_backref = groupnumber;
-
-          for (oc = cb->open_caps; oc != NULL; oc = oc->next)
-            {
-            if (oc->number == groupnumber)
-              {
-              oc->flag = TRUE;
-              break;
-              }
-            }
          }
        }

@ -7077,15 +7075,18 @@ for (;; pptr++)
            previous[GET(previous, 1)] != OP_ALT)
          goto END_REPEAT;

-        /* There is no sense in actually repeating assertions. The only
-        potential use of repetition is in cases when the assertion is optional.
-        Therefore, if the minimum is greater than zero, just ignore the repeat.
-        If the maximum is not zero or one, set it to 1. */
+        /* Perl allows all assertions to be quantified, and when they contain
+        capturing parentheses and/or are optional there are potential uses for
+        this feature. PCRE2 used to force the maximum quantifier to 1 on the
+        invalid grounds that further repetition was never useful. This was
+        always a bit pointless, since an assertion could be wrapped with a
+        repeated group to achieve the effect. General repetition is now
+        permitted, but if the maximum is unlimited it is set to one more than
+        the minimum. */

        if (op_previous < OP_ONCE)    /* Assertion */
          {
-          if (repeat_min > 0) goto END_REPEAT;
-          if (repeat_max > 1) repeat_max = 1;
+          if (repeat_max == REPEAT_UNLIMITED) repeat_max = repeat_min + 1;
          }

        /* The case of a zero minimum is special because of the need to stick
@ -7678,19 +7679,6 @@ for (;; pptr++)

    cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1;
    if (meta_arg > cb->top_backref) cb->top_backref = meta_arg;
-
-    /* Check to see if this back reference is recursive, that it, it
-    is inside the group that it references. A flag is set so that the
-    group can be made atomic. */
-
-    for (oc = cb->open_caps; oc != NULL; oc = oc->next)
-      {
-      if (oc->number == meta_arg)
-        {
-        oc->flag = TRUE;
-        break;
-        }
-      }
    break;


@ -7836,11 +7824,12 @@ for (;; pptr++)
    NORMAL_CHAR_SET:  /* Character is already in meta */
    matched_char = TRUE;

-    /* For caseless UTF mode, check whether this character has more than one
-    other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
+    /* For caseless UTF or UCP mode, check whether this character has more than
+    one other case. If so, generate a special OP_PROP item instead of OP_CHARI.
+    */

 #ifdef SUPPORT_UNICODE
-    if (utf && (options & PCRE2_CASELESS) != 0)
+    if ((utf||ucp) && (options & PCRE2_CASELESS) != 0)
      {
      uint32_t caseset = UCD_CASESET(meta);
      if (caseset != 0)
@ -8049,7 +8038,6 @@ if (*code == OP_CBRA)
  capnumber = GET2(code, 1 + LINK_SIZE);
  capitem.number = capnumber;
  capitem.next = cb->open_caps;
-  capitem.flag = FALSE;
  capitem.assert_depth = cb->assert_depth;
  cb->open_caps = &capitem;
  }
@ -8178,26 +8166,9 @@ for (;;)
    PUT(code, 1, (int)(code - start_bracket));
    code += 1 + LINK_SIZE;

-    /* If it was a capturing subpattern, check to see if it contained any
-    recursive back references. If so, we must wrap it in atomic brackets. In
-    any event, remove the block from the chain. */
+    /* If it was a capturing subpattern, remove the block from the chain. */

-    if (capnumber > 0)
-      {
-      if (cb->open_caps->flag)
-        {
-        (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
-          CU2BYTES(code - start_bracket));
-        *start_bracket = OP_ONCE;
-        code += 1 + LINK_SIZE;
-        PUT(start_bracket, 1, (int)(code - start_bracket));
-        *code = OP_KET;
-        PUT(code, 1, (int)(code - start_bracket));
-        code += 1 + LINK_SIZE;
-        length += 2 + 2*LINK_SIZE;
-        }
-      cb->open_caps = cb->open_caps->next;
-      }
+    if (capnumber > 0) cb->open_caps = cb->open_caps->next;

    /* Set values to pass back */

@ -8832,9 +8803,10 @@ memset(slot + IMM2_SIZE + length, 0,

 /* This function is called to skip parts of the parsed pattern when finding the
 length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find
-the end of the branch, it is called to skip over an internal lookaround, and it
-is also called to skip to the end of a class, during which it will never
-encounter nested groups (but there's no need to have special code for that).
+the end of the branch, it is called to skip over an internal lookaround or
+(DEFINE) group, and it is also called to skip to the end of a class, during
+which it will never encounter nested groups (but there's no need to have
+special code for that).

 When called to find the end of a branch or group, pptr must point to the first
 meta code inside the branch, not the branch-starting code. In other cases it
@ -9312,14 +9284,21 @@ for (;; pptr++)
    itemlength = grouplength;
    break;

-    /* Check nested groups - advance past the initial data for each type and
-    then seek a fixed length with get_grouplength(). */
+    /* A (DEFINE) group is never obeyed inline and so it does not contribute to
+    the length of this branch. Skip from the following item to the next
+    unpaired ket. */
+
+    case META_COND_DEFINE:
+    pptr = parsed_skip(pptr + 1, PSKIP_KET);
+    break;
+
+    /* Check other nested groups - advance past the initial data for each type
+    and then seek a fixed length with get_grouplength(). */

    case META_COND_NAME:
    case META_COND_NUMBER:
    case META_COND_RNAME:
    case META_COND_RNUMBER:
-    case META_COND_DEFINE:
    pptr += 2 + SIZEOFFSET;
    goto CHECK_GROUP;

@ -9576,6 +9555,10 @@ for (; *pptr != META_END; pptr++)
    break;

    case META_COND_DEFINE:
+    pptr += SIZEOFFSET;
+    nestlevel++;
+    break;
+
    case META_COND_NAME:
    case META_COND_NUMBER:
    case META_COND_RNAME:
@ -9656,6 +9639,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
   int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
 {
 BOOL utf;                             /* Set TRUE for UTF mode */
+BOOL ucp;                             /* Set TRUE for UCP mode */
 BOOL has_lookbehind = FALSE;          /* Set TRUE if a lookbehind is found */
 BOOL zero_terminated;                 /* Set TRUE for zero-terminated pattern */
 pcre2_real_code *re = NULL;           /* What we will return */
@ -9943,8 +9927,8 @@ if (utf)

 /* Check UCP lockout. */

-if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
-    (PCRE2_UCP|PCRE2_NEVER_UCP))
+ucp = (cb.external_options & PCRE2_UCP) != 0;
+if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0)
  {
  errorcode = ERR75;
  goto HAD_EARLY_ERROR;
@ -10154,6 +10138,7 @@ write to the last 8 bytes of the structure before setting the fields. */
 memset((char *)re + sizeof(pcre2_real_code) - 8, 0, 8);
 re->memctl = ccontext->memctl;
 re->tables = tables;
+re->executable_jit = NULL;
 memset(re->start_bitmap, 0, 32 * sizeof(uint8_t));
 re->blocksize = re_blocksize;
 re->magic_number = MAGIC_NUMBER;
@ -10319,7 +10304,7 @@ function call. */
 if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
  {
  PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
-  if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
+  if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80;
  }

 /* Failed to compile, or error while post-processing. */
@ -10367,21 +10352,25 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)

    if ((firstcuflags & REQ_CASELESS) != 0)
      {
-      if (firstcu < 128 || (!utf && firstcu < 255))
+      if (firstcu < 128 || (!utf && !ucp && firstcu < 255))
        {
        if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS;
        }

-      /* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
-      8-bit UTF mode, codepoints in the range 128-255 are introductory code
-      points and cannot have another case. In 16-bit and 32-bit modes, we can
-      check wide characters when UTF (and therefore UCP) is supported. */
+      /* The first code unit is >= 128 in UTF or UCP mode, or >= 255 otherwise.
+      In 8-bit UTF mode, codepoints in the range 128-255 are introductory code
+      points and cannot have another case, but if UCP is set they may do. */

-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-      else if (firstcu <= MAX_UTF_CODE_POINT &&
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      else if (ucp && !utf && UCD_OTHERCASE(firstcu) != firstcu)
+        re->flags |= PCRE2_FIRSTCASELESS;
+#else
+      else if ((utf || ucp) && firstcu <= MAX_UTF_CODE_POINT &&
               UCD_OTHERCASE(firstcu) != firstcu)
        re->flags |= PCRE2_FIRSTCASELESS;
 #endif
+#endif  /* SUPPORT_UNICODE */
      }
    }

@ -10430,14 +10419,20 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)

      if ((reqcuflags & REQ_CASELESS) != 0)
        {
-        if (reqcu < 128 || (!utf && reqcu < 255))
+        if (reqcu < 128 || (!utf && !ucp && reqcu < 255))
          {
          if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
          }
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-        else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      else if (ucp && !utf && UCD_OTHERCASE(reqcu) != reqcu)
+        re->flags |= PCRE2_LASTCASELESS;
+#else
+      else if ((utf || ucp) && reqcu <= MAX_UTF_CODE_POINT &&
+               UCD_OTHERCASE(reqcu) != reqcu)
        re->flags |= PCRE2_LASTCASELESS;
 #endif
+#endif  /* SUPPORT_UNICODE */
        }
      }
    }
--- a/pcre2/src/pcre2_config.c
+++ b/pcre2/src/pcre2_config.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #endif

 /* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
-its value gets changed by pcre2_internal.h to be in code units. */
+its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
+be in code units. */

 static int configured_link_size = LINK_SIZE;

@ -94,6 +95,7 @@ if (where == NULL)  /* Requests a length */
    case PCRE2_CONFIG_NEWLINE:
    case PCRE2_CONFIG_PARENSLIMIT:
    case PCRE2_CONFIG_STACKRECURSE:    /* Obsolete */
+    case PCRE2_CONFIG_TABLES_LENGTH:
    case PCRE2_CONFIG_UNICODE:
    return sizeof(uint32_t);

@ -191,6 +193,10 @@ switch (what)
  *((uint32_t *)where) = 0;
  break;

+  case PCRE2_CONFIG_TABLES_LENGTH:
+  *((uint32_t *)where) = TABLES_LENGTH;
+  break;
+
  case PCRE2_CONFIG_UNICODE_VERSION:
    {
 #if defined SUPPORT_UNICODE
--- a/pcre2/src/pcre2_dfa_match.c
+++ b/pcre2/src/pcre2_dfa_match.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -548,6 +548,7 @@ PCRE2_SPTR start_code = mb->start_code;

 #ifdef SUPPORT_UNICODE
 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
 #else
 BOOL utf = FALSE;
 #endif
@ -2190,7 +2191,7 @@ for (;;)
      if (clen == 0) break;

 #ifdef SUPPORT_UNICODE
-      if (utf)
+      if (utf_or_ucp)
        {
        if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
          {
@ -2204,7 +2205,7 @@ for (;;)
        }
      else
 #endif  /* SUPPORT_UNICODE */
-      /* Not UTF mode */
+      /* Not UTF or UCP mode */
        {
        if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
          { ADD_NEW(state_offset + 2, 0); }
@ -2339,7 +2340,7 @@ for (;;)
        {
        uint32_t otherd;
 #ifdef SUPPORT_UNICODE
-        if (utf && d >= 128)
+        if (utf_or_ucp && d >= 128)
          otherd = UCD_OTHERCASE(d);
        else
 #endif  /* SUPPORT_UNICODE */
@ -2374,7 +2375,7 @@ for (;;)
        if (caseless)
          {
 #ifdef SUPPORT_UNICODE
-          if (utf && d >= 128)
+          if (utf_or_ucp && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
 #endif  /* SUPPORT_UNICODE */
@ -2417,7 +2418,7 @@ for (;;)
        if (caseless)
          {
 #ifdef SUPPORT_UNICODE
-          if (utf && d >= 128)
+          if (utf_or_ucp && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
 #endif  /* SUPPORT_UNICODE */
@ -2458,7 +2459,7 @@ for (;;)
        if (caseless)
          {
 #ifdef SUPPORT_UNICODE
-          if (utf && d >= 128)
+          if (utf_or_ucp && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
 #endif  /* SUPPORT_UNICODE */
@ -2491,7 +2492,7 @@ for (;;)
        if (caseless)
          {
 #ifdef SUPPORT_UNICODE
-          if (utf && d >= 128)
+          if (utf_or_ucp && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
 #endif  /* SUPPORT_UNICODE */
@ -2531,7 +2532,7 @@ for (;;)
        if (caseless)
          {
 #ifdef SUPPORT_UNICODE
-          if (utf && d >= 128)
+          if (utf_or_ucp && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
 #endif  /* SUPPORT_UNICODE */
@ -3526,10 +3527,15 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
    {
    first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-    if (utf && first_cu > 127)
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
+      first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
+#else
+    if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
      first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
 #endif
+#endif  /* SUPPORT_UNICODE */
    }
  }
 else
@ -3545,9 +3551,15 @@ if ((re->flags & PCRE2_LASTSET) != 0)
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
    {
    req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-    if (utf && req_cu > 127) req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
+      req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
+#else
+    if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
+      req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
 #endif
+#endif  /* SUPPORT_UNICODE */
    }
  }

--- a/pcre2/src/pcre2_internal.h
+++ b/pcre2/src/pcre2_internal.h
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -76,6 +76,17 @@ typedef int BOOL;
 #include <valgrind/memcheck.h>
 #endif

+/* The -ftrivial-auto-var-init compiler option initializes all local variables
+to avoid some classes of bug, but this can cause an unacceptable slowdown
+for large on-stack arrays in hot functions. This macro lets us annotate
+such arrays. */
+
+#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
+#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
+#else
+#define PCRE2_KEEP_UNINITIALIZED
+#endif
+
 /* Older versions of MSVC lack snprintf(). This define allows for
 warning/error-free compilation and testing with MSVC compilers back to at least
 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
@ -579,7 +590,7 @@ total length of the tables. */
 #define fcc_offset    256                           /* Flip case */
 #define cbits_offset  512                           /* Character classes */
 #define ctypes_offset (cbits_offset + cbit_length)  /* Character types */
-#define tables_length (ctypes_offset + 256)
+#define TABLES_LENGTH (ctypes_offset + 256)


 /* -------------------- Character and string names ------------------------ */
@ -1759,13 +1770,11 @@ typedef struct pcre2_memctl {

 /* Structure for building a chain of open capturing subpatterns during
 compiling, so that instructions to close them can be compiled when (*ACCEPT) is
-encountered. This is also used to identify subpatterns that contain recursive
-back references to themselves, so that they can be made atomic. */
+encountered. */

 typedef struct open_capitem {
  struct open_capitem *next;    /* Chain link */
  uint16_t number;              /* Capture number */
-  uint16_t flag;                /* Set TRUE if recursive back ref */
  uint16_t assert_depth;        /* Assertion depth when opened */
 } open_capitem;

@ -1954,7 +1963,7 @@ is available. */
 #define _pcre2_was_newline           PCRE2_SUFFIX(_pcre2_was_newline_)
 #define _pcre2_xclass                PCRE2_SUFFIX(_pcre2_xclass_)

-extern int          _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
+extern int          _pcre2_auto_possessify(PCRE2_UCHAR *,
                      const compile_block *);
 extern int          _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
                      int *, uint32_t, uint32_t, BOOL, compile_block *);
--- a/pcre2/src/pcre2_intmodedep.h
+++ b/pcre2/src/pcre2_intmodedep.h
@ -618,6 +618,7 @@ here.) */
 typedef struct pcre2_real_code {
  pcre2_memctl memctl;            /* Memory control fields */
  const uint8_t *tables;          /* The character tables */
+  void    *executable_jit;        /* Pointer to JIT code */
  uint8_t  start_bitmap[32];      /* Bitmap for starting code unit < 256 */
  CODE_BLOCKSIZE_TYPE blocksize;  /* Total (bytes) that was malloc-ed */
  uint32_t magic_number;          /* Paranoid and endianness check */
--- a/pcre2/src/pcre2_maketables.c
+++ b/pcre2/src/pcre2_maketables.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -41,10 +41,11 @@ POSSIBILITY OF SUCH DAMAGE.

 /* This module contains the external function pcre2_maketables(), which builds
 character tables for PCRE2 in the current locale. The file is compiled on its
-own as part of the PCRE2 library. However, it is also included in the
-compilation of dftables.c, in which case the macro DFTABLES is defined. */
+own as part of the PCRE2 library. It is also included in the compilation of
+pcre2_dftables.c as a freestanding program, in which case the macro
+PCRE2_DFTABLES is defined. */

-#ifndef DFTABLES
+#ifndef PCRE2_DFTABLES    /* Compiling the library */
 #  ifdef HAVE_CONFIG_H
 #  include "config.h"
 #  endif
@ -61,28 +62,29 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
 a pointer to them. They are built using the ctype functions, and consequently
 their contents will depend upon the current locale setting. When compiled as
 part of the library, the store is obtained via a general context malloc, if
-supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
-program) malloc() is used, and the function has a different name so as not to
-clash with the prototype in pcre2.h.
+supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
+freestanding auxiliary program) malloc() is used, and the function has a
+different name so as not to clash with the prototype in pcre2.h.

-Arguments:   none when DFTABLES is defined
+Arguments:   none when PCRE2_DFTABLES is defined
               else a PCRE2 general context or NULL
 Returns:     pointer to the contiguous block of data
+               else NULL if memory allocation failed
 */

-#ifdef DFTABLES  /* Included in freestanding dftables.c program */
+#ifdef PCRE2_DFTABLES  /* Included in freestanding pcre2_dftables program */
 static const uint8_t *maketables(void)
 {
-uint8_t *yield = (uint8_t *)malloc(tables_length);
+uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);

-#else  /* Not DFTABLES, compiling the library */
+#else  /* Not PCRE2_DFTABLES, that is, compiling the library */
 PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
 pcre2_maketables(pcre2_general_context *gcontext)
 {
 uint8_t *yield = (uint8_t *)((gcontext != NULL)?
-  gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
-  malloc(tables_length));
-#endif  /* DFTABLES */
+  gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
+  malloc(TABLES_LENGTH));
+#endif  /* PCRE2_DFTABLES */

 int i;
 uint8_t *p;
@ -103,8 +105,8 @@ exclusive ones - in some locales things may be different.

 Note that the table for "space" includes everything "isspace" gives, including
 VT in the default locale. This makes it work for the POSIX class [:space:].
-From release 8.34 is is also correct for Perl space, because Perl added VT at
-release 5.18.
+From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
+space, because Perl added VT at release 5.18.

 Note also that it is possible for a character to be alnum or alpha without
 being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
@ -120,7 +122,7 @@ for (i = 0; i < 256; i++)
  if (isalnum(i))  p[cbit_word   + i/8] |= 1u << (i&7);
  if (i == '_')    p[cbit_word   + i/8] |= 1u << (i&7);
  if (isspace(i))  p[cbit_space  + i/8] |= 1u << (i&7);
-  if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
+  if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
  if (isgraph(i))  p[cbit_graph  + i/8] |= 1u << (i&7);
  if (isprint(i))  p[cbit_print  + i/8] |= 1u << (i&7);
  if (ispunct(i))  p[cbit_punct  + i/8] |= 1u << (i&7);
@ -130,8 +132,8 @@ p += cbit_length;

 /* Finally, the character type table. In this, we used to exclude VT from the
 white space chars, because Perl didn't recognize it as such for \s and for
-comments within regexes. However, Perl changed at release 5.18, so PCRE changed
-at release 8.34. */
+comments within regexes. However, Perl changed at release 5.18, so PCRE1
+changed at release 8.34 and it's always been this way for PCRE2. */

 for (i = 0; i < 256; i++)
  {
@ -147,7 +149,7 @@ for (i = 0; i < 256; i++)
 return yield;
 }

-#ifndef DFTABLES
+#ifndef PCRE2_DFTABLES   /* Compiling the library */
 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
 pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
 {
--- a/pcre2/src/pcre2_match.c
+++ b/pcre2/src/pcre2_match.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2015-2019 University of Cambridge
+          New API code Copyright (c) 2015-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -381,8 +381,12 @@ length = Fovector[offset+1] - Fovector[offset];
 if (caseless)
  {
 #if defined SUPPORT_UNICODE
-  if ((mb->poptions & PCRE2_UTF) != 0)
+  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+
+  if (utf || (mb->poptions & PCRE2_UCP) != 0)
    {
+    PCRE2_SPTR endptr = p + length;
+
    /* Match characters up to the end of the reference. NOTE: the number of
    code units matched may differ, because in UTF-8 there are some characters
    whose upper and lower case codes have different numbers of bytes. For
@ -390,16 +394,25 @@ if (caseless)
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
    sequence of two of the latter. It is important, therefore, to check the
    length along the reference, not along the subject (earlier code did this
-    wrong). */
+    wrong). UCP without UTF uses Unicode properties but not UTF encoding. */

-    PCRE2_SPTR endptr = p + length;
    while (p < endptr)
      {
      uint32_t c, d;
      const ucd_record *ur;
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
+
+      if (utf)
+        {
        GETCHARINC(c, eptr);
        GETCHARINC(d, p);
+        }
+      else
+        {
+        c = *eptr++;
+        d = *p++;
+        }
+
      ur = GET_UCD(d);
      if (c != d && c != (uint32_t)((int)d + ur->other_case))
        {
@ -415,7 +428,7 @@ if (caseless)
  else
 #endif

-  /* Not in UTF mode */
+  /* Not in UTF or UCP mode */
    {
    for (; length > 0; length--)
      {
@ -432,7 +445,8 @@ if (caseless)
  }

 /* In the caseful case, we can just compare the code units, whether or not we
-are in UTF mode. When partial matching, we have to do this unit-by-unit. */
+are in UTF and/or UCP mode. When partial matching, we have to do this unit by
+unit. */

 else
  {
@ -574,7 +588,7 @@ match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
 heapframe *F;           /* Current frame pointer */
 heapframe *N = NULL;    /* Temporary frame pointers */
 heapframe *P = NULL;
-heapframe *assert_accept_frame;  /* For passing back the frame with captures */
+heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
 PCRE2_SIZE frame_copy_size;     /* Amount to copy when creating a new frame */

 /* Local variables that do not need to be preserved over calls to RRMATCH(). */
@ -598,12 +612,13 @@ BOOL condition;         /* Used in conditional groups */
 BOOL cur_is_word;       /* Used in "word" tests */
 BOOL prev_is_word;      /* Used in "word" tests */

-/* UTF flag */
+/* UTF and UCP flags */

 #ifdef SUPPORT_UNICODE
 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
 #else
-BOOL utf = FALSE;
+BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
 #endif

 /* This is the length of the last part of a backtracking frame that must be
@ -928,6 +943,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      }
    else
 #endif
+
    /* Not UTF mode */
      {
      if (mb->end_subject - Feptr < 1)
@ -987,10 +1003,30 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
        }
      }
+
+    /* If UCP is set without UTF we must do the same as above, but with one
+    character per code unit. */
+
+    else if (ucp)
+      {
+      uint32_t cc = UCHAR21(Feptr);
+      fc = Fecode[1];
+      if (fc < 128)
+        {
+        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
+        }
+      else
+        {
+        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
+        }
+      Feptr++;
+      Fecode += 2;
+      }
+
    else
 #endif   /* SUPPORT_UNICODE */

-    /* Not UTF mode; use the table for characters < 256. */
+    /* Not UTF or UCP mode; use the table for characters < 256. */
      {
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
@ -1010,6 +1046,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      SCHECK_PARTIAL();
      RRETURN(MATCH_NOMATCH);
      }
+
 #ifdef SUPPORT_UNICODE
    if (utf)
      {
@ -1026,15 +1063,42 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        if (ch > 127)
          ch = UCD_OTHERCASE(ch);
        else
-          ch = TABLE_GET(ch, mb->fcc, ch);
+          ch = (mb->fcc)[ch];
        if (ch == fc) RRETURN(MATCH_NOMATCH);
        }
      }
+
+    /* UCP without UTF is as above, but with one character per code unit. */
+
+    else if (ucp)
+      {
+      uint32_t ch;
+      fc = UCHAR21INC(Feptr);
+      ch = Fecode[1];
+      Fecode += 2;
+
+      if (ch == fc)
+        {
+        RRETURN(MATCH_NOMATCH);  /* Caseful match */
+        }
+      else if (Fop == OP_NOTI)   /* If caseless */
+        {
+        if (ch > 127)
+          ch = UCD_OTHERCASE(ch);
+        else
+          ch = (mb->fcc)[ch];
+        if (ch == fc) RRETURN(MATCH_NOMATCH);
+        }
+      }
+
    else
 #endif  /* SUPPORT_UNICODE */
+
+    /* Neither UTF nor UCP is set */
+
      {
      uint32_t ch = Fecode[1];
-      fc = *Feptr++;
+      fc = UCHAR21INC(Feptr);
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
        RRETURN(MATCH_NOMATCH);
      Fecode += 2;
@ -1244,7 +1308,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 #endif  /* SUPPORT_UNICODE */

    /* When not in UTF mode, load a single-code-unit character. Then proceed as
-    above. */
+    above, using Unicode casing if either UTF or UCP is set. */

    Lc = *Fecode++;

@ -1253,11 +1317,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    if (Fop >= OP_STARI)
      {
 #if PCRE2_CODE_UNIT_WIDTH == 8
-      /* Lc must be < 128 in UTF-8 mode. */
+#ifdef SUPPORT_UNICODE
+      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
+      else
+#endif  /* SUPPORT_UNICODE */
+      /* Lc will be < 128 in UTF-8 mode. */
      Loc = mb->fcc[Lc];
 #else /* 16-bit & 32-bit */
 #ifdef SUPPORT_UNICODE
-      if (utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
+      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
      else
 #endif  /* SUPPORT_UNICODE */
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
@ -1490,7 +1558,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    if (Fop >= OP_NOTSTARI)     /* Caseless */
      {
 #ifdef SUPPORT_UNICODE
-      if (utf && Lc > 127)
+      if ((utf || ucp) && Lc > 127)
        Loc = UCD_OTHERCASE(Lc);
      else
 #endif /* SUPPORT_UNICODE */
@ -6045,7 +6113,6 @@ BOOL firstline;
 BOOL has_first_cu = FALSE;
 BOOL has_req_cu = FALSE;
 BOOL startline;
-BOOL utf;

 #if PCRE2_CODE_UNIT_WIDTH == 8
 BOOL memchr_not_found_first_cu = FALSE;
@ -6069,13 +6136,19 @@ PCRE2_SPTR match_partial;
 BOOL use_jit;
 #endif

+/* This flag is needed even when Unicode is not supported for convenience
+(it is used by the IS_NEWLINE macro). */
+
+BOOL utf = FALSE;
+
 #ifdef SUPPORT_UNICODE
+BOOL ucp = FALSE;
 BOOL allow_invalid;
 uint32_t fragment_options = 0;
 #ifdef SUPPORT_JIT
 BOOL jit_checked_utf = FALSE;
 #endif
-#endif
+#endif  /* SUPPORT_UNICODE */

 PCRE2_SIZE frame_size;

@ -6091,7 +6164,8 @@ proves to be too small, it is replaced by a larger one on the heap. To get a
 vector of the size required that is aligned for pointers, allocate it as a
 vector of pointers. */

-PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
+PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
+    PCRE2_KEEP_UNINITIALIZED;
 mb->stack_frames = (heapframe *)stack_frames_vector;

 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
@ -6147,12 +6221,13 @@ use_jit = (re->executable_jit != NULL &&
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
 #endif

-/* Initialize UTF parameters. */
+/* Initialize UTF/UCP parameters. */

-utf = (re->overall_options & PCRE2_UTF) != 0;
 #ifdef SUPPORT_UNICODE
+utf = (re->overall_options & PCRE2_UTF) != 0;
 allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
-#endif
+ucp = (re->overall_options & PCRE2_UCP) != 0;
+#endif  /* SUPPORT_UNICODE */

 /* Convert the partial matching flags into an integer. */

@ -6589,9 +6664,13 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
    {
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-    if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
+#else
+    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
 #endif
+#endif  /* SUPPORT_UNICODE */
    }
  }
 else
@ -6607,9 +6686,13 @@ if ((re->flags & PCRE2_LASTSET) != 0)
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
    {
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-    if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
+#else
+    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
 #endif
+#endif  /* SUPPORT_UNICODE */
    }
  }

@ -6756,15 +6839,16 @@ for(;;)
 #endif
          }

-        /* If we can't find the required code unit, having reached the true end
-        of the subject, break the bumpalong loop, to force a match failure,
-        except when doing partial matching, when we let the next cycle run at
-        the end of the subject. To see why, consider the pattern /(?<=abc)def/,
-        which partially matches "abc", even though the string does not contain
-        the starting character "d". If we have not reached the true end of the
-        subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
-        we also let the cycle run, because the matching string is legitimately
-        allowed to start with the first code unit of a newline. */
+        /* If we can't find the required first code unit, having reached the
+        true end of the subject, break the bumpalong loop, to force a match
+        failure, except when doing partial matching, when we let the next cycle
+        run at the end of the subject. To see why, consider the pattern
+        /(?<=abc)def/, which partially matches "abc", even though the string
+        does not contain the starting character "d". If we have not reached the
+        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
+        temporarily modified) we also let the cycle run, because the matching
+        string is legitimately allowed to start with the first code unit of a
+        newline. */

        if (mb->partial == 0 && start_match >= mb->end_subject)
          {
--- a/pcre2/src/pcre2_serialize.c
+++ b/pcre2/src/pcre2_serialize.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -90,7 +90,7 @@ if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
 if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;

 /* Compute total size. */
-total_size = sizeof(pcre2_serialized_data) + tables_length;
+total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
 tables = NULL;

 for (i = 0; i < number_of_codes; i++)
@ -121,8 +121,8 @@ data->number_of_codes = number_of_codes;

 /* Copy all compiled code data. */
 dst_bytes = bytes + sizeof(pcre2_serialized_data);
-memcpy(dst_bytes, tables, tables_length);
-dst_bytes += tables_length;
+memcpy(dst_bytes, tables, TABLES_LENGTH);
+dst_bytes += TABLES_LENGTH;

 for (i = 0; i < number_of_codes; i++)
  {
@ -143,6 +143,8 @@ for (i = 0; i < number_of_codes; i++)
    sizeof(pcre2_memctl));
  (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, 
    sizeof(void *));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
+    sizeof(void *));        
 
  dst_bytes += re->blocksize;
  }
@ -187,12 +189,12 @@ src_bytes = bytes + sizeof(pcre2_serialized_data);
 /* Decode tables. The reference count for the tables is stored immediately
 following them. */

-tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
+tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
 if (tables == NULL) return PCRE2_ERROR_NOMEMORY;

-memcpy(tables, src_bytes, tables_length);
-*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
-src_bytes += tables_length;
+memcpy(tables, src_bytes, TABLES_LENGTH);
+*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
+src_bytes += TABLES_LENGTH;

 /* Decode the byte stream. We must not try to read the size from the compiled
 code block in the stream, because it might be unaligned, which causes errors on
@ -238,6 +240,7 @@ for (i = 0; i < number_of_codes; i++)
  /* At the moment only one table is supported. */

  dst_re->tables = tables;
+  dst_re->executable_jit = NULL;
  dst_re->flags |= PCRE2_DEREF_TABLES;

  codes[i] = dst_re;
--- a/pcre2/src/pcre2_study.c
+++ b/pcre2/src/pcre2_study.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */

 /* Returns from set_start_bits() */

-enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
+enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };


 /*************************************************
@ -772,15 +772,19 @@ Arguments:
  p             points to the first code unit of the character
  caseless      TRUE if caseless
  utf           TRUE for UTF mode
+  ucp           TRUE for UCP mode

 Returns:        pointer after the character
 */

 static PCRE2_SPTR
-set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
+set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
+  BOOL ucp)
 {
 uint32_t c = *p++;   /* First code unit */
-(void)utf;           /* Stop compiler warning when UTF not supported */
+
+(void)utf;           /* Stop compiler warnings when UTF not supported */
+(void)ucp;

 /* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
 0xff. */
@ -810,22 +814,26 @@ if (utf)
 if (caseless)
  {
 #ifdef SUPPORT_UNICODE
+  if (utf || ucp)
+    {
+    c = UCD_OTHERCASE(c);
+#if PCRE2_CODE_UNIT_WIDTH == 8
    if (utf)
      {
-#if PCRE2_CODE_UNIT_WIDTH == 8
      PCRE2_UCHAR buff[6];
-    c = UCD_OTHERCASE(c);
      (void)PRIV(ord2utf)(c, buff);
      SET_BIT(buff[0]);
+      }
+    else if (c < 256) SET_BIT(c);
 #else  /* 16-bit or 32-bit mode */
-    c = UCD_OTHERCASE(c);
    if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
 #endif
    }
+
  else
 #endif  /* SUPPORT_UNICODE */

-  /* Not UTF */
+  /* Not UTF or UCP */

  if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
  }
@ -924,19 +932,26 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
 must continue at the outer level to find at least one mandatory code unit. At
 the outermost level, this function fails unless the result is SSB_DONE.

+We restrict recursion (for nested groups) to a depth of 1000 to avoid stack
+overflow issues.
+
 Arguments:
  re           points to the compiled regex block
  code         points to an expression
  utf          TRUE if in UTF mode
+  ucp          TRUE if in UCP mode
+  depthptr     pointer to recursion depth

 Returns:       SSB_FAIL     => Failed to find any starting code units
               SSB_DONE     => Found mandatory starting code units
               SSB_CONTINUE => Found optional starting code units
               SSB_UNKNOWN  => Hit an unrecognized opcode
+               SSB_TOODEEP  => Recursion is too deep
 */

 static int
-set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
+set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
+  int *depthptr)
 {
 uint32_t c;
 int yield = SSB_DONE;
@ -947,6 +962,9 @@ int table_limit = utf? 16:32;
 int table_limit = 32;
 #endif

+*depthptr += 1;
+if (*depthptr > 1000) return SSB_TOODEEP;
+
 do
  {
  BOOL try_next = TRUE;
@ -1103,13 +1121,17 @@ do
      case OP_SCRIPT_RUN:
      case OP_ASSERT:
      case OP_ASSERT_NA:
-      rc = set_start_bits(re, tcode, utf);
-      if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
-      if (rc == SSB_DONE) try_next = FALSE; else
+      rc = set_start_bits(re, tcode, utf, ucp, depthptr);
+      if (rc == SSB_DONE)
+        {
+        try_next = FALSE;
+        }
+      else if (rc == SSB_CONTINUE)
        {
        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
        tcode += 1 + LINK_SIZE;
        }
+      else return rc;   /* FAIL, UNKNOWN, or TOODEEP */
      break;

      /* If we hit ALT or KET, it means we haven't found anything mandatory in
@ -1155,8 +1177,8 @@ do
      case OP_BRAZERO:
      case OP_BRAMINZERO:
      case OP_BRAPOSZERO:
-      rc = set_start_bits(re, ++tcode, utf);
-      if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
+      rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
+      if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
      tcode += 1 + LINK_SIZE;
      break;
@ -1177,7 +1199,7 @@ do
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
-      tcode = set_table_bit(re, tcode + 1, FALSE, utf);
+      tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
      break;

      case OP_STARI:
@ -1186,7 +1208,7 @@ do
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
-      tcode = set_table_bit(re, tcode + 1, TRUE, utf);
+      tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
      break;

      /* Single-char upto sets the bit and tries the next */
@ -1194,13 +1216,13 @@ do
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
-      tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf);
+      tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
      break;

      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_POSUPTOI:
-      tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf);
+      tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
      break;

      /* At least one single char sets the bit and stops */
@ -1212,7 +1234,7 @@ do
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSPLUS:
-      (void)set_table_bit(re, tcode + 1, FALSE, utf);
+      (void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
      try_next = FALSE;
      break;

@ -1223,7 +1245,7 @@ do
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSPLUSI:
-      (void)set_table_bit(re, tcode + 1, TRUE, utf);
+      (void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
      try_next = FALSE;
      break;

@ -1652,6 +1674,7 @@ PRIV(study)(pcre2_real_code *re)
 int count = 0;
 PCRE2_UCHAR *code;
 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
+BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;

 /* Find start of compiled code */

@ -1664,7 +1687,8 @@ code units. */

 if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
  {
-  int rc = set_start_bits(re, code, utf);
+  int depth = 0;
+  int rc = set_start_bits(re, code, utf, ucp, &depth);
  if (rc == SSB_UNKNOWN) return 1;

  /* If a list of starting code units was set up, scan the list to see if only
@ -1712,27 +1736,27 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
          }

        /* c contains the code unit value, in the range 0-255. In 8-bit UTF
-        mode, only values < 128 can be used. */
+        mode, only values < 128 can be used. In all the other cases, c is a
+        character value. */

 #if PCRE2_CODE_UNIT_WIDTH == 8
-        if (c > 127) goto DONE;
+        if (utf && c > 127) goto DONE;
 #endif
-        if (a < 0) a = c;   /* First one found */
+        if (a < 0) a = c;   /* First one found, save in a */
        else if (b < 0)     /* Second one found */
          {
          int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);

 #ifdef SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
-          if (utf && UCD_CASESET(c) != 0) goto DONE;   /* Multiple case set */
-#else   /* 16-bit or 32-bit */
+          if (utf || ucp)
+            {
            if (UCD_CASESET(c) != 0) goto DONE;     /* Multiple case set */
-          if (utf && c > 127) d = UCD_OTHERCASE(c);
-#endif  /* Code width */
+            if (c > 127) d = UCD_OTHERCASE(c);
+            }
 #endif  /* SUPPORT_UNICODE */

-          if (d != a) goto DONE;   /* Not other case of a */
-          b = c;
+          if (d != a) goto DONE;   /* Not the other case of a */
+          b = c;                   /* Save second in b */
          }
        else goto DONE;   /* More than two characters found */
        }
--- a/pcre2/src/pcre2_substitute.c
+++ b/pcre2/src/pcre2_substitute.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -49,8 +49,9 @@ POSSIBILITY OF SUCH DAMAGE.

 #define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
-   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
-   PCRE2_SUBSTITUTE_UNSET_EMPTY)
+   PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
+   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
+   PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)



@ -194,6 +195,7 @@ overflow, either give an error immediately, or keep on, accumulating the
 length. */

 #define CHECKMEMCPY(from,length) \
+  { \
  if (!overflowed && lengthleft < length) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
@ -209,6 +211,7 @@ length. */
    memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
    buff_offset += length; \
    lengthleft -= length; \
+    } \
  }

 /* Here's the function */
@ -226,11 +229,14 @@ int forcecasereset = 0;
 uint32_t ovector_count;
 uint32_t goptions = 0;
 uint32_t suboptions;
-BOOL match_data_created = FALSE;
-BOOL literal = FALSE;
+pcre2_match_data *internal_match_data = NULL;
+BOOL escaped_literal = FALSE;
 BOOL overflowed = FALSE;
+BOOL use_existing_match;
+BOOL replacement_only;
 #ifdef SUPPORT_UNICODE
 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
+BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
 #endif
 PCRE2_UCHAR temp[6];
 PCRE2_SPTR ptr;
@ -254,17 +260,48 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */
 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
  return PCRE2_ERROR_BADOPTION;

-/* If no match data block is provided, create one. */
+/* Check for using a match that has already happened. Note that the subject
+pointer in the match data may be NULL after a no-match. */
+
+use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
+replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
+
+/* If starting from an existing match, there must be an externally provided
+match data block. We create an internal match_data block in two cases: (a) an
+external one is not supplied (and we are not starting from an existing match);
+(b) an existing match is to be used for the first substitution. In the latter
+case, we copy the existing match into the internal block. This ensures that no
+changes are made to the existing match data block. */

 if (match_data == NULL)
+  {
+  pcre2_general_context *gcontext;
+  if (use_existing_match) return PCRE2_ERROR_NULL;
+  gcontext = (mcontext == NULL)?
+    (pcre2_general_context *)code :
+    (pcre2_general_context *)mcontext;
+  match_data = internal_match_data =
+    pcre2_match_data_create_from_pattern(code, gcontext);
+  if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
+  }
+
+else if (use_existing_match)
  {
  pcre2_general_context *gcontext = (mcontext == NULL)?
    (pcre2_general_context *)code :
    (pcre2_general_context *)mcontext;
-  match_data = pcre2_match_data_create_from_pattern(code, gcontext);
-  if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
-  match_data_created = TRUE;
+  int pairs = (code->top_bracket + 1 < match_data->oveccount)?
+    code->top_bracket + 1 : match_data->oveccount;
+  internal_match_data = pcre2_match_data_create(match_data->oveccount,
+    gcontext);
+  if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
+  memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
+    + 2*pairs*sizeof(PCRE2_SIZE));
+  match_data = internal_match_data;
  }
+
+/* Remember ovector details */
+
 ovector = pcre2_get_ovector_pointer(match_data);
 ovector_count = pcre2_get_ovector_count(match_data);

@ -286,7 +323,7 @@ repend = replacement + rlength;
 #ifdef SUPPORT_UNICODE
 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  {
-  rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
+  rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
  if (rc != 0)
    {
    match_data->leftchar = 0;
@ -300,7 +337,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
 suboptions = options & SUBSTITUTE_OPTIONS;
 options &= ~SUBSTITUTE_OPTIONS;

-/* Copy up to the start offset */
+/* Error if the start match offset is greater than the length of the subject. */

 if (start_offset > length)
  {
@ -308,9 +345,13 @@ if (start_offset > length)
  rc = PCRE2_ERROR_BADOFFSET;
  goto EXIT;
  }
-CHECKMEMCPY(subject, start_offset);

-/* Loop for global substituting. */
+/* Copy up to the start offset, unless only the replacement is required. */
+
+if (!replacement_only) CHECKMEMCPY(subject, start_offset);
+
+/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
+match is taken from the match_data that was passed in. */

 subs = 0;
 do
@ -318,7 +359,12 @@ do
  PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
  uint32_t ptrstackptr = 0;

-  rc = pcre2_match(code, subject, length, start_offset, options|goptions,
+  if (use_existing_match)
+    {
+    rc = match_data->rc;
+    use_existing_match = FALSE;
+    }
+  else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
    match_data, mcontext);

 #ifdef SUPPORT_UNICODE
@ -364,11 +410,11 @@ do
 #endif
      }

-    /* Copy what we have advanced past, reset the special global options, and
-    continue to the next match. */
+    /* Copy what we have advanced past (unless not required), reset the special
+    global options, and continue to the next match. */

    fraglength = start_offset - save_start;
-    CHECKMEMCPY(subject + save_start, fraglength);
+    if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
    goptions = 0;
    continue;
    }
@ -412,21 +458,30 @@ do
    }
  subs++;

-  /* Copy the text leading up to the match, and remember where the insert
-  begins and how many ovector pairs are set. */
+  /* Copy the text leading up to the match (unless not required), and remember
+  where the insert begins and how many ovector pairs are set. */

  if (rc == 0) rc = ovector_count;
  fraglength = ovector[0] - start_offset;
-  CHECKMEMCPY(subject + start_offset, fraglength);
+  if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
  scb.output_offsets[0] = buff_offset;
  scb.oveccount = rc;

-  /* Process the replacement string. Literal mode is set by \Q, but only in
-  extended mode when backslashes are being interpreted. In extended mode we
-  must handle nested substrings that are to be reprocessed. */
+  /* Process the replacement string. If the entire replacement is literal, just
+  copy it with length check. */

  ptr = replacement;
-  for (;;)
+  if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
+    {
+    CHECKMEMCPY(ptr, rlength);
+    }
+
+  /* Within a non-literal replacement, which must be scanned character by
+  character, local literal mode can be set by \Q, but only in extended mode
+  when backslashes are being interpreted. In extended mode we must handle
+  nested substrings that are to be reprocessed. */
+
+  else for (;;)
    {
    uint32_t ch;
    unsigned int chlen;
@ -443,11 +498,11 @@ do

    /* Handle the next character */

-    if (literal)
+    if (escaped_literal)
      {
      if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
        {
-        literal = FALSE;
+        escaped_literal = FALSE;
        ptr += 2;
        continue;
        }
@ -704,7 +759,7 @@ do
          if (forcecase != 0)
            {
 #ifdef SUPPORT_UNICODE
-            if (utf)
+            if (utf || ucp)
              {
              uint32_t type = UCD_CHARTYPE(ch);
              if (PRIV(ucp_gentype)[type] == ucp_L &&
@ -784,7 +839,7 @@ do
        continue;

        case ESC_Q:
-        literal = TRUE;
+        escaped_literal = TRUE;
        continue;

        case 0:      /* Data character */
@ -806,7 +861,7 @@ do
      if (forcecase != 0)
        {
 #ifdef SUPPORT_UNICODE
-        if (utf)
+        if (utf || ucp)
          {
          uint32_t type = UCD_CHARTYPE(ch);
          if (PRIV(ucp_gentype)[type] == ucp_L &&
@ -855,7 +910,7 @@ do

      buff_offset -= newlength;
      lengthleft += newlength;
-      CHECKMEMCPY(subject + ovector[0], oldlength);    
+      if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);

      /* A negative return means do not do any more. */

@ -864,8 +919,9 @@ do
    }

  /* Save the details of this match. See above for how this data is used. If we
-  matched an empty string, do the magic for global matches. Finally, update the
-  start offset to point to the rest of the subject string. */
+  matched an empty string, do the magic for global matches. Update the start
+  offset to point to the rest of the subject string. If we re-used an existing
+  match for the first match, switch to the internal match data block. */

  ovecsave[0] = ovector[0];
  ovecsave[1] = ovector[1];
@ -876,12 +932,17 @@ do
  start_offset = ovector[1];
  } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */

-/* Copy the rest of the subject. */
+/* Copy the rest of the subject unless not required, and terminate the output
+with a binary zero. */
+
+if (!replacement_only)
+  {
+  fraglength = length - start_offset;
+  CHECKMEMCPY(subject + start_offset, fraglength);
+  }

-fraglength = length - start_offset;
-CHECKMEMCPY(subject + start_offset, fraglength);
 temp[0] = 0;
-CHECKMEMCPY(temp , 1);
+CHECKMEMCPY(temp, 1);

 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
 and matching has carried on after a full buffer, in order to compute the length
@ -903,7 +964,7 @@ else
  }

 EXIT:
-if (match_data_created) pcre2_match_data_free(match_data);
+if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
  else match_data->rc = rc;
 return rc;

--- a/pcre2/src/pcre2_tables.c
+++ b/pcre2/src/pcre2_tables.c
@ -265,6 +265,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
 #define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
 #define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
+#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
 #define STRING_Cn0 STR_C STR_n "\0"
 #define STRING_Co0 STR_C STR_o "\0"
 #define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
@ -275,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
 #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
 #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0"
 #define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
 #define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
 #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
@ -306,6 +308,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
 #define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
 #define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
+#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0"
 #define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
 #define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
 #define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
@ -429,6 +432,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Xsp0 STR_X STR_s STR_p "\0"
 #define STRING_Xuc0 STR_X STR_u STR_c "\0"
 #define STRING_Xwd0 STR_X STR_w STR_d "\0"
+#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0"
 #define STRING_Yi0 STR_Y STR_i "\0"
 #define STRING_Z0 STR_Z "\0"
 #define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
@ -464,6 +468,7 @@ const char PRIV(utt_names)[] =
  STRING_Chakma0
  STRING_Cham0
  STRING_Cherokee0
+  STRING_Chorasmian0
  STRING_Cn0
  STRING_Co0
  STRING_Common0
@ -474,6 +479,7 @@ const char PRIV(utt_names)[] =
  STRING_Cyrillic0
  STRING_Deseret0
  STRING_Devanagari0
+  STRING_Dives_Akuru0
  STRING_Dogra0
  STRING_Duployan0
  STRING_Egyptian_Hieroglyphs0
@ -505,6 +511,7 @@ const char PRIV(utt_names)[] =
  STRING_Katakana0
  STRING_Kayah_Li0
  STRING_Kharoshthi0
+  STRING_Khitan_Small_Script0
  STRING_Khmer0
  STRING_Khojki0
  STRING_Khudawadi0
@ -628,6 +635,7 @@ const char PRIV(utt_names)[] =
  STRING_Xsp0
  STRING_Xuc0
  STRING_Xwd0
+  STRING_Yezidi0
  STRING_Yi0
  STRING_Z0
  STRING_Zanabazar_Square0
@ -663,176 +671,180 @@ const ucp_type_table PRIV(utt)[] = {
  { 203, PT_SC, ucp_Chakma },
  { 210, PT_SC, ucp_Cham },
  { 215, PT_SC, ucp_Cherokee },
-  { 224, PT_PC, ucp_Cn },
-  { 227, PT_PC, ucp_Co },
-  { 230, PT_SC, ucp_Common },
-  { 237, PT_SC, ucp_Coptic },
-  { 244, PT_PC, ucp_Cs },
-  { 247, PT_SC, ucp_Cuneiform },
-  { 257, PT_SC, ucp_Cypriot },
-  { 265, PT_SC, ucp_Cyrillic },
-  { 274, PT_SC, ucp_Deseret },
-  { 282, PT_SC, ucp_Devanagari },
-  { 293, PT_SC, ucp_Dogra },
-  { 299, PT_SC, ucp_Duployan },
-  { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 329, PT_SC, ucp_Elbasan },
-  { 337, PT_SC, ucp_Elymaic },
-  { 345, PT_SC, ucp_Ethiopic },
-  { 354, PT_SC, ucp_Georgian },
-  { 363, PT_SC, ucp_Glagolitic },
-  { 374, PT_SC, ucp_Gothic },
-  { 381, PT_SC, ucp_Grantha },
-  { 389, PT_SC, ucp_Greek },
-  { 395, PT_SC, ucp_Gujarati },
-  { 404, PT_SC, ucp_Gunjala_Gondi },
-  { 418, PT_SC, ucp_Gurmukhi },
-  { 427, PT_SC, ucp_Han },
-  { 431, PT_SC, ucp_Hangul },
-  { 438, PT_SC, ucp_Hanifi_Rohingya },
-  { 454, PT_SC, ucp_Hanunoo },
-  { 462, PT_SC, ucp_Hatran },
-  { 469, PT_SC, ucp_Hebrew },
-  { 476, PT_SC, ucp_Hiragana },
-  { 485, PT_SC, ucp_Imperial_Aramaic },
-  { 502, PT_SC, ucp_Inherited },
-  { 512, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 534, PT_SC, ucp_Inscriptional_Parthian },
-  { 557, PT_SC, ucp_Javanese },
-  { 566, PT_SC, ucp_Kaithi },
-  { 573, PT_SC, ucp_Kannada },
-  { 581, PT_SC, ucp_Katakana },
-  { 590, PT_SC, ucp_Kayah_Li },
-  { 599, PT_SC, ucp_Kharoshthi },
-  { 610, PT_SC, ucp_Khmer },
-  { 616, PT_SC, ucp_Khojki },
-  { 623, PT_SC, ucp_Khudawadi },
-  { 633, PT_GC, ucp_L },
-  { 635, PT_LAMP, 0 },
-  { 638, PT_SC, ucp_Lao },
-  { 642, PT_SC, ucp_Latin },
-  { 648, PT_SC, ucp_Lepcha },
-  { 655, PT_SC, ucp_Limbu },
-  { 661, PT_SC, ucp_Linear_A },
-  { 670, PT_SC, ucp_Linear_B },
-  { 679, PT_SC, ucp_Lisu },
-  { 684, PT_PC, ucp_Ll },
-  { 687, PT_PC, ucp_Lm },
-  { 690, PT_PC, ucp_Lo },
-  { 693, PT_PC, ucp_Lt },
-  { 696, PT_PC, ucp_Lu },
-  { 699, PT_SC, ucp_Lycian },
-  { 706, PT_SC, ucp_Lydian },
-  { 713, PT_GC, ucp_M },
-  { 715, PT_SC, ucp_Mahajani },
-  { 724, PT_SC, ucp_Makasar },
-  { 732, PT_SC, ucp_Malayalam },
-  { 742, PT_SC, ucp_Mandaic },
-  { 750, PT_SC, ucp_Manichaean },
-  { 761, PT_SC, ucp_Marchen },
-  { 769, PT_SC, ucp_Masaram_Gondi },
-  { 783, PT_PC, ucp_Mc },
-  { 786, PT_PC, ucp_Me },
-  { 789, PT_SC, ucp_Medefaidrin },
-  { 801, PT_SC, ucp_Meetei_Mayek },
-  { 814, PT_SC, ucp_Mende_Kikakui },
-  { 828, PT_SC, ucp_Meroitic_Cursive },
-  { 845, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 866, PT_SC, ucp_Miao },
-  { 871, PT_PC, ucp_Mn },
-  { 874, PT_SC, ucp_Modi },
-  { 879, PT_SC, ucp_Mongolian },
-  { 889, PT_SC, ucp_Mro },
-  { 893, PT_SC, ucp_Multani },
-  { 901, PT_SC, ucp_Myanmar },
-  { 909, PT_GC, ucp_N },
-  { 911, PT_SC, ucp_Nabataean },
-  { 921, PT_SC, ucp_Nandinagari },
-  { 933, PT_PC, ucp_Nd },
-  { 936, PT_SC, ucp_New_Tai_Lue },
-  { 948, PT_SC, ucp_Newa },
-  { 953, PT_SC, ucp_Nko },
-  { 957, PT_PC, ucp_Nl },
-  { 960, PT_PC, ucp_No },
-  { 963, PT_SC, ucp_Nushu },
-  { 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
-  { 992, PT_SC, ucp_Ogham },
-  { 998, PT_SC, ucp_Ol_Chiki },
-  { 1007, PT_SC, ucp_Old_Hungarian },
-  { 1021, PT_SC, ucp_Old_Italic },
-  { 1032, PT_SC, ucp_Old_North_Arabian },
-  { 1050, PT_SC, ucp_Old_Permic },
-  { 1061, PT_SC, ucp_Old_Persian },
-  { 1073, PT_SC, ucp_Old_Sogdian },
-  { 1085, PT_SC, ucp_Old_South_Arabian },
-  { 1103, PT_SC, ucp_Old_Turkic },
-  { 1114, PT_SC, ucp_Oriya },
-  { 1120, PT_SC, ucp_Osage },
-  { 1126, PT_SC, ucp_Osmanya },
-  { 1134, PT_GC, ucp_P },
-  { 1136, PT_SC, ucp_Pahawh_Hmong },
-  { 1149, PT_SC, ucp_Palmyrene },
-  { 1159, PT_SC, ucp_Pau_Cin_Hau },
-  { 1171, PT_PC, ucp_Pc },
-  { 1174, PT_PC, ucp_Pd },
-  { 1177, PT_PC, ucp_Pe },
-  { 1180, PT_PC, ucp_Pf },
-  { 1183, PT_SC, ucp_Phags_Pa },
-  { 1192, PT_SC, ucp_Phoenician },
-  { 1203, PT_PC, ucp_Pi },
-  { 1206, PT_PC, ucp_Po },
-  { 1209, PT_PC, ucp_Ps },
-  { 1212, PT_SC, ucp_Psalter_Pahlavi },
-  { 1228, PT_SC, ucp_Rejang },
-  { 1235, PT_SC, ucp_Runic },
-  { 1241, PT_GC, ucp_S },
-  { 1243, PT_SC, ucp_Samaritan },
-  { 1253, PT_SC, ucp_Saurashtra },
-  { 1264, PT_PC, ucp_Sc },
-  { 1267, PT_SC, ucp_Sharada },
-  { 1275, PT_SC, ucp_Shavian },
-  { 1283, PT_SC, ucp_Siddham },
-  { 1291, PT_SC, ucp_SignWriting },
-  { 1303, PT_SC, ucp_Sinhala },
-  { 1311, PT_PC, ucp_Sk },
-  { 1314, PT_PC, ucp_Sm },
-  { 1317, PT_PC, ucp_So },
-  { 1320, PT_SC, ucp_Sogdian },
-  { 1328, PT_SC, ucp_Sora_Sompeng },
-  { 1341, PT_SC, ucp_Soyombo },
-  { 1349, PT_SC, ucp_Sundanese },
-  { 1359, PT_SC, ucp_Syloti_Nagri },
-  { 1372, PT_SC, ucp_Syriac },
-  { 1379, PT_SC, ucp_Tagalog },
-  { 1387, PT_SC, ucp_Tagbanwa },
-  { 1396, PT_SC, ucp_Tai_Le },
-  { 1403, PT_SC, ucp_Tai_Tham },
-  { 1412, PT_SC, ucp_Tai_Viet },
-  { 1421, PT_SC, ucp_Takri },
-  { 1427, PT_SC, ucp_Tamil },
-  { 1433, PT_SC, ucp_Tangut },
-  { 1440, PT_SC, ucp_Telugu },
-  { 1447, PT_SC, ucp_Thaana },
-  { 1454, PT_SC, ucp_Thai },
-  { 1459, PT_SC, ucp_Tibetan },
-  { 1467, PT_SC, ucp_Tifinagh },
-  { 1476, PT_SC, ucp_Tirhuta },
-  { 1484, PT_SC, ucp_Ugaritic },
-  { 1493, PT_SC, ucp_Unknown },
-  { 1501, PT_SC, ucp_Vai },
-  { 1505, PT_SC, ucp_Wancho },
-  { 1512, PT_SC, ucp_Warang_Citi },
-  { 1524, PT_ALNUM, 0 },
-  { 1528, PT_PXSPACE, 0 },
-  { 1532, PT_SPACE, 0 },
-  { 1536, PT_UCNC, 0 },
-  { 1540, PT_WORD, 0 },
-  { 1544, PT_SC, ucp_Yi },
-  { 1547, PT_GC, ucp_Z },
-  { 1549, PT_SC, ucp_Zanabazar_Square },
-  { 1566, PT_PC, ucp_Zl },
-  { 1569, PT_PC, ucp_Zp },
-  { 1572, PT_PC, ucp_Zs }
+  { 224, PT_SC, ucp_Chorasmian },
+  { 235, PT_PC, ucp_Cn },
+  { 238, PT_PC, ucp_Co },
+  { 241, PT_SC, ucp_Common },
+  { 248, PT_SC, ucp_Coptic },
+  { 255, PT_PC, ucp_Cs },
+  { 258, PT_SC, ucp_Cuneiform },
+  { 268, PT_SC, ucp_Cypriot },
+  { 276, PT_SC, ucp_Cyrillic },
+  { 285, PT_SC, ucp_Deseret },
+  { 293, PT_SC, ucp_Devanagari },
+  { 304, PT_SC, ucp_Dives_Akuru },
+  { 316, PT_SC, ucp_Dogra },
+  { 322, PT_SC, ucp_Duployan },
+  { 331, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 352, PT_SC, ucp_Elbasan },
+  { 360, PT_SC, ucp_Elymaic },
+  { 368, PT_SC, ucp_Ethiopic },
+  { 377, PT_SC, ucp_Georgian },
+  { 386, PT_SC, ucp_Glagolitic },
+  { 397, PT_SC, ucp_Gothic },
+  { 404, PT_SC, ucp_Grantha },
+  { 412, PT_SC, ucp_Greek },
+  { 418, PT_SC, ucp_Gujarati },
+  { 427, PT_SC, ucp_Gunjala_Gondi },
+  { 441, PT_SC, ucp_Gurmukhi },
+  { 450, PT_SC, ucp_Han },
+  { 454, PT_SC, ucp_Hangul },
+  { 461, PT_SC, ucp_Hanifi_Rohingya },
+  { 477, PT_SC, ucp_Hanunoo },
+  { 485, PT_SC, ucp_Hatran },
+  { 492, PT_SC, ucp_Hebrew },
+  { 499, PT_SC, ucp_Hiragana },
+  { 508, PT_SC, ucp_Imperial_Aramaic },
+  { 525, PT_SC, ucp_Inherited },
+  { 535, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 557, PT_SC, ucp_Inscriptional_Parthian },
+  { 580, PT_SC, ucp_Javanese },
+  { 589, PT_SC, ucp_Kaithi },
+  { 596, PT_SC, ucp_Kannada },
+  { 604, PT_SC, ucp_Katakana },
+  { 613, PT_SC, ucp_Kayah_Li },
+  { 622, PT_SC, ucp_Kharoshthi },
+  { 633, PT_SC, ucp_Khitan_Small_Script },
+  { 653, PT_SC, ucp_Khmer },
+  { 659, PT_SC, ucp_Khojki },
+  { 666, PT_SC, ucp_Khudawadi },
+  { 676, PT_GC, ucp_L },
+  { 678, PT_LAMP, 0 },
+  { 681, PT_SC, ucp_Lao },
+  { 685, PT_SC, ucp_Latin },
+  { 691, PT_SC, ucp_Lepcha },
+  { 698, PT_SC, ucp_Limbu },
+  { 704, PT_SC, ucp_Linear_A },
+  { 713, PT_SC, ucp_Linear_B },
+  { 722, PT_SC, ucp_Lisu },
+  { 727, PT_PC, ucp_Ll },
+  { 730, PT_PC, ucp_Lm },
+  { 733, PT_PC, ucp_Lo },
+  { 736, PT_PC, ucp_Lt },
+  { 739, PT_PC, ucp_Lu },
+  { 742, PT_SC, ucp_Lycian },
+  { 749, PT_SC, ucp_Lydian },
+  { 756, PT_GC, ucp_M },
+  { 758, PT_SC, ucp_Mahajani },
+  { 767, PT_SC, ucp_Makasar },
+  { 775, PT_SC, ucp_Malayalam },
+  { 785, PT_SC, ucp_Mandaic },
+  { 793, PT_SC, ucp_Manichaean },
+  { 804, PT_SC, ucp_Marchen },
+  { 812, PT_SC, ucp_Masaram_Gondi },
+  { 826, PT_PC, ucp_Mc },
+  { 829, PT_PC, ucp_Me },
+  { 832, PT_SC, ucp_Medefaidrin },
+  { 844, PT_SC, ucp_Meetei_Mayek },
+  { 857, PT_SC, ucp_Mende_Kikakui },
+  { 871, PT_SC, ucp_Meroitic_Cursive },
+  { 888, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 909, PT_SC, ucp_Miao },
+  { 914, PT_PC, ucp_Mn },
+  { 917, PT_SC, ucp_Modi },
+  { 922, PT_SC, ucp_Mongolian },
+  { 932, PT_SC, ucp_Mro },
+  { 936, PT_SC, ucp_Multani },
+  { 944, PT_SC, ucp_Myanmar },
+  { 952, PT_GC, ucp_N },
+  { 954, PT_SC, ucp_Nabataean },
+  { 964, PT_SC, ucp_Nandinagari },
+  { 976, PT_PC, ucp_Nd },
+  { 979, PT_SC, ucp_New_Tai_Lue },
+  { 991, PT_SC, ucp_Newa },
+  { 996, PT_SC, ucp_Nko },
+  { 1000, PT_PC, ucp_Nl },
+  { 1003, PT_PC, ucp_No },
+  { 1006, PT_SC, ucp_Nushu },
+  { 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 1035, PT_SC, ucp_Ogham },
+  { 1041, PT_SC, ucp_Ol_Chiki },
+  { 1050, PT_SC, ucp_Old_Hungarian },
+  { 1064, PT_SC, ucp_Old_Italic },
+  { 1075, PT_SC, ucp_Old_North_Arabian },
+  { 1093, PT_SC, ucp_Old_Permic },
+  { 1104, PT_SC, ucp_Old_Persian },
+  { 1116, PT_SC, ucp_Old_Sogdian },
+  { 1128, PT_SC, ucp_Old_South_Arabian },
+  { 1146, PT_SC, ucp_Old_Turkic },
+  { 1157, PT_SC, ucp_Oriya },
+  { 1163, PT_SC, ucp_Osage },
+  { 1169, PT_SC, ucp_Osmanya },
+  { 1177, PT_GC, ucp_P },
+  { 1179, PT_SC, ucp_Pahawh_Hmong },
+  { 1192, PT_SC, ucp_Palmyrene },
+  { 1202, PT_SC, ucp_Pau_Cin_Hau },
+  { 1214, PT_PC, ucp_Pc },
+  { 1217, PT_PC, ucp_Pd },
+  { 1220, PT_PC, ucp_Pe },
+  { 1223, PT_PC, ucp_Pf },
+  { 1226, PT_SC, ucp_Phags_Pa },
+  { 1235, PT_SC, ucp_Phoenician },
+  { 1246, PT_PC, ucp_Pi },
+  { 1249, PT_PC, ucp_Po },
+  { 1252, PT_PC, ucp_Ps },
+  { 1255, PT_SC, ucp_Psalter_Pahlavi },
+  { 1271, PT_SC, ucp_Rejang },
+  { 1278, PT_SC, ucp_Runic },
+  { 1284, PT_GC, ucp_S },
+  { 1286, PT_SC, ucp_Samaritan },
+  { 1296, PT_SC, ucp_Saurashtra },
+  { 1307, PT_PC, ucp_Sc },
+  { 1310, PT_SC, ucp_Sharada },
+  { 1318, PT_SC, ucp_Shavian },
+  { 1326, PT_SC, ucp_Siddham },
+  { 1334, PT_SC, ucp_SignWriting },
+  { 1346, PT_SC, ucp_Sinhala },
+  { 1354, PT_PC, ucp_Sk },
+  { 1357, PT_PC, ucp_Sm },
+  { 1360, PT_PC, ucp_So },
+  { 1363, PT_SC, ucp_Sogdian },
+  { 1371, PT_SC, ucp_Sora_Sompeng },
+  { 1384, PT_SC, ucp_Soyombo },
+  { 1392, PT_SC, ucp_Sundanese },
+  { 1402, PT_SC, ucp_Syloti_Nagri },
+  { 1415, PT_SC, ucp_Syriac },
+  { 1422, PT_SC, ucp_Tagalog },
+  { 1430, PT_SC, ucp_Tagbanwa },
+  { 1439, PT_SC, ucp_Tai_Le },
+  { 1446, PT_SC, ucp_Tai_Tham },
+  { 1455, PT_SC, ucp_Tai_Viet },
+  { 1464, PT_SC, ucp_Takri },
+  { 1470, PT_SC, ucp_Tamil },
+  { 1476, PT_SC, ucp_Tangut },
+  { 1483, PT_SC, ucp_Telugu },
+  { 1490, PT_SC, ucp_Thaana },
+  { 1497, PT_SC, ucp_Thai },
+  { 1502, PT_SC, ucp_Tibetan },
+  { 1510, PT_SC, ucp_Tifinagh },
+  { 1519, PT_SC, ucp_Tirhuta },
+  { 1527, PT_SC, ucp_Ugaritic },
+  { 1536, PT_SC, ucp_Unknown },
+  { 1544, PT_SC, ucp_Vai },
+  { 1548, PT_SC, ucp_Wancho },
+  { 1555, PT_SC, ucp_Warang_Citi },
+  { 1567, PT_ALNUM, 0 },
+  { 1571, PT_PXSPACE, 0 },
+  { 1575, PT_SPACE, 0 },
+  { 1579, PT_UCNC, 0 },
+  { 1583, PT_WORD, 0 },
+  { 1587, PT_SC, ucp_Yezidi },
+  { 1594, PT_SC, ucp_Yi },
+  { 1597, PT_GC, ucp_Z },
+  { 1599, PT_SC, ucp_Zanabazar_Square },
+  { 1616, PT_PC, ucp_Zl },
+  { 1619, PT_PC, ucp_Zp },
+  { 1622, PT_PC, ucp_Zs }
 };

 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
--- a/pcre2/src/pcre2_ucd.c
+++ b/pcre2/src/pcre2_ucd.c
--- a/pcre2/src/pcre2_ucp.h
+++ b/pcre2/src/pcre2_ucp.h
@ -286,7 +286,12 @@ enum {
  ucp_Elymaic,
  ucp_Nandinagari,
  ucp_Nyiakeng_Puachue_Hmong,
-  ucp_Wancho
+  ucp_Wancho,
+  /* New for Unicode 13.0.0 */
+  ucp_Chorasmian,
+  ucp_Dives_Akuru,
+  ucp_Khitan_Small_Script,
+  ucp_Yezidi
 };

 #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
--- a/pcre2/src/pcre2_valid_utf.c
+++ b/pcre2/src/pcre2_valid_utf.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -347,7 +347,7 @@ for (p = string; length > 0; p++)
    length--;
    if ((*p & 0xfc00) != 0xdc00)
      {
-      *erroroffset = p - string;
+      *erroroffset = p - string - 1;
      return PCRE2_ERROR_UTF16_ERR2;
      }
    }