/* Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2009-07-26 mrab completely re-designed matcher code
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

#if !defined(RE_RECURSIVE)
/* Parsed on first (non-recursive) pass only. */

/* Debug tracing.
 *
 * Uncomment the VERBOSE definition below to make TRACE print through printf;
 * otherwise TRACE expands to nothing.
 *
 * The argument must be a complete, parenthesised printf argument list, e.g.
 * TRACE(("%d\n", n)). The expansion supplies its own trailing semicolon, so
 * call sites are written without one.
 */
/* #define VERBOSE */
#if defined(VERBOSE)
#define TRACE(X) printf X;
#else
#define TRACE(X)
#endif

#define RE_MULTITHREADED

#include "Python.h"
#include "structmember.h" /* offsetof */
#include <ctype.h>
#include "_regex.h"
#include "pyport.h"

/* Boolean type used throughout this file: FALSE is 0 and TRUE is 1. */
typedef enum {FALSE, TRUE} BOOL;

/* Name of this module, minus the leading underscore. */
#define RE_MODULE "regex"
#define RE_MODULE_UPPER "REGEX"

/* Release the GIL when matching on an immutable string. */
#define RE_MULTITHREADED

/* Error codes. */
#define RE_ERROR_SUCCESS 1 /* Successful match. */
#define RE_ERROR_FAILURE 0 /* Unsuccessful match. */
#define RE_ERROR_ILLEGAL -1 /* Illegal opcode. */
#define RE_ERROR_MEMORY -9 /* Out of memory. */
#define RE_ERROR_INTERRUPTED -10 /* Signal handler raised exception. */
#define RE_ERROR_REPLACEMENT -11 /* Invalid replacement string. */
#define RE_ERROR_INVALID_GROUP_REF -12 /* Invalid group reference. */
#define RE_ERROR_GROUP_INDEX_TYPE -13 /* Group index type error. */
#define RE_ERROR_NO_SUCH_GROUP -14 /* No such group. */
#define RE_ERROR_INDEX -15 /* String index. */

/* Number of backtrack entries per allocated block. */
#define RE_BACKTRACK_BLOCK_SIZE 64

#define BITS_PER_INDEX 16
#define BITS_PER_CODE 32
#define INDEXES_PER_CODE (BITS_PER_CODE / BITS_PER_INDEX)

/* The shortest string prefix for which we'll use a fast string search. */
#define MIN_FAST_PREFIX 0

static char copyright[] =
    " RE 2.3.0 Copyright (c) 1997-2002 by Secret Labs AB ";

/* The exception to return on error. */
static PyObject* error_exception;

/* Handlers for ASCII, locale and Unicode.
 *
 * A table of function pointers describing how one character encoding
 * classifies and case-converts characters. The ascii_encoding,
 * locale_encoding and unicode_encoding tables later in this file are the
 * instances of it.
 */
typedef struct RE_EncodingTable {
    /* Tests whether ch belongs to the given character category. */
    BOOL (*in_category)(RE_CODE category, RE_CODE ch);
    /* Single-character case conversions. */
    RE_CODE (*lower)(RE_CODE ch);
    RE_CODE (*upper)(RE_CODE ch);
    RE_CODE (*title)(RE_CODE ch);
    /* Case-insensitive comparison of 2 characters. */
    BOOL (*same_char_ignore)(RE_CODE ch_1, RE_CODE ch_2);
    /* As same_char_ignore, but the caller supplies the case variants of ch_2
     * so that they need not be recomputed on every call.
     */
    BOOL (*same_char_ignore_3)(RE_CODE ch_1, RE_CODE ch_2, RE_CODE ch_2_lower,
      RE_CODE ch_2_upper, RE_CODE ch_2_title);
    /* Case-insensitive test of whether ch is within [min_value, max_value]. */
    BOOL (*in_range_ignore)(RE_CODE min_value, RE_CODE max_value, RE_CODE ch);
    /* As in_range_ignore, but the caller supplies the case variants of the
     * character instead of the character itself.
     */
    BOOL (*in_range_ignore_3)(RE_CODE min_value, RE_CODE max_value, RE_CODE
      ch_lower, RE_CODE ch_upper, RE_CODE ch_title);
} RE_EncodingTable;

/* A position within both the regex and the text: a pattern node paired with
 * a pointer into the text.
 */
typedef struct RE_Position {
    struct RE_Node* node;
    void* text_ptr;
} RE_Position;

/* Storage for backtrack data.
 *
 * One backtrack entry: an opcode plus an anonymous union holding either
 * 'branch' data (a saved position) or 'repeat' data (a saved position and
 * repeat bookkeeping).
 *
 * NOTE(review): presumably 'op' selects which union member is valid; the code
 * that fills and reads these entries is not in this part of the file.
 */
typedef struct RE_BacktrackData {
    RE_CODE op;
    union {
        struct {
            RE_Position position;
        } branch;
        struct {
            RE_Position position;
            Py_ssize_t ofs;
            void* start;
            size_t count;
            size_t max_count;
        } repeat;
    };
} RE_BacktrackData;

/* Storage for backtrack data is allocated in blocks for speed.
 *
 * Each block embeds RE_BACKTRACK_BLOCK_SIZE entries and carries 'previous'
 * and 'next' pointers to other blocks, plus 'capacity' and 'count' fields for
 * tracking how much of 'items' is in use.
 */
typedef struct RE_BacktrackBlock {
    struct RE_BacktrackBlock* previous;
    struct RE_BacktrackBlock* next;
    size_t capacity;
    size_t count;
    RE_BacktrackData items[RE_BACKTRACK_BLOCK_SIZE];
} RE_BacktrackBlock;

/* A link from one pattern node to a following node (RE_Node embeds two of
 * these, next_1 and next_2).
 *
 * NOTE(review): the exact roles of 'test', 'match_next' and 'match_step' are
 * set by code outside this part of the file; confirm before relying on them.
 */
typedef struct RE_NextNode {
    struct RE_Node* node;
    struct RE_Node* test;
    struct RE_Node* match_next;
    Py_ssize_t match_step;
} RE_NextNode;

/* A pattern node.
 *
 * Carries an opcode ('op'), a 'match' flag, a 'step', an array of RE_CODE
 * operands ('values', with 'value_count' and 'value_capacity') and two
 * outgoing links ('next_1' and 'next_2').
 */
typedef struct RE_Node {
    int index;
    RE_CODE op;
    BOOL match;
    Py_ssize_t step;
    Py_ssize_t value_capacity;
    Py_ssize_t value_count;
    RE_CODE* values;
    RE_NextNode next_1;
    RE_NextNode next_2;
} RE_Node;

/* Data about a group in a context: pointers to where the group's span begins
 * and ends.
 */
typedef struct RE_GroupData {
    void* begin;
    void* end;
} RE_GroupData;

/* Data about a repeat in a context: a count, a start pointer and a maximum
 * count.
 */
typedef struct RE_RepeatData {
    size_t count;
    void* start;
    size_t max_count;
} RE_RepeatData;

/* Data about groups or repeats.
 *
 * A union, so that group entries and repeat entries can share one array
 * (RE_State.data is documented as "groups followed by repeats").
 */
typedef union RE_Data {
    RE_GroupData group;
    RE_RepeatData repeat;
} RE_Data;

/* Info about a capture group, held per pattern (see PatternObject.group_info).
 *
 * NOTE(review): the meaning of 'value_offset' and 'end_index' is determined
 * by code outside this part of the file.
 */
typedef struct RE_GroupInfo {
    int id;
    BOOL referenced;
    BOOL has_name;
    Py_ssize_t value_offset;
    Py_ssize_t end_index;
} RE_GroupInfo;

/* Info about a repeat, held per pattern (see PatternObject.repeat_info).
 *
 * NOTE(review): the meaning of 'value_offset' is determined by code outside
 * this part of the file.
 */
typedef struct RE_RepeatInfo {
    int id;
    Py_ssize_t value_offset;
} RE_RepeatInfo;

/* The state object used during matching.
 *
 * Bundles the pattern being matched, the subject string and its bounds, the
 * search options, the backtrack storage and the group/repeat data. It is
 * embedded by value in ScannerObject and SplitterObject.
 */
typedef struct RE_State {
    struct PatternObject* pattern; /* Parent PatternObject. */
    /* Info about the string being matched. */
    PyObject* string;
    Py_ssize_t charsize;
    RE_EncodingTable* encoding; /* Encoding handlers. */
    void* text_start;
    void* text_end;
    void* slice_start;
    void* slice_end;
    BOOL overlapped; /* Matches can be overlapped. */
    BOOL reverse; /* Search backwards. */
    void** marks;
    Py_ssize_t lastindex;
    Py_ssize_t lastgroup;
    void* search_anchor;
    void* match_ptr;
    void* text_ptr;
    void* final_newline; /* Pointer to newline at end of string, or NULL. */
    BOOL zero_width; /* Enable the correct handling of zero-width matches. */
    BOOL must_advance; /* The end of the match must advance past its start. */
    /* Storage for backtrack info. The first block is embedded in the state
     * itself.
     */
    RE_BacktrackBlock backtrack_block;
    RE_BacktrackBlock* current_block;
    RE_BacktrackData* backtrack;
    Py_ssize_t saved_groups_capacity;
    Py_ssize_t saved_groups_count;
    RE_Data* saved_groups;
    RE_Data* data; /* The data (groups followed by repeats). */
#if defined(RE_MULTITHREADED)
    BOOL is_multithreaded; /* Whether to release the GIL while matching. */
    PyThreadState* saved_GIL; /* Storage for GIL handling. */
#endif
    size_t min_width;
} RE_State;

/* The PatternObject created from a regular expression.
 *
 * A Python object (note PyObject_HEAD) holding the compiled node graph and
 * the per-pattern tables describing its groups and repeats.
 */
typedef struct PatternObject {
    PyObject_HEAD
    PyObject* pattern; /* Pattern source (or None). */
    Py_ssize_t flags; /* Flags used when compiling pattern source. */
    PyObject* weakreflist; /* List of weak references. */
    /* Nodes into which the regular expression is compiled. */
    RE_Node* start_node;
    RE_Node* success_node;
    Py_ssize_t data_count; /* Number of data (groups + repeats). */
    Py_ssize_t group_count; /* Number of capture groups. */
    Py_ssize_t repeat_count; /* Number of repeats. */
    Py_ssize_t group_end_index; /* Number of group closures. */
    /* NOTE(review): presumably the name-to-index and index-to-name mappings
     * for named groups; confirm against the code that fills them.
     */
    PyObject* groupindex;
    PyObject* indexgroup;
    /* Storage for the pattern nodes. */
    Py_ssize_t node_capacity;
    Py_ssize_t node_count;
    RE_Node** node_list;
    /* Info about the capture groups. */
    Py_ssize_t group_info_capacity;
    RE_GroupInfo* group_info;
    /* Info about the repeats. */
    Py_ssize_t repeat_info_capacity;
    RE_RepeatInfo* repeat_info;
    size_t min_width;
    /* NOTE(review): the names suggest string-search skip tables (see
     * MIN_FAST_PREFIX); confirm against the code that builds them.
     */
    Py_ssize_t* bad_character_offset;
    Py_ssize_t* good_suffix_offset;
} PatternObject;

/* The MatchObject created when a match is found.
 *
 * A Python object (note PyObject_HEAD) that refers back to the string and the
 * pattern and records the slice bounds and the group information of the
 * match.
 */
typedef struct MatchObject {
    PyObject_HEAD
    PyObject* string; /* Link to the target string. */
    PatternObject* pattern; /* Link to the regex (pattern) object. */
    Py_ssize_t pos; /* Start of current slice. */
    Py_ssize_t endpos; /* End of current slice. */
    Py_ssize_t lastindex; /* Last group seen by the engine (-1 if none). */
    Py_ssize_t lastgroup; /* Last named group seen by the engine (-1 if none). */
    Py_ssize_t group_count;
    Py_ssize_t* marks;
    PyObject* regs;
} MatchObject;

/* The ScannerObject.
 *
 * A Python object pairing a pattern with its own matching state, which is
 * embedded by value.
 */
typedef struct ScannerObject {
    PyObject_HEAD
    PatternObject* pattern;
    RE_State state;
} ScannerObject;

/* The SplitterObject.
 *
 * A Python object pairing a pattern with its own matching state (embedded by
 * value) plus the bookkeeping for a split: the split limit, the number of
 * splits so far, a 'last' pointer, an index and a 'finished' flag.
 */
typedef struct SplitterObject {
    PyObject_HEAD
    PatternObject* pattern;
    RE_State state;
    Py_ssize_t maxsplit;
    void* last;
    Py_ssize_t split_count;
    Py_ssize_t index;
    BOOL finished;
} SplitterObject;

/* Arguments threaded through pattern compilation: the code range still to be
 * processed ('code' up to 'end_code'), the pattern being built, a direction
 * flag, the minimum width so far and the start/end nodes of the sequence.
 *
 * NOTE(review): the compiler that uses this struct is outside this part of
 * the file; the field roles above are read from their names and types.
 */
typedef struct RE_CompileArgs {
    RE_CODE* code;
    RE_CODE* end_code;
    PatternObject* pattern;
    BOOL forward;
    size_t min_width;
    RE_Node* start;
    RE_Node* end;
} RE_CompileArgs;

/* A pair of Python object pointers, 'list' and 'item'.
 *
 * NOTE(review): presumably used to accumulate pieces before joining them
 * (holding a single item directly and a list once there are several);
 * confirm against the users of this struct.
 */
typedef struct JoinInfo {
    PyObject* list;
    PyObject* item;
} JoinInfo;

/* Tests whether ch lies in the inclusive range [min_value, max_value]. */
Py_LOCAL(BOOL) in_range(RE_CODE min_value, RE_CODE max_value, RE_CODE ch) {
    if (ch < min_value)
        return FALSE;

    return ch <= max_value;
}

/* ASCII-specific. */

#define RE_ASCII_MAX 0x7F

/* Tests whether a character belongs to a category under ASCII rules.
 *
 * Characters above RE_ASCII_MAX and unrecognised categories never match.
 */
static BOOL ascii_in_category(RE_CODE category, RE_CODE ch) {
    if (ch <= RE_ASCII_MAX) {
        switch (category) {
        /* Categories decided by the character's value alone. */
        case RE_CAT_ASCII:
            return TRUE;
        case RE_CAT_BLANK:
            return ch == ' ' || ch == '\t';
        case RE_CAT_CNTRL:
            return ch == 0x7F || ch < 0x20;
        case RE_CAT_GRAPH:
            return ch >= 0x21 && ch <= 0x7E;
        case RE_CAT_LINEBREAK:
            return ch == '\n';
        case RE_CAT_PRINT:
            return ch >= 0x20 && ch <= 0x7E;

        /* Categories looked up in the re_ascii_category flag table. */
        case RE_CAT_ALNUM:
            return (re_ascii_category[ch] & RE_MASK_ALNUM) != 0;
        case RE_CAT_ALPHA:
            return (re_ascii_category[ch] & RE_MASK_ALPHA) != 0;
        case RE_CAT_DIGIT:
            return (re_ascii_category[ch] & RE_MASK_DIGIT) != 0;
        case RE_CAT_LOWER:
            return (re_ascii_category[ch] & RE_MASK_LOWER) != 0;
        case RE_CAT_PUNCT:
            return (re_ascii_category[ch] & RE_MASK_PUNCT) != 0;
        case RE_CAT_SPACE:
            return (re_ascii_category[ch] & RE_MASK_SPACE) != 0;
        case RE_CAT_UPPER:
            return (re_ascii_category[ch] & RE_MASK_UPPER) != 0;
        case RE_CAT_XDIGIT:
            return (re_ascii_category[ch] & RE_MASK_XDIGIT) != 0;
        case RE_CAT_WORD:
            /* A word character is an underscore or an alphanumeric. */
            return ch == '_' || (re_ascii_category[ch] & RE_MASK_ALNUM) != 0;
        default:
            break;
        }
    }

    return FALSE;
}

/* Returns the lowercase form of an ASCII uppercase letter; any other
 * character is returned unchanged.
 */
static RE_CODE ascii_lower(RE_CODE ch) {
    if (ch <= RE_ASCII_MAX && (re_ascii_category[ch] & RE_MASK_UPPER) != 0)
        /* The two cases of an ASCII letter differ only in bit 0x20. */
        return ch ^ 0x20;

    return ch;
}

/* Returns the uppercase form of an ASCII lowercase letter; any other
 * character is returned unchanged.
 */
static RE_CODE ascii_upper(RE_CODE ch) {
    if (ch <= RE_ASCII_MAX && (re_ascii_category[ch] & RE_MASK_LOWER) != 0)
        /* The two cases of an ASCII letter differ only in bit 0x20. */
        return ch ^ 0x20;

    return ch;
}

/* Tests whether 2 characters are equal when ASCII case is ignored. */
static BOOL ascii_same_char_ignore(RE_CODE ch_1, RE_CODE ch_2) {
    /* Does some case form of ch_1 equal ch_2? */
    if (ascii_lower(ch_1) == ch_2 || ascii_upper(ch_1) == ch_2)
        return TRUE;

    /* Does some case form of ch_2 equal ch_1? */
    return ch_1 == ascii_lower(ch_2) || ch_1 == ascii_upper(ch_2);
}

/* Tests whether 2 characters are equal when ASCII case is ignored.
 *
 * Variant for repeated comparisons against a fixed ch_2: the caller passes
 * its case forms in. ch_2_title is accepted for interface compatibility but
 * is not consulted.
 */
static BOOL ascii_same_char_ignore_3(RE_CODE ch_1, RE_CODE ch_2, RE_CODE
  ch_2_lower, RE_CODE ch_2_upper, RE_CODE ch_2_title) {
    /* Does some case form of ch_1 equal ch_2? */
    if (ascii_lower(ch_1) == ch_2 || ascii_upper(ch_1) == ch_2)
        return TRUE;

    /* Does ch_1 equal one of the supplied case forms of ch_2? */
    return ch_1 == ch_2_lower || ch_1 == ch_2_upper;
}

/* Tests whether either ASCII case form of ch lies in the inclusive range
 * [min_value, max_value].
 */
static BOOL ascii_in_range_ignore(RE_CODE min_value, RE_CODE max_value, RE_CODE
  ch) {
    if (in_range(min_value, max_value, ascii_lower(ch)))
        return TRUE;

    return in_range(min_value, max_value, ascii_upper(ch));
}

/* Tests whether a character lies in the inclusive range
 * [min_value, max_value], ignoring case.
 *
 * Variant taking the character's case forms directly. ch_title is accepted
 * for interface compatibility but is not consulted.
 */
static BOOL ascii_in_range_ignore_3(RE_CODE min_value, RE_CODE max_value,
  RE_CODE ch_lower, RE_CODE ch_upper, RE_CODE ch_title) {
    if (in_range(min_value, max_value, ch_lower))
        return TRUE;

    return in_range(min_value, max_value, ch_upper);
}

/* The handlers for ASCII characters.
 *
 * The initialisers are positional, so their order must match the member
 * order of RE_EncodingTable.
 */
static RE_EncodingTable ascii_encoding = {
    ascii_in_category,
    ascii_lower,
    ascii_upper,
    ascii_upper, /* Titlecase for ASCII is the same as uppercase. */
    ascii_same_char_ignore,
    ascii_same_char_ignore_3,
    ascii_in_range_ignore,
    ascii_in_range_ignore_3,
};

/* Locale-specific. */

#define RE_LOCALE_MAX 0xFF

/* Tests whether a character belongs to a category under the current C
 * locale.
 *
 * Characters above RE_LOCALE_MAX and unrecognised categories never match.
 */
static BOOL locale_in_category(RE_CODE category, RE_CODE ch) {
    if (ch <= RE_LOCALE_MAX) {
        switch (category) {
        /* Categories decided by the character's value alone. */
        case RE_CAT_ASCII:
            return ch <= RE_ASCII_MAX;
        case RE_CAT_BLANK:
            return ch == ' ' || ch == '\t';
        case RE_CAT_LINEBREAK:
            return ch == '\n';
        case RE_CAT_XDIGIT:
            /* Hex digits are taken from the ASCII table, not the locale. */
            return ch <= RE_ASCII_MAX && (re_ascii_category[ch] &
              RE_MASK_XDIGIT) != 0;

        /* Categories delegated to the <ctype.h> classifiers. */
        case RE_CAT_ALNUM:
            return isalnum(ch) != 0;
        case RE_CAT_ALPHA:
            return isalpha(ch) != 0;
        case RE_CAT_CNTRL:
            return iscntrl(ch) != 0;
        case RE_CAT_DIGIT:
            return isdigit(ch) != 0;
        case RE_CAT_GRAPH:
            return isgraph(ch) != 0;
        case RE_CAT_LOWER:
            return islower(ch) != 0;
        case RE_CAT_PRINT:
            return isprint(ch) != 0;
        case RE_CAT_PUNCT:
            return ispunct(ch) != 0;
        case RE_CAT_SPACE:
            return isspace(ch) != 0;
        case RE_CAT_UPPER:
            return isupper(ch) != 0;
        case RE_CAT_WORD:
            /* A word character is an underscore or an alphanumeric. */
            return ch == '_' || isalnum(ch) != 0;
        default:
            break;
        }
    }

    return FALSE;
}

/* Returns the locale's lowercase form of a character; characters above
 * RE_LOCALE_MAX are returned unchanged.
 */
static RE_CODE locale_lower(RE_CODE ch) {
    if (ch <= RE_LOCALE_MAX)
        return (RE_CODE)tolower(ch);

    return ch;
}

/* Returns the locale's uppercase form of a character; characters above
 * RE_LOCALE_MAX are returned unchanged.
 */
static RE_CODE locale_upper(RE_CODE ch) {
    if (ch <= RE_LOCALE_MAX)
        return (RE_CODE)toupper(ch);

    return ch;
}

/* Tests whether 2 characters are equal when locale case is ignored. */
static BOOL locale_same_char_ignore(RE_CODE ch_1, RE_CODE ch_2) {
    /* Does some case form of ch_1 equal ch_2? */
    if (locale_lower(ch_1) == ch_2 || locale_upper(ch_1) == ch_2)
        return TRUE;

    /* Does some case form of ch_2 equal ch_1? */
    return ch_1 == locale_lower(ch_2) || ch_1 == locale_upper(ch_2);
}

/* Tests whether 2 characters are equal when locale case is ignored.
 *
 * Variant for repeated comparisons against a fixed ch_2: the caller passes
 * its case forms in. ch_2_title is accepted for interface compatibility but
 * is not consulted.
 */
static BOOL locale_same_char_ignore_3(RE_CODE ch_1, RE_CODE ch_2, RE_CODE
  ch_2_lower, RE_CODE ch_2_upper, RE_CODE ch_2_title) {
    /* Does some case form of ch_1 equal ch_2? */
    if (locale_lower(ch_1) == ch_2 || locale_upper(ch_1) == ch_2)
        return TRUE;

    /* Does ch_1 equal one of the supplied case forms of ch_2? */
    return ch_1 == ch_2_lower || ch_1 == ch_2_upper;
}

/* Tests whether either locale case form of ch lies in the inclusive range
 * [min_value, max_value].
 */
static BOOL locale_in_range_ignore(RE_CODE min_value, RE_CODE max_value, RE_CODE
  ch) {
    if (in_range(min_value, max_value, locale_lower(ch)))
        return TRUE;

    return in_range(min_value, max_value, locale_upper(ch));
}

/* Tests whether a character lies in the inclusive range
 * [min_value, max_value], ignoring case.
 *
 * Variant taking the character's case forms directly. ch_title is accepted
 * for interface compatibility but is not consulted.
 */
static BOOL locale_in_range_ignore_3(RE_CODE min_value, RE_CODE max_value,
  RE_CODE ch_lower, RE_CODE ch_upper, RE_CODE ch_title) {
    if (in_range(min_value, max_value, ch_lower))
        return TRUE;

    return in_range(min_value, max_value, ch_upper);
}

/* The handlers for locale characters.
 *
 * The initialisers are positional, so their order must match the member
 * order of RE_EncodingTable.
 */
static RE_EncodingTable locale_encoding = {
    locale_in_category,
    locale_lower,
    locale_upper,
    locale_upper, /* Titlecase for locale is the same as uppercase. */
    locale_same_char_ignore,
    locale_same_char_ignore_3,
    locale_in_range_ignore,
    locale_in_range_ignore_3,
};

/* Unicode-specific. */

/* Unicode character properties. */

/* (Typedefs copied from unicodedata.c) */

/* The layout of both records depends on the Python version being compiled
 * against, so the version-dependent members are selected individually.
 */
typedef struct {
    /* index into _PyUnicode_CategoryNames */
    const unsigned char category;
    /* combining class value 0 - 255 */
    const unsigned char combining;
    /* index into _PyUnicode_BidirectionalNames */
    const unsigned char bidirectional;
    /* true if mirrored in bidir mode */
    const unsigned char mirrored;
    /* index into _PyUnicode_EastAsianWidth */
    const unsigned char east_asian_width;
#if PY_VERSION_HEX >= 0x02070000
    /* see is_normalized() */
    const unsigned char normalization_quick_check;
#endif
} _PyUnicode_DatabaseRecord;

typedef struct change_record {
    /* sequence of fields should be the same as in merge_old_version */
    const unsigned char bidir_changed;
    const unsigned char category_changed;
    const unsigned char decimal_changed;
#if PY_VERSION_HEX >= 0x02060500
    const unsigned char mirrored_changed;
#endif
#if PY_VERSION_HEX >= 0x02070000
    const double numeric_changed;
#else
    const int numeric_changed;
#endif
} change_record;

/* data file generated by Tools/unicode/makeunicodedata.py */
#include "unicodedata_db.h"

/* Looks up the Unicode database record for a code point through the
 * two-level index1/index2 tables. Code points at or above 0x110000 map to
 * record 0.
 */
static const _PyUnicode_DatabaseRecord*
_getrecord_ex(Py_UCS4 code)
{
    int index = 0;

    if (code < 0x110000) {
        /* First level: select a block from the high bits of the code. */
        index = index1[code >> SHIFT];
        /* Second level: select the entry within that block from the low
         * SHIFT bits.
         */
        index = index2[(index << SHIFT) + (code & ((1 << SHIFT) - 1))];
    }

    return &_PyUnicode_Database_Records[index];
}
/* End of copied code. */

/* Returns the bit flag (1 shifted left by the general-category index) of a
 * Unicode character, ready to be tested against the RE_PROP_MASK_* masks.
 */
static unsigned int unicode_category_flag(RE_CODE ch) {
    return 1 << _getrecord_ex((Py_UCS4)ch)->category;
}

/* Tests whether a Unicode character belongs to a category.
 *
 * The category may be a POSIX-style class, a Unicode general category, a
 * block (RE_MIN_BLOCK..RE_MAX_BLOCK) or a script
 * (RE_MIN_SCRIPT..RE_MAX_SCRIPT). Unrecognised categories never match.
 */
static BOOL unicode_in_category(RE_CODE category, RE_CODE ch) {
    switch (category) {
    /* POSIX-style classes. */
    case RE_CAT_ALNUM:
        return (unicode_category_flag(ch) & RE_PROP_MASK_ALNUM) != 0;
    case RE_CAT_ALPHA:
        return (unicode_category_flag(ch) & RE_PROP_MASK_L) != 0;
    case RE_CAT_ASCII:
        return ch <= RE_ASCII_MAX;
    case RE_CAT_BLANK:
        /* A tab, or any space separator. */
        if (ch == '\t')
            return TRUE;

        return (unicode_category_flag(ch) & RE_PROP_MASK_ZS) != 0;
    case RE_CAT_CNTRL:
        return (unicode_category_flag(ch) & RE_PROP_MASK_CC) != 0;
    case RE_CAT_DIGIT:
        return _PyUnicode_IsDigit(ch);
    case RE_CAT_GRAPH:
        return (unicode_category_flag(ch) & RE_PROP_MASK_NONGRAPH) == 0;
    case RE_CAT_LINEBREAK:
        return ch == '\n';
    case RE_CAT_LOWER:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LL) != 0;
    case RE_CAT_PRINT:
        return (unicode_category_flag(ch) & RE_PROP_MASK_C) == 0;
    case RE_CAT_PUNCT:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PUNCT) != 0;
    case RE_CAT_SPACE:
        return _PyUnicode_IsWhitespace(ch);
    case RE_CAT_UPPER:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LU) != 0;
    case RE_CAT_WORD:
        return (unicode_category_flag(ch) & RE_PROP_MASK_WORD) != 0;
    case RE_CAT_XDIGIT:
        /* Hex digits are taken from the ASCII table only. */
        return ch <= RE_ASCII_MAX && (re_ascii_category[ch] & RE_MASK_XDIGIT) !=
          0;

    /* General categories: C (other). */
    case RE_CAT_C:
        return (unicode_category_flag(ch) & RE_PROP_MASK_C) != 0;
    case RE_CAT_CC:
        return (unicode_category_flag(ch) & RE_PROP_MASK_CC) != 0;
    case RE_CAT_CF:
        return (unicode_category_flag(ch) & RE_PROP_MASK_CF) != 0;
    case RE_CAT_CN:
        return (unicode_category_flag(ch) & RE_PROP_MASK_CN) != 0;
    case RE_CAT_CO:
        return (unicode_category_flag(ch) & RE_PROP_MASK_CO) != 0;
    case RE_CAT_CS:
        return (unicode_category_flag(ch) & RE_PROP_MASK_CS) != 0;

    /* General categories: L (letter). */
    case RE_CAT_L:
        return (unicode_category_flag(ch) & RE_PROP_MASK_L) != 0;
    case RE_CAT_LL:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LL) != 0;
    case RE_CAT_LM:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LM) != 0;
    case RE_CAT_LO:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LO) != 0;
    case RE_CAT_LT:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LT) != 0;
    case RE_CAT_LU:
        return (unicode_category_flag(ch) & RE_PROP_MASK_LU) != 0;

    /* General categories: M (mark). */
    case RE_CAT_M:
        return (unicode_category_flag(ch) & RE_PROP_MASK_M) != 0;
    case RE_CAT_MC:
        return (unicode_category_flag(ch) & RE_PROP_MASK_MC) != 0;
    case RE_CAT_ME:
        return (unicode_category_flag(ch) & RE_PROP_MASK_ME) != 0;
    case RE_CAT_MN:
        return (unicode_category_flag(ch) & RE_PROP_MASK_MN) != 0;

    /* General categories: N (number). */
    case RE_CAT_N:
        return (unicode_category_flag(ch) & RE_PROP_MASK_N) != 0;
    case RE_CAT_ND:
        return (unicode_category_flag(ch) & RE_PROP_MASK_ND) != 0;
    case RE_CAT_NL:
        return (unicode_category_flag(ch) & RE_PROP_MASK_NL) != 0;
    case RE_CAT_NO:
        return (unicode_category_flag(ch) & RE_PROP_MASK_NO) != 0;

    /* General categories: P (punctuation). */
    case RE_CAT_P:
        return (unicode_category_flag(ch) & RE_PROP_MASK_P) != 0;
    case RE_CAT_PC:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PC) != 0;
    case RE_CAT_PD:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PD) != 0;
    case RE_CAT_PE:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PE) != 0;
    case RE_CAT_PF:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PF) != 0;
    case RE_CAT_PI:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PI) != 0;
    case RE_CAT_PO:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PO) != 0;
    case RE_CAT_PS:
        return (unicode_category_flag(ch) & RE_PROP_MASK_PS) != 0;

    /* General categories: S (symbol). */
    case RE_CAT_S:
        return (unicode_category_flag(ch) & RE_PROP_MASK_S) != 0;
    case RE_CAT_SC:
        return (unicode_category_flag(ch) & RE_PROP_MASK_SC) != 0;
    case RE_CAT_SK:
        return (unicode_category_flag(ch) & RE_PROP_MASK_SK) != 0;
    case RE_CAT_SM:
        return (unicode_category_flag(ch) & RE_PROP_MASK_SM) != 0;
    case RE_CAT_SO:
        return (unicode_category_flag(ch) & RE_PROP_MASK_SO) != 0;

    /* General categories: Z (separator). */
    case RE_CAT_Z:
        return (unicode_category_flag(ch) & RE_PROP_MASK_Z) != 0;
    case RE_CAT_ZL:
        return (unicode_category_flag(ch) & RE_PROP_MASK_ZL) != 0;
    case RE_CAT_ZP:
        return (unicode_category_flag(ch) & RE_PROP_MASK_ZP) != 0;
    case RE_CAT_ZS:
        return (unicode_category_flag(ch) & RE_PROP_MASK_ZS) != 0;

    default:
        break;
    }

    /* Not a class or general category: try blocks, then scripts. */
    if (RE_MIN_BLOCK <= category && category <= RE_MAX_BLOCK) {
        /* Each block is a single contiguous range, indexed by category. */
        RE_BlockRange* block = &re_block_ranges[category - RE_MIN_BLOCK];

        return block->min_char <= ch && ch <= block->max_char;
    }

    if (RE_MIN_SCRIPT <= category && category <= RE_MAX_SCRIPT) {
        /* Binary search of re_script_ranges for the range containing ch.
         *
         * NOTE(review): the upper bound is the number of script categories
         * minus 1, not the number of entries in re_script_ranges; confirm
         * that the table has exactly that many entries.
         */
        int lo = 0;
        int hi = RE_MAX_SCRIPT - RE_MIN_SCRIPT;

        while (lo <= hi) {
            int mid = (lo + hi) / 2;
            RE_ScriptRange* range = &re_script_ranges[mid];

            if (ch < range->min_char)
                hi = mid - 1;
            else if (ch > range->max_char)
                lo = mid + 1;
            else
                return category == range->script;
        }
    }

    return FALSE;
}

/* Returns the lowercase form of a Unicode character.
 *
 * NOTE(review): the character is cast to Py_UNICODE first; if Py_UNICODE is
 * narrower than RE_CODE on some build, the cast truncates. Confirm that this
 * is intended.
 */
static RE_CODE unicode_lower(RE_CODE ch) {
    Py_UNICODE uch = (Py_UNICODE)ch;

    return Py_UNICODE_TOLOWER(uch);
}

/* Returns the uppercase form of a Unicode character.
 *
 * NOTE(review): the character is cast to Py_UNICODE first; if Py_UNICODE is
 * narrower than RE_CODE on some build, the cast truncates. Confirm that this
 * is intended.
 */
static RE_CODE unicode_upper(RE_CODE ch) {
    Py_UNICODE uch = (Py_UNICODE)ch;

    return Py_UNICODE_TOUPPER(uch);
}

/* Returns the titlecase form of a Unicode character.
 *
 * NOTE(review): the character is cast to Py_UNICODE first; if Py_UNICODE is
 * narrower than RE_CODE on some build, the cast truncates. Confirm that this
 * is intended.
 */
static RE_CODE unicode_title(RE_CODE ch) {
    Py_UNICODE uch = (Py_UNICODE)ch;

    return Py_UNICODE_TOTITLE(uch);
}

/* Tests whether 2 characters are equal when Unicode case (lower, upper and
 * title) is ignored.
 */
static BOOL unicode_same_char_ignore(RE_CODE ch_1, RE_CODE ch_2) {
    /* Does some case form of ch_1 equal ch_2? */
    if (unicode_lower(ch_1) == ch_2 || unicode_upper(ch_1) == ch_2 ||
      unicode_title(ch_1) == ch_2)
        return TRUE;

    /* Does some case form of ch_2 equal ch_1? */
    return ch_1 == unicode_lower(ch_2) || ch_1 == unicode_upper(ch_2) ||
      ch_1 == unicode_title(ch_2);
}

/* Tests whether 2 characters are equal when Unicode case (lower, upper and
 * title) is ignored.
 *
 * Variant for repeated comparisons against a fixed ch_2: the caller passes
 * its three case forms in.
 */
static BOOL unicode_same_char_ignore_3(RE_CODE ch_1, RE_CODE ch_2, RE_CODE
  ch_2_lower, RE_CODE ch_2_upper, RE_CODE ch_2_title) {
    /* Does some case form of ch_1 equal ch_2? */
    if (unicode_lower(ch_1) == ch_2 || unicode_upper(ch_1) == ch_2 ||
      unicode_title(ch_1) == ch_2)
        return TRUE;

    /* Does ch_1 equal one of the supplied case forms of ch_2? */
    return ch_1 == ch_2_lower || ch_1 == ch_2_upper || ch_1 == ch_2_title;
}

/* Tests whether any Unicode case form (lower, upper or title) of ch lies in
 * the inclusive range [min_value, max_value].
 */
static BOOL unicode_in_range_ignore(RE_CODE min_value, RE_CODE max_value,
  RE_CODE ch) {
    if (in_range(min_value, max_value, unicode_lower(ch)))
        return TRUE;

    if (in_range(min_value, max_value, unicode_upper(ch)))
        return TRUE;

    return in_range(min_value, max_value, unicode_title(ch));
}

/* Tests whether a character lies in the inclusive range
 * [min_value, max_value], ignoring case.
 *
 * Variant taking the character's three case forms directly.
 */
static BOOL unicode_in_range_ignore_3(RE_CODE min_value, RE_CODE max_value,
  RE_CODE ch_lower, RE_CODE ch_upper, RE_CODE ch_title) {
    if (in_range(min_value, max_value, ch_lower))
        return TRUE;

    if (in_range(min_value, max_value, ch_upper))
        return TRUE;

    return in_range(min_value, max_value, ch_title);
}

/* The handlers for Unicode characters.
 *
 * The initialisers are positional, so their order must match the member
 * order of RE_EncodingTable. Unlike the ASCII and locale tables, this one has
 * a distinct titlecase handler.
 */
static RE_EncodingTable unicode_encoding = {
    unicode_in_category,
    unicode_lower,
    unicode_upper,
    unicode_title,
    unicode_same_char_ignore,
    unicode_same_char_ignore_3,
    unicode_in_range_ignore,
    unicode_in_range_ignore_3,
};

/* Raises the Python exception corresponding to an RE_ERROR_* status code.
 *
 * 'object' is used only for RE_ERROR_GROUP_INDEX_TYPE, where its type name
 * (when the object is not NULL) is included in the message.
 * RE_ERROR_INTERRUPTED raises nothing because an exception is already
 * pending. Any unrecognised status is reported as an internal engine error.
 */
Py_LOCAL(void) set_error(int status, PyObject* object) {
    PyObject* exc_type;
    const char* message;

    TRACE(("<<set_error>>\n"))

    switch (status) {
    case RE_ERROR_INTERRUPTED:
        /* An exception has already been raised, so let it fly. */
        return;
    case RE_ERROR_MEMORY:
        PyErr_NoMemory();
        return;
    case RE_ERROR_GROUP_INDEX_TYPE:
        if (!object)
            PyErr_Format(PyExc_TypeError,
              "group indices must be integers or strings");
        else
            PyErr_Format(PyExc_TypeError,
              "group indices must be integers or strings, not %.200s",
              object->ob_type->tp_name);
        return;
    case RE_ERROR_ILLEGAL:
        exc_type = PyExc_RuntimeError;
        message = "invalid RE code";
        break;
    case RE_ERROR_INVALID_GROUP_REF:
        exc_type = error_exception;
        message = "invalid group reference";
        break;
    case RE_ERROR_NO_SUCH_GROUP:
        exc_type = PyExc_IndexError;
        message = "no such group";
        break;
    case RE_ERROR_REPLACEMENT:
        exc_type = error_exception;
        message = "invalid replacement";
        break;
    case RE_ERROR_INDEX:
        exc_type = PyExc_TypeError;
        message = "string indices must be integers";
        break;
    default:
        /* Other error codes indicate compiler/engine bugs. */
        exc_type = PyExc_RuntimeError;
        message = "internal error in regular expression engine";
        break;
    }

    PyErr_SetString(exc_type, message);
}

/* Allocates 'size' bytes with PyMem_Malloc.
 *
 * On failure, raises the Python out-of-memory error (through set_error) and
 * returns NULL.
 */
Py_LOCAL(void*) re_alloc(size_t size) {
    void* block = PyMem_Malloc(size);

    if (block == NULL)
        set_error(RE_ERROR_MEMORY, NULL);

    return block;
}

/* Resizes the block at 'ptr' to 'size' bytes with PyMem_Realloc.
 *
 * On failure, raises the Python out-of-memory error (through set_error) and
 * returns NULL.
 */
Py_LOCAL(void*) re_realloc(void *ptr, size_t size) {
    void* block = PyMem_Realloc(ptr, size);

    if (block == NULL)
        set_error(RE_ERROR_MEMORY, NULL);

    return block;
}

/* Deallocates memory by passing the pointer to PyMem_Free, the counterpart
 * of the PyMem_Malloc/PyMem_Realloc calls made by re_alloc and re_realloc.
 */
Py_LOCAL(void) re_dealloc(void* ptr) {
    PyMem_Free(ptr);
}

#if defined(RE_MULTITHREADED)
/* Releases the GIL.
 *
 * The thread state returned by PyEval_SaveThread is stored in
 * state->saved_GIL so that acquire_GIL can restore it later.
 */
Py_LOCAL(void) release_GIL(RE_State* state) {
    state->saved_GIL = PyEval_SaveThread();
}

/* Acquires the GIL by restoring the thread state that release_GIL stored in
 * state->saved_GIL.
 */
Py_LOCAL(void) acquire_GIL(RE_State* state) {
    PyEval_RestoreThread(state->saved_GIL);
}

#endif
/* Allocates memory, making sure the GIL is held for the allocation.
 *
 * When the state is matching with the GIL released, the GIL is reacquired
 * around the call to re_alloc and released again afterwards. On failure, the
 * Python error is set and NULL is returned.
 */
Py_LOCAL(void*) safe_alloc(RE_State* state, size_t size) {
    void* block;

#if defined(RE_MULTITHREADED)
    if (!state->is_multithreaded)
        /* The GIL is already held. */
        return re_alloc(size);

    acquire_GIL(state);
    block = re_alloc(size);
    release_GIL(state);
#else
    block = re_alloc(size);
#endif

    return block;
}

/* Reallocates memory, making sure the GIL is held for the reallocation.
 *
 * When the state is matching with the GIL released, the GIL is reacquired
 * around the call to re_realloc and released again afterwards. On failure,
 * the Python error is set and NULL is returned.
 */
Py_LOCAL(void*) safe_realloc(RE_State* state, void *ptr, size_t size) {
    void* block;

#if defined(RE_MULTITHREADED)
    if (!state->is_multithreaded)
        /* The GIL is already held. */
        return re_realloc(ptr, size);

    acquire_GIL(state);
    block = re_realloc(ptr, size);
    release_GIL(state);
#else
    block = re_realloc(ptr, size);
#endif

    return block;
}

/* Checks for pending signals (such as KeyboardInterrupt), holding the GIL
 * during the check.
 *
 * The GIL is acquired before and released after the check only when the
 * state is flagged as multithreaded (and RE_MULTITHREADED is defined).
 *
 * Returns TRUE if PyErr_CheckSignals reported an error, otherwise FALSE.
 */
Py_LOCAL(BOOL) safe_check_signals(RE_State* state) {
    BOOL result;

#if defined(RE_MULTITHREADED)
    if (state->is_multithreaded)
        acquire_GIL(state);

#endif
    /* PyErr_CheckSignals returns an int (0, or -1 on error); normalise it so
     * that the result is always one of the two BOOL values.
     */
    result = PyErr_CheckSignals() != 0 ? TRUE : FALSE;

#if defined(RE_MULTITHREADED)
    if (state->is_multithreaded)
        release_GIL(state);

#endif
    return result;
}

/* Checks whether a character is in a big bitset.
 *
 * The node's values are laid out as: max_char, then the packed subset
 * indexes, then the subsets themselves. Each subset covers 256 characters.
 *
 * Returns TRUE if the character's membership agrees with node->match.
 */
Py_LOCAL(BOOL) in_big_bitset(RE_Node* node, RE_CODE ch) {
    RE_CODE* values = node->values;
    RE_CODE max_char = values[0];
    RE_CODE* indexes;
    RE_CODE* subsets;
    RE_CODE* subset;
    size_t chunk;
    size_t bit;
    size_t packed;
    size_t subset_index;
    size_t flags;
    BOOL found;

    /* Characters beyond the largest one recorded can't be members. */
    if (ch > max_char)
        return !node->match;

    /* The indexes follow max_char and the subsets follow the indexes. */
    indexes = values + 1;
    subsets = indexes + (max_char / 256 / INDEXES_PER_CODE) + 1;

    /* Which 256-character chunk, and which bit within that chunk? */
    chunk = ch / 256;
    bit = ch % 256;

    /* Several indexes are packed into each code; extract the one for this
     * chunk. It says which of the (merged) subsets to consult.
     */
    packed = indexes[chunk / INDEXES_PER_CODE];
    packed >>= BITS_PER_INDEX * (chunk % INDEXES_PER_CODE);
    subset_index = packed & ~(~(RE_CODE)0 << BITS_PER_INDEX);

    subset = subsets + subset_index * (256 / BITS_PER_CODE);

    /* Shift the character's flag down to the lowest bit. */
    flags = subset[bit / BITS_PER_CODE] >> (bit % BITS_PER_CODE);
    found = (flags & 1) != 0;

    return found == node->match;
}

/* Checks whether a character is in a small bitset.
 *
 * The node's values are laid out as: top_bits, then a bitset indexed by the
 * low 8 bits of the character. A character whose upper bits (ch >> 8) differ
 * from top_bits is never a member.
 *
 * Returns TRUE if the character's membership agrees with node->match.
 */
Py_LOCAL(BOOL) in_small_bitset(RE_Node* node, RE_CODE ch) {
    RE_CODE* values = node->values;
    RE_CODE* bits;
    size_t low;
    size_t flags;
    BOOL found;

    if (values[0] != (ch >> 8))
        return !node->match;

    bits = values + 1;
    low = ch & 0xFF;

    /* Shift the character's flag down to the lowest bit. */
    flags = bits[low / BITS_PER_CODE] >> (low % BITS_PER_CODE);
    found = (flags & 1) != 0;

    return found == node->match;
}

/* Checks whether a character is in a set.
 *
 * The members of the set are the nodes chained from node->next_2 through
 * their next_1 links. Returns node->match as soon as one member accepts the
 * character, and !node->match if no member does. A member with an
 * unrecognised op makes the function return FALSE.
 */
Py_LOCAL(BOOL) in_set(RE_EncodingTable* encoding, RE_Node* node, RE_CODE ch)
  {
    RE_Node* member;
    BOOL (*in_category)(RE_CODE, RE_CODE);

    member = node->next_2.node;
    in_category = encoding->in_category;

    while (member) {
        switch (member->op) {
        case RE_OP_ANY:
            TRACE(("%s\n", re_op_text[member->op]))
            if (ch != '\n')
                return node->match;
            break;
        case RE_OP_BIG_BITSET:
            /* values are: max_char indexes... subsets... */
            TRACE(("%s\n", re_op_text[member->op]))
            if (in_big_bitset(member, ch))
                return node->match;
            break;
        case RE_OP_CATEGORY: /* A character category. */
            /* values are: category */
            TRACE(("%s %d %d\n", re_op_text[member->op], member->match,
              member->values[0]))
            if (in_category(member->values[0], ch) == member->match)
                return node->match;
            break;
        case RE_OP_CHARACTER: /* A character literal. */
            /* values are: char_code */
            TRACE(("%s %d %d\n", re_op_text[member->op], member->match,
              member->values[0]))
            if ((ch == member->values[0]) == member->match)
                return node->match;
            break;
        case RE_OP_RANGE: /* A character range. */
            /* values are: min_char max_char */
            TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match,
              member->values[0], member->values[1]))
            if (in_range(member->values[0], member->values[1], ch) ==
              member->match)
                return node->match;
            break;
        case RE_OP_SMALL_BITSET:
            /* values are: top_bits bitset */
            TRACE(("%s\n", re_op_text[member->op]))
            if (in_small_bitset(member, ch))
                return node->match;
            break;
        case RE_OP_STRING: /* Actually a character set. */
        {
            size_t count;
            RE_CODE* chars;
            size_t i;

            /* The trace follows the declarations so that a VERBOSE build
             * doesn't place a statement ahead of them.
             */
            TRACE(("%s %d\n", re_op_text[member->op], member->values[0]))

            count = member->value_count;
            chars = member->values;
            for (i = 0; i < count; i ++) {
                if (ch == chars[i])
                    return node->match;
            }
            break;
        }
        default:
            return FALSE;
        }

        member = member->next_1.node;
    }

    return !node->match;
}

/* Checks whether a character is in a set, ignoring case.
 *
 * The members of the set are the nodes chained from node->next_2 through
 * their next_1 links. The lowercase, uppercase and titlecase forms of the
 * character are obtained once from the encoding table; bitset, character,
 * range and string members are tested against those forms, whereas ANY and
 * category members are tested against the character itself.
 *
 * Returns node->match as soon as one member accepts the character, and
 * !node->match if no member does. A member with an unrecognised op makes the
 * function return FALSE.
 */
Py_LOCAL(BOOL) in_set_ignore(RE_EncodingTable* encoding, RE_Node* node,
  RE_CODE ch) {
    RE_Node* member;
    RE_CODE ch_lower;
    RE_CODE ch_upper;
    RE_CODE ch_title;

    member = node->next_2.node;
    ch_lower = encoding->lower(ch);
    ch_upper = encoding->upper(ch);
    ch_title = encoding->title(ch);

    while (member) {
        switch (member->op) {
        case RE_OP_ANY:
            TRACE(("%s\n", re_op_text[member->op]))
            if (ch != '\n')
                return node->match;
            break;
        case RE_OP_BIG_BITSET:
            /* values are: max_char indexes... subsets... */
            TRACE(("%s\n", re_op_text[member->op]))
            if (in_big_bitset(member, ch_lower) || in_big_bitset(member,
              ch_upper) || in_big_bitset(member,
              ch_title))
                return node->match;
            break;
        case RE_OP_CATEGORY: /* A character category. */
            /* values are: category */
            TRACE(("%s %d %d\n", re_op_text[member->op], member->match,
              member->values[0]))
            if (encoding->in_category(member->values[0], ch) == member->match)
                return node->match;
            break;
        case RE_OP_CHARACTER: /* A character literal. */
            /* values are: char_code */
            TRACE(("%s %d %d\n", re_op_text[member->op], member->match,
              member->values[0]))
            if (encoding->same_char_ignore_3(member->values[0], ch, ch_lower,
              ch_upper, ch_title) == member->match)
                return node->match;
            break;
        case RE_OP_RANGE: /* A character range. */
            /* values are: min_char max_char */
            TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match,
              member->values[0], member->values[1]))
            if (encoding->in_range_ignore_3(member->values[0],
              member->values[1], ch_lower, ch_upper, ch_title) == member->match)
                return node->match;
            break;
        case RE_OP_SMALL_BITSET:
            /* values are: top_bits bitset */
            TRACE(("%s\n", re_op_text[member->op]))
            if (in_small_bitset(member, ch_lower) || in_small_bitset(member,
              ch_upper) || in_small_bitset(member, ch_title))
                return node->match;
            break;
        case RE_OP_STRING: /* Actually a character set. */
        {
            BOOL (*same_char_ignore_3)(RE_CODE ch1, RE_CODE ch2, RE_CODE
              ch2_lower, RE_CODE ch2_upper, RE_CODE ch2_title);
            size_t count;
            RE_CODE* chars;
            size_t i;

            /* The trace follows the declarations so that a VERBOSE build
             * doesn't place a statement ahead of them.
             */
            TRACE(("%s %d\n", re_op_text[member->op], member->values[0]))

            same_char_ignore_3 = encoding->same_char_ignore_3;

            count = member->value_count;
            chars = member->values;
            for (i = 0; i < count; i ++) {
                if (same_char_ignore_3(chars[i], ch, ch_lower, ch_upper,
                  ch_title))
                    return node->match;
            }
            break;
        }
        default:
            return FALSE;
        }

        member = member->next_1.node;
    }

    return !node->match;
}

/* Converts a text pointer into a character offset from the start of the text.
 *
 * The byte distance from state->text_start is divided by state->charsize.
 * Returns -1 if the text pointer is NULL.
 */
Py_LOCAL(Py_ssize_t) text_offset(RE_State* state, void* ptr) {
    if (!ptr)
        return -1;

    return ((char*)ptr - (char*)state->text_start) / state->charsize;
}

/* Pushes the groups.
 *
 * Appends a copy of the first pattern->group_count entries of state->data to
 * the saved-groups stack, growing the stack by doubling its capacity when
 * there isn't enough room.
 *
 * Returns FALSE if the stack couldn't be enlarged (safe_realloc reports the
 * error), otherwise TRUE. Does nothing when the pattern has no groups.
 */
Py_LOCAL(BOOL) push_groups(RE_State* state) {
    PatternObject* pattern;
    size_t new_count;
    size_t new_capacity;

    pattern = state->pattern;
    if (pattern->group_count == 0)
        return TRUE;

    new_count = state->saved_groups_count + pattern->group_count;
    new_capacity = state->saved_groups_capacity;

    /* Doubling a capacity of zero would never reach new_count, so make sure
     * that the loop below starts from a non-zero value.
     */
    if (new_capacity == 0)
        new_capacity = 1;

    while (new_count > new_capacity)
        new_capacity *= 2;

    if (new_capacity != state->saved_groups_capacity) {
        RE_Data* new_groups;

        new_groups = (RE_Data*)safe_realloc(state, state->saved_groups,
          new_capacity * sizeof(RE_Data));
        if (!new_groups)
            return FALSE;
        state->saved_groups_capacity = new_capacity;
        state->saved_groups = new_groups;
    }

    /* Copy the current groups onto the top of the stack. */
    memmove(state->saved_groups + state->saved_groups_count, state->data,
      pattern->group_count * sizeof(RE_Data));
    state->saved_groups_count = new_count;

    return TRUE;
}

/* Pops the groups.
 *
 * Removes the topmost pattern->group_count entries from the saved-groups
 * stack and copies them back into state->data. Does nothing when the pattern
 * has no groups.
 */
Py_LOCAL(void) pop_groups(RE_State* state) {
    PatternObject* pattern = state->pattern;
    RE_Data* top;

    if (pattern->group_count != 0) {
        state->saved_groups_count -= pattern->group_count;
        top = state->saved_groups + state->saved_groups_count;
        memmove(state->data, top, pattern->group_count * sizeof(RE_Data));
    }
}

/* Reloads the groups that were pushed most recently.
 *
 * Copies the topmost pattern->group_count saved entries back into
 * state->data while leaving them on the stack, which is equivalent to a pop
 * followed by a push. Does nothing when the pattern has no groups.
 */
Py_LOCAL(void) reload_groups(RE_State* state) {
    PatternObject* pattern = state->pattern;
    RE_Data* top;

    if (pattern->group_count != 0) {
        top = state->saved_groups + state->saved_groups_count -
          pattern->group_count;
        memmove(state->data, top, pattern->group_count * sizeof(RE_Data));
    }
}

/* Drops the groups that were pushed most recently.
 *
 * The saved-groups count is reduced by pattern->group_count; state->data is
 * left as it is.
 */
Py_LOCAL(void) drop_groups(RE_State* state) {
    PatternObject* pattern = state->pattern;

    state->saved_groups_count -= pattern->group_count;
}

/* Initialises the state for a match.
 *
 * Empties the saved-groups stack and the backtrack stack, anchors the search
 * and the match at the current text position, and zeroes the pattern's data
 * entries.
 */
Py_LOCAL(void) init_match(RE_State* state) {
    /* No groups have been saved yet. */
    state->saved_groups_count = 0;

    /* Start again from the empty first backtrack block. */
    state->backtrack_block.count = 0;
    state->current_block = &state->backtrack_block;
    state->backtrack = NULL;

    /* Both the search and the match begin at the current position. */
    state->match_ptr = state->text_ptr;
    state->search_anchor = state->text_ptr;

    memset(state->data, 0, state->pattern->data_count * sizeof(RE_Data));
}

/* Adds a new backtrack entry.
 *
 * The entry is taken from the current backtrack block. When that block is
 * full, the following block is used instead, being allocated with twice the
 * capacity if it doesn't exist yet. The new entry's op is set and
 * state->backtrack is pointed at it.
 *
 * Returns FALSE if a new block couldn't be allocated (safe_alloc reports the
 * error), otherwise TRUE.
 */
Py_LOCAL(BOOL) add_backtrack(RE_State* state, RE_CODE op) {
    RE_BacktrackBlock* block = state->current_block;
    RE_BacktrackData* entry;

    if (block->count >= block->capacity) {
        RE_BacktrackBlock* successor = block->next;

        if (!successor) {
            size_t capacity;
            size_t size;

            /* Each additional block is twice as large as the one before. */
            capacity = block->capacity * 2;
            size = sizeof(RE_BacktrackBlock) + (capacity -
              RE_BACKTRACK_BLOCK_SIZE) * sizeof(RE_BacktrackData);
            successor = (RE_BacktrackBlock*)safe_alloc(state, size);
            if (!successor)
                return FALSE;

            successor->previous = block;
            successor->next = NULL;
            successor->capacity = capacity;
            block->next = successor;
        }

        /* Move on to the (empty) following block. */
        successor->count = 0;
        state->current_block = successor;
        block = successor;
    }

    entry = &block->items[block->count++];
    entry->op = op;
    state->backtrack = entry;

    return TRUE;
}

/* Returns a pointer to the most recent backtrack entry, i.e. the last used
 * item of the current block.
 *
 * It'll never be called when there are _no_ entries.
 */
Py_LOCAL(RE_BacktrackData*) last_backtrack(RE_State* state) {
    RE_BacktrackBlock* block = state->current_block;

    return &block->items[block->count - 1];
}

/* Discards the most recent backtrack entry.
 *
 * If that leaves the current block empty and it has a predecessor, the
 * predecessor becomes the current block.
 *
 * It'll never be called to discard the _only_ entry.
 */
Py_LOCAL(void) discard_backtrack(RE_State* state) {
    RE_BacktrackBlock* block = state->current_block;

    --block->count;
    if (block->count == 0 && block->previous)
        state->current_block = block->previous;
}

/* Set up for byte-string.
 *
 * The code which follows is parsed on both passes. For this first pass the
 * character type is unsigned char and each generic function name is mapped
 * to its 'b'-prefixed byte-string variant.
 */
#define RE_CHAR unsigned char
#define RE_AT_BOUNDARY bat_boundary
#define RE_TRY_MATCH btry_match
#define RE_COUNT_ONE bcount_one
#define RE_MATCH_ONE bmatch_one
#define RE_SEARCH_CONTEXT bsearch_context
#define RE_MATCH_CONTEXT bmatch_context
#define RE_MATCH bmatch
#endif

/* Parsed on both passes (for byte-string and Unicode). */

/* Checks whether a text position is on a word boundary.
 *
 * A position is on a boundary when exactly one of the characters either side
 * of it is in the RE_CAT_WORD category. Positions at the start or end of the
 * text have no character on that side, which counts as a non-word character.
 */
Py_LOCAL(BOOL) RE_AT_BOUNDARY(RE_State* state, RE_CHAR* text_ptr) {
    BOOL (*in_category)(RE_CODE, RE_CODE) = state->encoding->in_category;
    BOOL word_before = FALSE;
    BOOL word_after = FALSE;

    if (text_ptr > (RE_CHAR*)state->text_start)
        word_before = in_category(RE_CAT_WORD, text_ptr[-1]) != 0;

    if (text_ptr < (RE_CHAR*)state->text_end)
        word_after = in_category(RE_CAT_WORD, text_ptr[0]) != 0;

    return word_before != word_after;
}

/* Forward declaration; the function is defined further down. */
Py_LOCAL(BOOL) RE_TRY_MATCH(RE_State* state, RE_NextNode* next, RE_CHAR*
  text_ptr, RE_Position* next_position);

/* Counts a repeated character pattern.
 *
 * Starting at text_ptr, counts how many consecutive characters match the
 * single-character node. Forward ops scan towards state->slice_end and
 * reverse (_REV) ops scan towards state->slice_start. The count never exceeds
 * max_count or the number of characters available in that direction.
 *
 * Returns the number of matching characters, or 0 if max_count is less than
 * 1 or the node's op isn't one of the single-character ops handled here.
 */
Py_LOCAL(size_t) RE_COUNT_ONE(RE_State* state, RE_Node* node, RE_CHAR* text_ptr,
  RE_CODE max_count) {
    RE_CHAR* start_ptr;
    RE_CHAR* limit_ptr;
    RE_EncodingTable* encoding;
    size_t available;
    BOOL reverse;
    BOOL match;

    if (max_count < 1)
        return 0;

    /* Determine the direction of the scan. Ops which aren't handled never
     * match anything.
     */
    switch (node->op) {
    case RE_OP_ANY:
    case RE_OP_ANY_ALL:
    case RE_OP_CATEGORY:
    case RE_OP_CHARACTER:
    case RE_OP_CHARACTER_IGNORE:
    case RE_OP_SET:
    case RE_OP_SET_IGNORE:
        reverse = FALSE;
        break;
    case RE_OP_ANY_ALL_REV:
    case RE_OP_ANY_REV:
    case RE_OP_CATEGORY_REV:
    case RE_OP_CHARACTER_IGNORE_REV:
    case RE_OP_CHARACTER_REV:
    case RE_OP_SET_IGNORE_REV:
    case RE_OP_SET_REV:
        reverse = TRUE;
        break;
    default:
        return 0;
    }

    start_ptr = text_ptr;

    /* Clamp the count to what's available in the slice and work out where
     * the scan must stop.
     */
    if (reverse) {
        available = text_ptr - (RE_CHAR*)state->slice_start;
        if (max_count > available)
            max_count = (RE_CODE)available;
        limit_ptr = text_ptr - max_count;
    } else {
        available = (RE_CHAR*)state->slice_end - text_ptr;
        if (max_count > available)
            max_count = (RE_CODE)available;
        limit_ptr = text_ptr + max_count;
    }

    encoding = state->encoding;
    match = node->match;

    switch (node->op) {
    case RE_OP_ANY:
        while (text_ptr < limit_ptr && text_ptr[0] != '\n')
            ++text_ptr;
        break;
    case RE_OP_ANY_ALL:
    case RE_OP_ANY_ALL_REV:
        /* Every character matches, so the clamped count is the answer. */
        return max_count;
    case RE_OP_ANY_REV:
        while (text_ptr > limit_ptr && text_ptr[-1] != '\n')
            --text_ptr;
        break;
    case RE_OP_CATEGORY:
    case RE_OP_CATEGORY_REV:
    {
        /* values are: category */
        BOOL (*in_category)(RE_CODE category, RE_CODE ch);
        RE_CODE category;

        in_category = encoding->in_category;
        category = node->values[0];

        if (reverse) {
            while (text_ptr > limit_ptr && in_category(category,
              text_ptr[-1]) == match)
                --text_ptr;
        } else {
            while (text_ptr < limit_ptr && in_category(category,
              text_ptr[0]) == match)
                ++text_ptr;
        }
        break;
    }
    case RE_OP_CHARACTER:
    case RE_OP_CHARACTER_REV:
    {
        /* values are: char_code */
        RE_CODE ch;

        ch = node->values[0];

        if (reverse) {
            while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match)
                --text_ptr;
        } else {
            while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match)
                ++text_ptr;
        }
        break;
    }
    case RE_OP_CHARACTER_IGNORE:
    case RE_OP_CHARACTER_IGNORE_REV:
    {
        /* values are: char_code */
        BOOL (*same_char_ignore_3)(RE_CODE ch1, RE_CODE ch2, RE_CODE ch2_lower,
          RE_CODE ch2_upper, RE_CODE ch2_title);
        RE_CODE ch;
        RE_CODE ch_lower;
        RE_CODE ch_upper;
        RE_CODE ch_title;

        same_char_ignore_3 = encoding->same_char_ignore_3;

        /* The case forms of the pattern character are computed once, ahead
         * of the scan.
         */
        ch = node->values[0];
        ch_lower = encoding->lower(ch);
        ch_upper = encoding->upper(ch);
        ch_title = encoding->title(ch);

        if (reverse) {
            while (text_ptr > limit_ptr && same_char_ignore_3(text_ptr[-1], ch,
              ch_lower, ch_upper, ch_title) == match)
                --text_ptr;
        } else {
            while (text_ptr < limit_ptr && same_char_ignore_3(text_ptr[0], ch,
              ch_lower, ch_upper, ch_title) == match)
                ++text_ptr;
        }
        break;
    }
    case RE_OP_SET:
        while (text_ptr < limit_ptr && in_set(encoding, node, text_ptr[0]))
            ++text_ptr;
        break;
    case RE_OP_SET_IGNORE:
        while (text_ptr < limit_ptr && in_set_ignore(encoding, node,
          text_ptr[0]))
            ++text_ptr;
        break;
    case RE_OP_SET_IGNORE_REV:
        while (text_ptr > limit_ptr && in_set_ignore(encoding, node,
          text_ptr[-1]))
            --text_ptr;
        break;
    case RE_OP_SET_REV:
        while (text_ptr > limit_ptr && in_set(encoding, node, text_ptr[-1]))
            --text_ptr;
        break;
    default:
        /* Not reachable: other ops returned in the first switch. */
        break;
    }

    /* The count is the distance travelled from the starting position. */
    if (reverse)
        return start_ptr - text_ptr;

    return text_ptr - start_ptr;
}

/* Tries to match a character pattern.
 *
 * Forward ops test the character at text_ptr, which must lie before
 * state->slice_end; reverse (_REV) ops test the character just before
 * text_ptr, which must lie after state->slice_start. The ANY_ALL ops only
 * require that text_ptr differs from the relevant end of the slice.
 *
 * The encoding table is always taken from state; the 'encoding' parameter
 * isn't consulted.
 *
 * Returns TRUE if the node matches at that position, and FALSE otherwise,
 * including when the node's op isn't one of the single-character ops handled
 * here.
 */
Py_LOCAL(BOOL) RE_MATCH_ONE(RE_State* state, RE_EncodingTable* encoding,
  RE_Node* node, RE_CHAR* text_ptr) {
    switch (node->op) {
    case RE_OP_ANY:
        return text_ptr < (RE_CHAR*)state->slice_end && text_ptr[0] != '\n';
    case RE_OP_ANY_ALL:
        return text_ptr != (RE_CHAR*)state->slice_end;
    case RE_OP_ANY_ALL_REV:
        return text_ptr != (RE_CHAR*)state->slice_start;
    case RE_OP_ANY_REV:
        return text_ptr > (RE_CHAR*)state->slice_start && text_ptr[-1] != '\n';
    case RE_OP_CATEGORY:
        /* values are: category */
        return text_ptr < (RE_CHAR*)state->slice_end &&
          state->encoding->in_category(node->values[0], text_ptr[0]) ==
          node->match;
    case RE_OP_CATEGORY_REV:
        /* values are: category */
        return text_ptr > (RE_CHAR*)state->slice_start &&
          state->encoding->in_category(node->values[0], text_ptr[-1]) ==
          node->match;
    case RE_OP_CHARACTER:
        /* values are: char_code */
        return text_ptr < (RE_CHAR*)state->slice_end && (text_ptr[0] ==
          node->values[0]) == node->match;
    case RE_OP_CHARACTER_IGNORE:
        /* values are: char_code */
        return text_ptr < (RE_CHAR*)state->slice_end &&
          state->encoding->same_char_ignore(text_ptr[0], node->values[0]) ==
          node->match;
    case RE_OP_CHARACTER_IGNORE_REV:
        /* values are: char_code */
        return text_ptr > (RE_CHAR*)state->slice_start &&
          state->encoding->same_char_ignore(text_ptr[-1], node->values[0]) ==
          node->match;
    case RE_OP_CHARACTER_REV:
        /* values are: char_code */
        return text_ptr > (RE_CHAR*)state->slice_start && (text_ptr[-1] ==
          node->values[0]) == node->match;
    case RE_OP_SET:
        return text_ptr < (RE_CHAR*)state->slice_end && in_set(state->encoding,
          node, text_ptr[0]);
    case RE_OP_SET_IGNORE:
        return text_ptr < (RE_CHAR*)state->slice_end &&
          in_set_ignore(state->encoding, node, text_ptr[0]);
    case RE_OP_SET_IGNORE_REV:
        return text_ptr > (RE_CHAR*)state->slice_start &&
          in_set_ignore(state->encoding, node, text_ptr[-1]);
    case RE_OP_SET_REV:
        return text_ptr > (RE_CHAR*)state->slice_start &&
          in_set(state->encoding, node, text_ptr[-1]);
    }

    return FALSE;
}

/* Tries the test of a 'next node' entry at a text position.
 *
 * If the test succeeds, stores the node to continue with (next->match_next)
 * and the text position advanced by next->match_step in next_position and
 * returns TRUE. If the test fails, returns FALSE without touching
 * next_position.
 *
 * A test node whose op isn't listed below is treated as succeeding.
 */
Py_LOCAL(BOOL) RE_TRY_MATCH(RE_State* state, RE_NextNode* next, RE_CHAR*
  text_ptr, RE_Position* next_position) {
    RE_Node* test;
    RE_EncodingTable* encoding;
    RE_CHAR* slice_start;
    RE_CHAR* slice_end;

    test = next->test;
    encoding = state->encoding;
    slice_start = (RE_CHAR*)state->slice_start;
    slice_end = (RE_CHAR*)state->slice_end;

    switch (test->op) {
    case RE_OP_ANY: /* Any character, except a newline. */
        if (!(text_ptr < slice_end && text_ptr[0] != '\n'))
            return FALSE;
        break;
    case RE_OP_ANY_ALL: /* Any character at all. */
        if (!(text_ptr < slice_end))
            return FALSE;
        break;
    case RE_OP_ANY_ALL_REV: /* Any character at all. */
        if (!(text_ptr > slice_start))
            return FALSE;
        break;
    case RE_OP_ANY_REV: /* Any character, except a newline. */
        if (!(text_ptr > slice_start && text_ptr[-1] != '\n'))
            return FALSE;
        break;
    case RE_OP_BOUNDARY: /* At a word boundary. */
        if (!(RE_AT_BOUNDARY(state, text_ptr) == test->match))
            return FALSE;
        break;
    case RE_OP_CATEGORY: /* A character category. */
        /* values are: category */
        if (!(text_ptr < slice_end && encoding->in_category(test->values[0],
          text_ptr[0]) == test->match))
            return FALSE;
        break;
    case RE_OP_CATEGORY_REV: /* A character category. */
        /* values are: category */
        if (!(text_ptr > slice_start && encoding->in_category(test->values[0],
          text_ptr[-1]) == test->match))
            return FALSE;
        break;
    case RE_OP_CHARACTER: /* A character literal. */
        /* values are: char_code */
        if (!(text_ptr < slice_end && (text_ptr[0] == test->values[0]) ==
          test->match))
            return FALSE;
        break;
    case RE_OP_CHARACTER_IGNORE: /* A character literal, ignoring case. */
        /* values are: char_code */
        if (!(text_ptr < slice_end && encoding->same_char_ignore(text_ptr[0],
          test->values[0]) == test->match))
            return FALSE;
        break;
    case RE_OP_CHARACTER_IGNORE_REV: /* A character literal, ignoring case. */
        /* values are: char_code */
        if (!(text_ptr > slice_start && encoding->same_char_ignore(text_ptr[-1],
          test->values[0]) == test->match))
            return FALSE;
        break;
    case RE_OP_CHARACTER_REV: /* A character literal. */
        /* values are: char_code */
        if (!(text_ptr > slice_start && (text_ptr[-1] == test->values[0]) ==
          test->match))
            return FALSE;
        break;
    case RE_OP_END_OF_LINE: /* At the end of a line. */
        if (!(text_ptr == state->text_end || text_ptr[0] == '\n'))
            return FALSE;
        break;
    case RE_OP_END_OF_STRING: /* At the end of the string. */
        if (!(text_ptr == state->text_end))
            return FALSE;
        break;
    case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */
        if (!(text_ptr == state->text_end || text_ptr == state->final_newline))
            return FALSE;
        break;
    case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */
        if (!(text_ptr == state->search_anchor))
            return FALSE;
        break;
    case RE_OP_SET: /* Character set. */
        if (!(text_ptr < slice_end && in_set(encoding, test, text_ptr[0])))
            return FALSE;
        break;
    case RE_OP_SET_IGNORE: /* Character set, ignoring case. */
        if (!(text_ptr < slice_end && in_set_ignore(encoding, test,
          text_ptr[0])))
            return FALSE;
        break;
    case RE_OP_SET_IGNORE_REV: /* Character set, ignoring case. */
        if (!(text_ptr > slice_start && in_set_ignore(encoding, test,
          text_ptr[-1])))
            return FALSE;
        break;
    case RE_OP_SET_REV: /* Character set. */
        if (!(text_ptr > slice_start && in_set(encoding, test, text_ptr[-1])))
            return FALSE;
        break;
    case RE_OP_START_OF_LINE: /* At the start of a line. */
        if (!(text_ptr == state->text_start || text_ptr[-1] == '\n'))
            return FALSE;
        break;
    case RE_OP_START_OF_STRING: /* At the start of the string. */
        if (!(text_ptr == state->text_start))
            return FALSE;
        break;
    case RE_OP_STRING: /* A string literal. */
    {
        RE_CODE* wanted;
        size_t length;
        size_t room;
        size_t i;

        wanted = test->values;
        length = test->value_count;

        /* The whole string must fit between here and the end of the slice. */
        room = slice_end - text_ptr;
        if (length > room)
            return FALSE;

        for (i = 0; i < length; i++) {
            if (text_ptr[i] != wanted[i])
                return FALSE;
        }
        break;
    }
    case RE_OP_STRING_IGNORE: /* A string literal, ignoring case. */
    {
        BOOL (*same_char_ignore)(RE_CODE ch1, RE_CODE ch2);
        RE_CODE* wanted;
        size_t length;
        size_t room;
        size_t i;

        wanted = test->values;
        length = test->value_count;

        /* The whole string must fit between here and the end of the slice. */
        room = slice_end - text_ptr;
        if (length > room)
            return FALSE;

        same_char_ignore = encoding->same_char_ignore;

        for (i = 0; i < length; i++) {
            if (!same_char_ignore(text_ptr[i], wanted[i]))
                return FALSE;
        }
        break;
    }
    case RE_OP_STRING_IGNORE_REV: /* A string literal, ignoring case. */
    {
        BOOL (*same_char_ignore)(RE_CODE ch1, RE_CODE ch2);
        RE_CHAR* first;
        RE_CODE* wanted;
        size_t length;
        size_t room;
        size_t i;

        wanted = test->values;
        length = test->value_count;

        /* The whole string must fit between the start of the slice and
         * here.
         */
        room = text_ptr - slice_start;
        if (length > room)
            return FALSE;

        same_char_ignore = encoding->same_char_ignore;

        /* The string ends at the current position. */
        first = text_ptr - length;
        for (i = 0; i < length; i++) {
            if (!same_char_ignore(first[i], wanted[i]))
                return FALSE;
        }
        break;
    }
    case RE_OP_STRING_REV: /* A string literal. */
    {
        RE_CHAR* first;
        RE_CODE* wanted;
        size_t length;
        size_t room;
        size_t i;

        wanted = test->values;
        length = test->value_count;

        /* The whole string must fit between the start of the slice and
         * here.
         */
        room = text_ptr - slice_start;
        if (length > room)
            return FALSE;

        /* The string ends at the current position. */
        first = text_ptr - length;
        for (i = 0; i < length; i++) {
            if (first[i] != wanted[i])
                return FALSE;
        }
        break;
    }
    }

    /* The test succeeded, so report where to continue from. */
    next_position->node = next->match_next;
    next_position->text_ptr = text_ptr + next->match_step;

    return TRUE;
}

/* Searches for the start of a match.
 *
 * Starting at state->text_ptr, scans the slice (forwards, or backwards when
 * state->reverse is set) for the first position at which the node next->test
 * could match. The scan gives up once it passes limit_ptr, which is
 * state->min_width characters short of the far end of the slice.
 *
 * state        - the match state. On success state->match_ptr is set to the
 *                position at which the candidate match starts.
 * next         - next->node is the node from which matching proper starts;
 *                next->test is the node which is actually tested here.
 * new_position - on success, receives the node and the text position at which
 *                matching should continue.
 *
 * Returns TRUE if a candidate position was found, otherwise FALSE.
 *
 * If test == node then the tested node has been fully checked here, so
 * new_position refers to the node after it (node->next_1.node) and to the
 * text position after the matched item. Otherwise the position is only a
 * "possible match": new_position refers to node itself at the candidate start
 * position, and the caller has to match from there.
 */
Py_LOCAL(BOOL) RE_SEARCH_CONTEXT(RE_State* state, RE_NextNode* next,
  RE_Position* new_position) {
    RE_CHAR* slice_start;
    RE_CHAR* slice_end;
    RE_CHAR* text_ptr;
    RE_CHAR* limit_ptr;
    RE_Node* test = next->test;
    RE_Node* node = next->node;

    slice_start = state->slice_start;
    slice_end = state->slice_end;
    text_ptr = state->text_ptr;

    /* Work out the last position at which a match could still start and fail
     * immediately if we're already beyond it.
     */
    if (state->reverse) {
        limit_ptr = slice_start + state->min_width;
        if (text_ptr < limit_ptr)
            return FALSE;
    } else {
        limit_ptr = slice_end - state->min_width;
        if (text_ptr > limit_ptr)
            return FALSE;
    }

    switch (test->op) {
    case RE_OP_ANY: /* Any character, except a newline. */
        /* Skip forwards over newlines. */
        for (;;) {
            if (text_ptr[0] != '\n')
                break;
            ++text_ptr;
            if (text_ptr > limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + 1;
            return TRUE;
        }
        break;
    case RE_OP_ANY_ALL: /* Any character at all. */
        /* The current position always qualifies. */
        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + 1;
            return TRUE;
        }
        break;
    case RE_OP_ANY_ALL_REV: /* Any character at all. */
        /* The current position always qualifies. */
        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr - 1;
            return TRUE;
        }
        break;
    case RE_OP_ANY_REV: /* Any character, except a newline. */
        /* Skip backwards over newlines; the character tested is the one
         * before text_ptr.
         */
        for (;;) {
            if (text_ptr[-1] != '\n')
                break;
            --text_ptr;
            if (text_ptr < limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr - 1;
            return TRUE;
        }
        break;
    case RE_OP_BOUNDARY: /* At a word boundary. */
    {
        BOOL match;
        Py_ssize_t step;

        match = test->match;
        step = state->reverse ? -1 : 1;

        /* Zero-width test, so the limit position itself is tested before
         * giving up.
         */
        for (;;) {
            if (RE_AT_BOUNDARY(state, text_ptr) == match)
                break;
            if (text_ptr == limit_ptr)
                return FALSE;
            text_ptr += step;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr;
            return TRUE;
        }
        break;
    }
    case RE_OP_CATEGORY: /* A character category. */
    {
        /* values are: category */
        BOOL (*in_category)(RE_CODE, RE_CODE);
        BOOL match;
        RE_CODE category;

        in_category = state->encoding->in_category;
        match = test->match;
        category = test->values[0];

        for (;;) {
            if (in_category(category, text_ptr[0]) == match)
                break;
            ++text_ptr;
            if (text_ptr > limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_CATEGORY_REV: /* A character category. */
    {
        /* values are: category */
        BOOL (*in_category)(RE_CODE, RE_CODE);
        BOOL match;
        RE_CODE category;

        in_category = state->encoding->in_category;
        match = test->match;
        category = test->values[0];

        for (;;) {
            if (in_category(category, text_ptr[-1]) == match)
                break;
            --text_ptr;
            if (text_ptr < limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_CHARACTER: /* A character literal. */
    {
        BOOL match;
        RE_CODE ch;

        match = test->match;
        ch = test->values[0];

        for (;;) {
            if ((text_ptr[0] == ch) == match)
                break;
            ++text_ptr;
            if (text_ptr > limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_CHARACTER_IGNORE: /* A character literal, ignoring case. */
    {
        RE_EncodingTable* encoding;
        BOOL (*same_char_ignore_3)(RE_CODE ch1, RE_CODE ch2, RE_CODE ch2_lower,
          RE_CODE ch2_upper, RE_CODE ch2_title);
        BOOL match;
        RE_CODE ch;
        RE_CODE ch_lower;
        RE_CODE ch_upper;
        RE_CODE ch_title;

        encoding = state->encoding;
        same_char_ignore_3 = encoding->same_char_ignore_3;
        match = test->match;
        ch = test->values[0];

        /* Compute the case variants once, outside the scanning loop. */
        ch_lower = encoding->lower(ch);
        ch_upper = encoding->upper(ch);
        ch_title = encoding->title(ch);

        for (;;) {
            if (same_char_ignore_3(text_ptr[0], ch, ch_lower, ch_upper,
              ch_title) == match)
                break;
            ++text_ptr;
            if (text_ptr > limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_CHARACTER_REV: /* A character literal. */
    {
        BOOL match;
        RE_CODE ch;

        match = test->match;
        ch = test->values[0];

        for (;;) {
            if ((text_ptr[-1] == ch) == match)
                break;
            --text_ptr;
            if (text_ptr < limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_CHARACTER_IGNORE_REV: /* A character literal, ignoring case. */
    {
        RE_EncodingTable* encoding;
        BOOL (*same_char_ignore_3)(RE_CODE ch1, RE_CODE ch2, RE_CODE ch2_lower,
          RE_CODE ch2_upper, RE_CODE ch2_title);
        BOOL match;
        RE_CODE ch;
        RE_CODE ch_lower;
        RE_CODE ch_upper;
        RE_CODE ch_title;

        encoding = state->encoding;
        same_char_ignore_3 = encoding->same_char_ignore_3;
        match = test->match;
        ch = test->values[0];

        /* Compute the case variants once, outside the scanning loop. */
        ch_lower = encoding->lower(ch);
        ch_upper = encoding->upper(ch);
        ch_title = encoding->title(ch);

        for (;;) {
            if (same_char_ignore_3(text_ptr[-1], ch, ch_lower, ch_upper,
              ch_title) == match)
                break;
            --text_ptr;
            if (text_ptr < limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_END_OF_LINE: /* At the end of a line. */
    {
        RE_CHAR* text_end;
        Py_ssize_t step;

        text_end = state->text_end;
        step = state->reverse ? -1 : 1;

        /* The end of the text is checked first so that text_ptr[0] is never
         * read at text_end.
         */
        for (;;) {
            if (text_ptr == text_end || text_ptr[0] == '\n')
                break;
            if (text_ptr == limit_ptr)
                return FALSE;
            text_ptr += step;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr;
            return TRUE;
        }
        break;
    }
    case RE_OP_END_OF_STRING: /* At the end of the string. */
        /* Searching backwards we must already be at the end of the text;
         * searching forwards the slice must reach the end of the text.
         */
        if (state->reverse) {
            if (text_ptr != state->text_end)
                return FALSE;
        } else {
            if (slice_end != state->text_end)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = state->text_end;
            new_position->node = node->next_1.node;
            new_position->text_ptr = state->text_end;
            return TRUE;
        }
        break;
    case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */
        /* Jump straight to the nearer of the two candidate positions in the
         * direction of the search, then check that it lies within the slice.
         */
        if (state->reverse) {
            if (text_ptr >= (RE_CHAR*)state->text_end)
                text_ptr = state->text_end;
            else if (text_ptr >= (RE_CHAR*)state->final_newline)
                text_ptr = state->final_newline;
            else
                return FALSE;

            if (text_ptr < slice_start)
                return FALSE;
        } else {
            if (text_ptr <= (RE_CHAR*)state->final_newline)
                text_ptr = state->final_newline;
            else if (text_ptr <= (RE_CHAR*)state->text_end)
                text_ptr = state->text_end;
            else
                return FALSE;

            if (text_ptr > slice_end)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr;
            return TRUE;
        }
        break;
    case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */
        /* Fail if the search has already moved past the anchor. */
        if (state->reverse) {
            if (text_ptr < (RE_CHAR*)state->search_anchor)
                return FALSE;
        } else {
            if (text_ptr > (RE_CHAR*)state->search_anchor)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = state->search_anchor;
            new_position->node = node->next_1.node;
            new_position->text_ptr = state->search_anchor;
            return TRUE;
        }
        break;
    case RE_OP_SET: /* Character set. */
    {
        RE_EncodingTable* encoding;

        encoding = state->encoding;

        for (;;) {
            if (in_set(encoding, test, text_ptr[0]))
                break;
            ++text_ptr;
            if (text_ptr > limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_SET_IGNORE: /* Character set, ignoring case. */
    {
        RE_EncodingTable* encoding;

        encoding = state->encoding;

        for (;;) {
            if (in_set_ignore(encoding, test, text_ptr[0]))
                break;
            ++text_ptr;
            if (text_ptr > limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_SET_IGNORE_REV: /* Character set, ignoring case. */
    {
        RE_EncodingTable* encoding;

        encoding = state->encoding;

        for (;;) {
            if (in_set_ignore(encoding, test, text_ptr[-1]))
                break;
            --text_ptr;
            if (text_ptr < limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_SET_REV: /* Character set. */
    {
        RE_EncodingTable* encoding;

        encoding = state->encoding;

        for (;;) {
            if (in_set(encoding, test, text_ptr[-1]))
                break;
            --text_ptr;
            if (text_ptr < limit_ptr)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + node->step;
            return TRUE;
        }
        break;
    }
    case RE_OP_START_OF_LINE: /* At the start of a line. */
    {
        RE_CHAR* text_start;
        Py_ssize_t step;

        text_start = state->text_start;
        step = state->reverse ? -1 : 1;

        /* The start of the text is checked first so that text_ptr[-1] is
         * never read at text_start.
         */
        for (;;) {
            if (text_ptr == text_start || text_ptr[-1] == '\n')
                break;
            if (text_ptr == limit_ptr)
                return FALSE;
            text_ptr += step;
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr;
            return TRUE;
        }
        break;
    }
    case RE_OP_START_OF_STRING: /* At the start of the string. */
        /* Searching forwards we must already be at the start of the text;
         * searching backwards the slice must reach the start of the text.
         */
        if (state->reverse) {
            if (slice_start != state->text_start)
                return FALSE;
        } else {
            if (text_ptr != state->text_start)
                return FALSE;
        }

        if (test == node) {
            state->match_ptr = state->text_start;
            new_position->node = node->next_1.node;
            new_position->text_ptr = state->text_start;
            return TRUE;
        }
        break;
    case RE_OP_STRING: /* A string literal. */
    {
        Py_ssize_t length;
        RE_CODE* values;

        length = test->value_count;
        values = test->values;

        /* Are we going to use a Boyer-Moore fast string search? */
        if (state->pattern->bad_character_offset) {
            Py_ssize_t* bad_character_offset;
            Py_ssize_t* good_suffix_offset;
            Py_ssize_t last_pos;
            RE_CODE last_char;

            bad_character_offset = state->pattern->bad_character_offset;
            good_suffix_offset = state->pattern->good_suffix_offset;

            last_pos = length - 1;
            last_char = values[last_pos];

            /* Compare from the last character of the literal backwards and
             * skip ahead using the precomputed tables on a mismatch.
             */
            for (;;) {
                if (text_ptr[last_pos] == last_char) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = last_pos - 1; match && pos >= 0; pos--)
                        match = text_ptr[pos] == values[pos];
                    if (match)
                        break;

                    /* pos was decremented once more after the mismatch. */
                    text_ptr += good_suffix_offset[pos + 1];
                } else
                    /* The table is indexed by the low 8 bits of the character. */
                    text_ptr += bad_character_offset[text_ptr[last_pos] & 0xFF];

                if (text_ptr > limit_ptr)
                    return FALSE;
            }
        } else {
            RE_CODE first_char;

            first_char = values[0];

            /* Simple scan: find the first character, then verify the rest. */
            for (;;) {
                if (text_ptr[0] == first_char) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = 1; match && pos < length; pos++)
                        match = text_ptr[pos] == values[pos];
                    if (match)
                        break;
                }

                ++text_ptr;

                if (text_ptr > limit_ptr)
                    return FALSE;
            }
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + length;
            return TRUE;
        }
        break;
    }
    case RE_OP_STRING_IGNORE: /* A string literal, ignoring case. */
    {
        RE_EncodingTable* encoding;
        BOOL (*same_char_ignore)(RE_CODE ch1, RE_CODE ch2);
        BOOL (*same_char_ignore_3)(RE_CODE ch1, RE_CODE ch2, RE_CODE ch2_lower,
          RE_CODE ch2_upper, RE_CODE ch2_title);
        Py_ssize_t length;
        RE_CODE* values;

        encoding = state->encoding;
        same_char_ignore = encoding->same_char_ignore;
        same_char_ignore_3 = encoding->same_char_ignore_3;

        length = test->value_count;
        values = test->values;

        /* Are we going to use a Boyer-Moore fast string search? */
        if (state->pattern->bad_character_offset) {
            Py_ssize_t* bad_character_offset;
            Py_ssize_t* good_suffix_offset;
            Py_ssize_t last_pos;
            RE_CODE last_char;
            RE_CODE last_lower;
            RE_CODE last_upper;
            RE_CODE last_title;

            bad_character_offset = state->pattern->bad_character_offset;
            good_suffix_offset = state->pattern->good_suffix_offset;

            last_pos = length - 1;
            last_char = values[last_pos];
            last_lower = encoding->lower(last_char);
            last_upper = encoding->upper(last_char);
            last_title = encoding->title(last_char);

            /* Compare from the last character of the literal backwards and
             * skip ahead using the precomputed tables on a mismatch.
             */
            for (;;) {
                if (same_char_ignore_3(text_ptr[last_pos], last_char,
                  last_lower, last_upper, last_title)) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = last_pos - 1; match && pos >= 0; pos--)
                        match = same_char_ignore(text_ptr[pos], values[pos]);
                    if (match)
                        break;

                    /* pos was decremented once more after the mismatch. */
                    text_ptr += good_suffix_offset[pos + 1];
                } else
                    /* The table is indexed by the low 8 bits of the character. */
                    text_ptr += bad_character_offset[text_ptr[last_pos] & 0xFF];

                if (text_ptr > limit_ptr)
                    return FALSE;
            }
        } else {
            RE_CODE first_char;
            RE_CODE first_lower;
            RE_CODE first_upper;
            RE_CODE first_title;

            first_char = values[0];
            first_lower = encoding->lower(first_char);
            first_upper = encoding->upper(first_char);
            first_title = encoding->title(first_char);

            /* Simple scan: find the first character, then verify the rest. */
            for (;;) {
                if (same_char_ignore_3(text_ptr[0], first_char, first_lower,
                  first_upper, first_title)) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = 1; match && pos < length; pos++)
                        match = same_char_ignore(text_ptr[pos], values[pos]);
                    if (match)
                        break;
                }

                ++text_ptr;

                if (text_ptr > limit_ptr)
                    return FALSE;
            }
        }

        if (test == node) {
            state->match_ptr = text_ptr;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr + length;
            return TRUE;
        }
        break;
    }
    case RE_OP_STRING_IGNORE_REV: /* A string literal, ignoring case. */
    {
        RE_EncodingTable* encoding;
        BOOL (*same_char_ignore)(RE_CODE ch1, RE_CODE ch2);
        BOOL (*same_char_ignore_3)(RE_CODE ch1, RE_CODE ch2, RE_CODE ch2_lower,
          RE_CODE ch2_upper, RE_CODE ch2_title);
        Py_ssize_t length;
        RE_CODE* values;

        encoding = state->encoding;
        same_char_ignore = encoding->same_char_ignore;
        same_char_ignore_3 = encoding->same_char_ignore_3;

        length = test->value_count;
        values = test->values;

        /* Bias both pointers by the length of the literal so that text_ptr
         * addresses the first character of the candidate, which then occupies
         * text_ptr[0 .. length - 1].
         */
        text_ptr -= length;
        limit_ptr -= length;

        /* Are we going to use a Boyer-Moore fast string search? */
        if (state->pattern->bad_character_offset) {
            Py_ssize_t* bad_character_offset;
            Py_ssize_t* good_suffix_offset;
            RE_CODE first_char;
            RE_CODE first_lower;
            RE_CODE first_upper;
            RE_CODE first_title;

            bad_character_offset = state->pattern->bad_character_offset;
            good_suffix_offset = state->pattern->good_suffix_offset;

            first_char = values[0];
            first_lower = encoding->lower(first_char);
            first_upper = encoding->upper(first_char);
            first_title = encoding->title(first_char);

            /* Compare from the first character of the literal forwards.
             * NOTE(review): the table entries are presumably negative for a
             * reverse pattern, so that text_ptr moves towards limit_ptr --
             * confirm against the code which builds the tables.
             */
            for (;;) {
                if (same_char_ignore_3(text_ptr[0], first_char, first_lower,
                  first_upper, first_title)) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = 1; match && pos < length; pos++)
                        match = same_char_ignore(text_ptr[pos], values[pos]);
                    if (match)
                        break;

                    /* pos was incremented once more after the mismatch. */
                    text_ptr += good_suffix_offset[pos - 1];
                } else
                    /* The table is indexed by the low 8 bits of the character. */
                    text_ptr += bad_character_offset[text_ptr[0] & 0xFF];

                if (text_ptr < limit_ptr)
                    return FALSE;
            }
        } else {
            RE_CODE first_char;
            RE_CODE first_lower;
            RE_CODE first_upper;
            RE_CODE first_title;

            first_char = values[0];
            first_lower = encoding->lower(first_char);
            first_upper = encoding->upper(first_char);
            first_title = encoding->title(first_char);

            /* Simple scan: find the first character, then verify the rest. */
            for (;;) {
                if (same_char_ignore_3(text_ptr[0], first_char, first_lower,
                  first_upper, first_title)) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = 1; match && pos < length; pos++)
                        match = same_char_ignore(text_ptr[pos], values[pos]);
                    if (match)
                        break;
                }

                --text_ptr;

                if (text_ptr < limit_ptr)
                    return FALSE;
            }
        }

        if (test == node) {
            state->match_ptr = text_ptr + length;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr;
            return TRUE;
        }

        /* Only a possible match: undo the bias so that the position reported
         * below is where the reverse match starts (just after the literal),
         * the same position as match_ptr in the exact case above.
         */
        text_ptr += length;
        break;
    }
    case RE_OP_STRING_REV: /* A string literal. */
    {
        Py_ssize_t length;
        RE_CODE* values;

        length = test->value_count;
        values = test->values;

        /* Bias both pointers by the length of the literal so that text_ptr
         * addresses the first character of the candidate, which then occupies
         * text_ptr[0 .. length - 1].
         */
        text_ptr -= length;
        limit_ptr -= length;

        /* Are we going to use a Boyer-Moore fast string search? */
        if (state->pattern->bad_character_offset) {
            Py_ssize_t* bad_character_offset;
            Py_ssize_t* good_suffix_offset;
            RE_CODE first_char;

            bad_character_offset = state->pattern->bad_character_offset;
            good_suffix_offset = state->pattern->good_suffix_offset;

            first_char = values[0];

            /* Compare from the first character of the literal forwards.
             * NOTE(review): the table entries are presumably negative for a
             * reverse pattern, so that text_ptr moves towards limit_ptr --
             * confirm against the code which builds the tables.
             */
            for (;;) {
                if (text_ptr[0] == first_char) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = 1; match && pos < length; pos++)
                        match = text_ptr[pos] == values[pos];
                    if (match)
                        break;

                    /* pos was incremented once more after the mismatch. */
                    text_ptr += good_suffix_offset[pos - 1];
                } else
                    /* The table is indexed by the low 8 bits of the character. */
                    text_ptr += bad_character_offset[text_ptr[0] & 0xFF];

                if (text_ptr < limit_ptr)
                    return FALSE;
            }
        } else {
            RE_CODE first_char;

            first_char = values[0];

            /* Simple scan: find the first character, then verify the rest. */
            for (;;) {
                if (text_ptr[0] == first_char) {
                    BOOL match = TRUE;
                    Py_ssize_t pos;

                    for (pos = 1; match && pos < length; pos++)
                        match = text_ptr[pos] == values[pos];
                    if (match)
                        break;
                }

                --text_ptr;

                if (text_ptr < limit_ptr)
                    return FALSE;
            }
        }

        if (test == node) {
            state->match_ptr = text_ptr + length;
            new_position->node = node->next_1.node;
            new_position->text_ptr = text_ptr;
            return TRUE;
        }

        /* Only a possible match: undo the bias so that the position reported
         * below is where the reverse match starts (just after the literal),
         * the same position as match_ptr in the exact case above.
         */
        text_ptr += length;
        break;
    }
    default:
        /* There's no fast scan for this op, so the current position is
         * reported as a possible match.
         */
        break;
    }

    /* It's a possible match. The tested node hasn't been consumed, so matching
     * continues from node itself at the candidate position.
     */
    state->match_ptr = text_ptr;
    new_position->node = node;
    new_position->text_ptr = text_ptr;
    return TRUE;
}

/* Performs a depth-first match or search from the context. */
Py_LOCAL(int) RE_MATCH_CONTEXT(RE_State* state, RE_Node* start_node, BOOL
  search) {
    PatternObject* pattern;
    RE_CHAR* text_start;
    RE_CHAR* text_end;
    RE_CHAR* slice_start;
    RE_CHAR* slice_end;
    RE_CHAR* text_ptr;
    RE_NextNode start_pair;
    RE_EncodingTable* encoding;
    size_t iterations;
    RE_CHAR* final_newline;
    BOOL (*in_category)(RE_CODE, RE_CODE);
    BOOL (*same_char_ignore)(RE_CODE, RE_CODE);
    BOOL (*same_char_ignore_3)(RE_CODE ch_1, RE_CODE ch_2, RE_CODE ch_2_lower,
      RE_CODE ch_2_upper, RE_CODE ch_2_title);
    RE_GroupInfo* group_info;
    RE_RepeatInfo* repeat_info;
    Py_ssize_t step;
    RE_Node* node;
    TRACE(("<<RE_MATCH_CONTEXT>>\n"))

    pattern = state->pattern;

    text_start = state->text_start;
    text_end = state->text_end;
    slice_start = state->slice_start;
    slice_end = state->slice_end;
    text_ptr = state->text_ptr;

    /* Look beyond any initial group node. */
    start_pair.node = start_node;
    start_pair.test = start_node;
    while (start_pair.test->op == RE_OP_BEGIN_GROUP || start_pair.test->op ==
      RE_OP_END_GROUP)
        start_pair.test = start_pair.test->next_1.node;

    /* Is the pattern anchored to the start or end of the string? */
    switch (start_pair.test->op) {
    case RE_OP_START_OF_STRING:
        /* The pattern can be successful only at the start of the string. */
        if (!state->reverse && text_ptr != text_start)
            return RE_ERROR_FAILURE;

        /* Don't bother to search further because it's anchored. */
        search = FALSE;
        break;
    case RE_OP_END_OF_STRING:
        /* The pattern can be successful only at the end of the string. */
        if (state->reverse && text_ptr != text_end)
            return RE_ERROR_FAILURE;

        /* Don't bother to search further because it's anchored. */
        search = FALSE;
        break;
    }

    encoding = state->encoding;
    iterations = 0;
    final_newline = state->final_newline;
    in_category = encoding->in_category;
    same_char_ignore = encoding->same_char_ignore;
    same_char_ignore_3 = encoding->same_char_ignore_3;
    group_info = pattern->group_info;
    repeat_info = pattern->repeat_info;
    step = state->reverse ? -1 : 1;

    /* Save the groups in case we need to restore them for searching. */
    if (!push_groups(state))
        return RE_ERROR_MEMORY;

    /* Add a backtrack entry for failure. */
    if (!add_backtrack(state, RE_OP_FAILURE))
        return RE_ERROR_MEMORY;

start_match:
    /* If we're searching, advance along the string until there could be a
     * match.
     */
    if (search) {
        RE_Position new_position;

next_match:
        if (!RE_SEARCH_CONTEXT(state, &start_pair, &new_position))
            return RE_ERROR_FAILURE;

        node = new_position.node;
        text_ptr = new_position.text_ptr;

        if (node->op == RE_OP_SUCCESS) {
            /* Must the match advance past its start? */
            if (text_ptr != state->search_anchor || !state->must_advance) {
                state->text_ptr = text_ptr;
                return RE_ERROR_SUCCESS;
            }

            state->text_ptr = (RE_CHAR*)state->match_ptr + step;
            goto next_match;
        }
    } else
        node = start_node;

advance:
    /* The main matching loop. */
    for (;;) {
        TRACE(("%d|", text_ptr - state->text_start))

        switch (node->op) {
        case RE_OP_ANY: /* Any character, except a newline. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr >= slice_end || text_ptr[0] == '\n')
                goto backtrack;
            ++text_ptr;
            node = node->next_1.node;
            break;
        case RE_OP_ANY_ALL: /* Any character at all. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr >= slice_end)
                goto backtrack;
            ++text_ptr;
            node = node->next_1.node;
            break;
        case RE_OP_ANY_ALL_REV: /* Any character at all. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr <= slice_start)
                goto backtrack;
            --text_ptr;
            node = node->next_1.node;
            break;
        case RE_OP_ANY_REV: /* Any character, except a newline. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr <= slice_start || text_ptr[-1] == '\n')
                goto backtrack;
            --text_ptr;
            node = node->next_1.node;
            break;
        case RE_OP_ATOMIC: /* Atomic subpattern. */
        {
            RE_BacktrackBlock* current_block;
            size_t backtrack_count;
            size_t saved_groups_count;
            BOOL must_advance;
            int status;
            TRACE(("%s\n", re_op_text[node->op]))

            /* Try to match the subpattern. */
            current_block = state->current_block;
            backtrack_count = current_block->count;
            saved_groups_count = state->saved_groups_count;
            must_advance = state->must_advance;
            state->text_ptr = text_ptr;
            state->must_advance = FALSE;

            status = RE_MATCH_CONTEXT(state, node->next_2.node, FALSE);

            if (status < 0)
                return status;

            state->must_advance = must_advance;
            state->saved_groups_count = saved_groups_count;
            current_block->count = backtrack_count;
            state->current_block = current_block;

            if (status != RE_ERROR_SUCCESS)
                goto backtrack;

            node = node->next_1.node;
            text_ptr = state->text_ptr;
            break;
        }
        case RE_OP_BEGIN_GROUP: /* Beginning of capture group. */
        {
            Py_ssize_t ofs;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;
            state->data[ofs].group.begin = text_ptr;
            node = node->next_1.node;
            break;
        }
        case RE_OP_BOUNDARY: /* At a word boundary. */
            TRACE(("%s %d\n", re_op_text[node->op], node->match))
            if (RE_AT_BOUNDARY(state, text_ptr) != node->match)
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_BRANCH: /* 2-way branch. */
        {
            BOOL try_first;
            RE_Position next_first_position;
            BOOL try_second;
            RE_Position next_second_position;

            try_first = RE_TRY_MATCH(state, &node->next_1, text_ptr,
              &next_first_position);
            try_second = RE_TRY_MATCH(state, &node->next_2, text_ptr,
              &next_second_position);
            if (try_first) {
                if (try_second) {
                    if (!add_backtrack(state, RE_OP_BRANCH) ||
                      !push_groups(state))
                        return RE_ERROR_MEMORY;
                    state->backtrack->branch.position = next_second_position;
                }
                node = next_first_position.node;
                text_ptr = next_first_position.text_ptr;
            } else {
                if (try_second) {
                    node = next_second_position.node;
                    text_ptr = next_second_position.text_ptr;
                } else
                    goto backtrack;
            }
            break;
        }
        case RE_OP_CATEGORY: /* A character category. */
            /* values are: category */
            TRACE(("%s %d %d\n", re_op_text[node->op], node->match,
              node->values[0]))
            if (text_ptr >= slice_end || in_category(node->values[0],
              text_ptr[0]) != node->match)
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_CATEGORY_REV: /* A character category. */
            /* values are: category */
            TRACE(("%s %d %d\n", re_op_text[node->op], node->match,
              node->values[0]))
            if (text_ptr <= slice_start || in_category(node->values[0],
              text_ptr[-1]) != node->match)
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_CHARACTER: /* A character literal. */
            TRACE(("%s %d %d\n", re_op_text[node->op], node->match,
              node->values[0]))
            if (text_ptr >= slice_end || (text_ptr[0] == node->values[0]) !=
              node->match)
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_CHARACTER_IGNORE: /* A character literal, ignoring case. */
            TRACE(("%s %d %d\n", re_op_text[node->op], node->match,
              node->values[0]))
            if (text_ptr >= slice_end || same_char_ignore(text_ptr[0],
              node->values[0]) != node->match)
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_CHARACTER_IGNORE_REV: /* A character literal, ignoring case. */
            TRACE(("%s %d %d\n", re_op_text[node->op], node->match,
              node->values[0]))
            if (text_ptr <= slice_start || same_char_ignore(text_ptr[-1],
              node->values[0]) != node->match)
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_CHARACTER_REV: /* A character literal. */
            TRACE(("%s %d %d\n", re_op_text[node->op], node->match,
              node->values[0]))
            if (text_ptr <= slice_start || (text_ptr[-1] == node->values[0]) !=
              node->match)
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. */
        {
            Py_ssize_t ofs;
            RE_RepeatData* rp_data;
            BOOL try_body;
            RE_Position next_body_position;
            BOOL try_tail;
            RE_Position next_tail_position;
            RE_BacktrackData* bt_data;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the repeat values in the context. Repeat
             * indexes are 0-based.
             */
            ofs = repeat_info[node->values[0]].value_offset;
            rp_data = &state->data[ofs].repeat;

            /* Matched the body again. */
            ++rp_data->count;

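            /* Could the body or tail match?
             *
             * The body is tried again only while the repeat count is still
             * below max_count, and the tail only once the count has reached
             * the minimum (values[1]).
             *
             * Note that there is no explicit check here that the body consumed
             * any characters on this iteration. A body that keeps matching
             * without advancing through the text still cannot repeat forever,
             * because RE_OP_GREEDY_REPEAT caps max_count at the number of
             * characters between the repeat's starting position and the edge
             * of the slice.
             */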
            try_body = rp_data->count < rp_data->max_count &&
              RE_TRY_MATCH(state, &node->next_1, text_ptr, &next_body_position);
            try_tail = rp_data->count >= node->values[1] && RE_TRY_MATCH(state,
              &node->next_2, text_ptr, &next_tail_position);
            if (!try_body && !try_tail) {
                /* Neither the body nor the tail could match. */
                --rp_data->count;
                goto backtrack;
            }

            if (try_body) {
                if (try_tail) {
                    /* Both the body and the tail could match, but the body
                     * takes precedence. If the body fails to match then we want
                     * to try the tail before backtracking into the body.
                     */

                    /* Record backtracking info for backtracking into the body.
                     */
                    bt_data = last_backtrack(state);
                    if (bt_data->op == RE_OP_END_GREEDY_REPEAT &&
                      !bt_data->repeat.position.node && bt_data->repeat.ofs ==
                      ofs) {
                        /* The last backtrack entry is for backtracking into the
                         * body like we want to do now, so we can save work by
                         * just re-using it.
                         */
                    } else {
                        if (!add_backtrack(state, RE_OP_END_GREEDY_REPEAT))
                            return RE_ERROR_MEMORY;
                        bt_data = state->backtrack;
                        bt_data->repeat.position.node = NULL; /* Restore then backtrack. */
                        bt_data->repeat.ofs = ofs;
                    }
                    bt_data->repeat.count = rp_data->count - 1;
                    bt_data->repeat.max_count = rp_data->max_count;

                    /* Record backtracking info for matching the tail. */
                    if (!add_backtrack(state, RE_OP_END_GREEDY_REPEAT) ||
                      !push_groups(state))
                        return RE_ERROR_MEMORY;
                    bt_data = state->backtrack;
                    bt_data->repeat.position = next_tail_position;
                    bt_data->repeat.ofs = ofs;
                    bt_data->repeat.count = rp_data->count;
                    bt_data->repeat.max_count = rp_data->max_count;
                } else {
                    /* Only the body could match. If the body fails to match
                     * then we want to backtrack into the body.
                     */

                    /* Record backtracking info for backtracking into the body.
                     */
                    bt_data = last_backtrack(state);
                    if (bt_data->op == RE_OP_END_GREEDY_REPEAT &&
                      !bt_data->repeat.position.node && bt_data->repeat.ofs ==
                      ofs) {
                        /* The last backtrack entry is for backtracking into the
                         * body like we want to do now, so we can save work by
                         * just re-using it.
                         */
                    } else {
                        if (!add_backtrack(state, RE_OP_END_GREEDY_REPEAT))
                            return RE_ERROR_MEMORY;
                        bt_data = state->backtrack;
                        bt_data->repeat.position.node = NULL; /* Restore then backtrack. */
                        bt_data->repeat.ofs = ofs;
                    }
                    bt_data->repeat.count = rp_data->count - 1;
                    bt_data->repeat.max_count = rp_data->max_count;
                }

                /* Advance into the body. */
                node = next_body_position.node;
                text_ptr = next_body_position.text_ptr;
            } else {
                /* Only the tail could match. If the tail fails to match then we
                 * want to backtrack into the body.
                 */

                /* Advance into the tail. */
                node = next_tail_position.node;
                text_ptr = next_tail_position.text_ptr;
            }
            break;
        }
        case RE_OP_END_GROUP: /* End of a capture group. */
        {
            Py_ssize_t ofs;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;
            state->data[ofs].group.end = text_ptr;
            node = node->next_1.node;
            break;
        }
        case RE_OP_END_LAZY_REPEAT: /* End of a lazy repeat. */
        {
            Py_ssize_t ofs;
            RE_RepeatData* rp_data;
            BOOL try_body;
            BOOL try_tail;
            RE_BacktrackData* bt_data;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the repeat values in the context. Repeat
             * indexes are 0-based.
             */
            ofs = repeat_info[node->values[0]].value_offset;
            rp_data = &state->data[ofs].repeat;

            /* Matched the body again. */
            ++rp_data->count;

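            /* Could the body or tail match?
             *
             * The body may be tried again only while the repeat count is still
             * below max_count, and the tail only once the count has reached
             * the minimum (values[1]). Unlike the greedy case, no lookahead
             * is done here.
             *
             * Note that there is no explicit check here that the body consumed
             * any characters on this iteration. A body that keeps matching
             * without advancing through the text still cannot repeat forever,
             * because RE_OP_LAZY_REPEAT caps max_count at the number of
             * characters between the repeat's starting position and the edge
             * of the slice.
             */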
            try_body = rp_data->count < rp_data->max_count;
            try_tail = rp_data->count >= node->values[1];
            if (try_body) {
                if (try_tail) {
                    /* Both the body and the tail could match, but the tail
                     * takes precedence. If the tail fails to match then we want
                     * to try the body again before backtracking into the head.
                     */

                    /* Record backtracking info for matching the body if the
                     * tail fails.
                     */
                    if (!add_backtrack(state, RE_OP_END_LAZY_REPEAT) ||
                      !push_groups(state))
                        return RE_ERROR_MEMORY;
                    bt_data = state->backtrack;
                    bt_data->repeat.ofs = ofs;
                    bt_data->repeat.position.node = node->next_1.node;
                    bt_data->repeat.position.text_ptr = text_ptr;

                    /* Advance into the tail. */
                    node = node->next_2.node;
                } else
                    /* Only the body could match. */

                    /* Advance into the body. */
                    node = node->next_1.node;
            } else
                /* Only the tail could match. */

                /* Advance into the tail. */
                node = node->next_2.node;
            break;
        }
        case RE_OP_END_OF_LINE: /* At the end of a line. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr != text_end && text_ptr[0] != '\n')
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_END_OF_STRING: /* At the end of the string. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr != text_end)
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr != text_end && text_ptr != final_newline)
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */
        {
            Py_ssize_t ofs;
            RE_RepeatData* rp_data;
            size_t max_count;
            BOOL try_tail;
            RE_Position next_tail_position;
            RE_BacktrackData* bt_data;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the repeat values in the context. Repeat
             * indexes are 0-based.
             */
            ofs = repeat_info[node->values[0]].value_offset;
            rp_data = &state->data[ofs].repeat;

            /* We might need to backtrack into the head. */
            if (!add_backtrack(state, RE_OP_GREEDY_REPEAT) ||
              !push_groups(state))
                return RE_ERROR_MEMORY;
            bt_data = state->backtrack;
            bt_data->repeat.ofs = ofs;
            bt_data->repeat.count = rp_data->count;
            bt_data->repeat.max_count = rp_data->max_count;

            max_count = node->values[3] ? slice_end - text_ptr : text_ptr -
              slice_start;
            if (max_count > node->values[2])
                max_count = node->values[2];

            rp_data->count = 0;
            rp_data->max_count = max_count;

            /* Does the body have to match at all? */
            try_tail = node->values[1] == 0 && RE_TRY_MATCH(state,
              &node->next_2, text_ptr, &next_tail_position);
            if (try_tail) {
                /* The body doesn't have to match, but it takes precedence over
                 * the tail. If the body fails to match then we want to try the
                 * tail before backtracking into the head.
                 */

                /* Record backtracking info for matching the tail. */
                if (!add_backtrack(state, RE_OP_END_GREEDY_REPEAT) ||
                  !push_groups(state))
                    return RE_ERROR_MEMORY;
                bt_data = state->backtrack;
                bt_data->repeat.position = next_tail_position;
                bt_data->repeat.ofs = ofs;
                bt_data->repeat.count = rp_data->count;
                bt_data->repeat.max_count = rp_data->max_count;
            }

            /* Advance into the body. */
            node = node->next_1.node;
            break;
        }
        case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */
        {
            size_t count;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Count how many times the character repeats, up to the maximum. */
            count = RE_COUNT_ONE(state, node->next_2.node, text_ptr,
              node->values[2]);

            /* Have we matched at least the minimum? */
            if (count < node->values[1])
                goto backtrack;

            if (count > node->values[1]) {
                /* The match is longer than the minimum, so we might need to
                 * backtrack the repeat to consume less.
                 */
                Py_ssize_t ofs;
                RE_RepeatData* rp_data;
                RE_BacktrackData* bt_data;

                /* Get the offset to the repeat values in the context. Repeat
                 * indexes are 0-based.
                 */
                ofs = repeat_info[node->values[0]].value_offset;
                rp_data = &state->data[ofs].repeat;
                if (!add_backtrack(state, RE_OP_GREEDY_REPEAT_ONE) ||
                  !push_groups(state))
                    return RE_ERROR_MEMORY;
                bt_data = state->backtrack;
                bt_data->repeat.position.node = node;
                bt_data->repeat.ofs = ofs;
                bt_data->repeat.start = rp_data->start;
                bt_data->repeat.count = rp_data->count;

                rp_data->start = text_ptr;
                rp_data->count = count;
            }

            text_ptr += (Py_ssize_t)count * node->step;
            node = node->next_1.node;
            break;
        }
        case RE_OP_GROUP_EXISTS: /* Capture group exists. */
        {
            Py_ssize_t ofs;
            RE_CHAR* begin_group;
            RE_CHAR* end_group;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;

            /* Check whether the captured text, if any, exists at this position
             * in the string.
             */
            begin_group = state->data[ofs].group.begin;
            end_group = state->data[ofs].group.end;
            if (text_start <= begin_group && begin_group <= end_group &&
              end_group <= text_end)
                node = node->next_1.node;
            else
                node = node->next_2.node;
            break;
        }
        case RE_OP_LAZY_REPEAT: /* Lazy repeat. */
        {
            Py_ssize_t ofs;
            RE_RepeatData* rp_data;
            size_t max_count;
            RE_BacktrackData* bt_data;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the repeat values in the context. Repeat
             * indexes are 0-based.
             */
            ofs = repeat_info[node->values[0]].value_offset;
            rp_data = &state->data[ofs].repeat;

            /* We might need to backtrack into the head. */
            if (!add_backtrack(state, RE_OP_LAZY_REPEAT) || !push_groups(state))
                return RE_ERROR_MEMORY;
            bt_data = state->backtrack;
            bt_data->repeat.ofs = ofs;
            bt_data->repeat.count = rp_data->count;
            bt_data->repeat.max_count = rp_data->max_count;

            max_count = node->values[3] ? slice_end - text_ptr : text_ptr -
              slice_start;
            if (max_count > node->values[2])
                max_count = node->values[2];

            rp_data->count = 0;
            rp_data->max_count = max_count;

            /* Does the body have to match at all? */
            if (node->values[1] == 0) {
                /* The body doesn't have to match, but the tail takes precedence
                 * over it. If the tail fails to match then we want to try the
                 * body again before backtracking into the head.
                 */

                /* Record backtracking info for matching the body. */
                if (!add_backtrack(state, RE_OP_END_LAZY_REPEAT) ||
                  !push_groups(state))
                    return RE_ERROR_MEMORY;
                bt_data = state->backtrack;
                bt_data->repeat.ofs = ofs;
                bt_data->repeat.position.node = node->next_1.node;
                bt_data->repeat.position.text_ptr = text_ptr;

                /* Advance into the tail. */
                node = node->next_2.node;
            } else {
                /* The body must match. */

                /* Advance into the body. */
                node = node->next_1.node;
            }
            break;
        }
        case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */
        {
            size_t count;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Count how many times the character repeats, up to the minimum. */
            count = RE_COUNT_ONE(state, node->next_2.node, text_ptr,
              node->values[1]);

            /* Have we matched at least the minimum? */
            if (count < node->values[1])
                goto backtrack;

            if (count < node->values[2]) {
                /* The match is shorter than the maximum, so we might need to
                 * backtrack the repeat to consume more.
                 */
                Py_ssize_t ofs;
                RE_RepeatData* rp_data;
                RE_BacktrackData* bt_data;

                /* Get the offset to the repeat values in the context. Repeat
                 * indexes are 0-based.
                 */
                ofs = repeat_info[node->values[0]].value_offset;
                rp_data = &state->data[ofs].repeat;
                if (!add_backtrack(state, RE_OP_LAZY_REPEAT_ONE) ||
                  !push_groups(state))
                    return RE_ERROR_MEMORY;
                bt_data = state->backtrack;
                bt_data->repeat.position.node = node;
                bt_data->repeat.ofs = ofs;
                bt_data->repeat.start = rp_data->start;
                bt_data->repeat.count = rp_data->count;

                rp_data->start = text_ptr;
                rp_data->count = count;
            }

            text_ptr += (Py_ssize_t)count * step;
            node = node->next_1.node;
            break;
        }
        case RE_OP_LOOKAROUND: /* Lookaround. */
        {
            RE_BacktrackBlock* current_block;
            size_t backtrack_count;
            size_t saved_groups_count;
            BOOL must_advance;
            int status;
            BOOL matched;
            TRACE(("%s %d\n", re_op_text[node->op], node->match))

            /* Try to match the subpattern. */
            current_block = state->current_block;
            backtrack_count = current_block->count;
            saved_groups_count = state->saved_groups_count;
            must_advance = state->must_advance;
            state->slice_start = text_start;
            state->slice_end = text_end;
            state->text_ptr = text_ptr;
            state->must_advance = FALSE;

            status = RE_MATCH_CONTEXT(state, node->next_2.node, FALSE);

            if (status < 0)
                return status;

            state->slice_end = slice_end;
            state->slice_start = slice_start;
            state->must_advance = must_advance;
            state->saved_groups_count = saved_groups_count;
            current_block->count = backtrack_count;
            state->current_block = current_block;

            matched = status == RE_ERROR_SUCCESS;
            if (matched != node->match)
                goto backtrack;

            node = node->next_1.node;
            break;
        }
        case RE_OP_REF_GROUP: /* Reference to a capture group. */
        {
            Py_ssize_t ofs;
            RE_CHAR* begin_group;
            RE_CHAR* end_group;
            size_t length;
            size_t available;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;

            /* Check whether the captured text, if any, exists at this position
             * in the string.
             */
            begin_group = state->data[ofs].group.begin;
            end_group = state->data[ofs].group.end;
            if (!(text_start <= begin_group && begin_group <= end_group &&
              end_group <= text_end))
                goto backtrack;

            length = end_group - begin_group;
            available = slice_end - text_ptr;
            if (length > available)
                goto backtrack;

            for (i = 0; i < length; i++) {
                if (text_ptr[i] != begin_group[i])
                    goto backtrack;
            }
            text_ptr += length;

            node = node->next_1.node;
            break;
        }
        case RE_OP_REF_GROUP_IGNORE: /* Reference to a capture group, ignoring case. */
        {
            Py_ssize_t ofs;
            RE_CHAR* begin_group;
            RE_CHAR* end_group;
            size_t length;
            size_t available;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;

            /* Check whether the captured text, if any, exists at this position
             * in the string.
             */
            begin_group = state->data[ofs].group.begin;
            end_group = state->data[ofs].group.end;
            if (!(text_start <= begin_group && begin_group <= end_group &&
              end_group <= text_end))
                goto backtrack;

            length = end_group - begin_group;
            available = slice_end - text_ptr;
            if (length > available)
                goto backtrack;

            for (i = 0; i < length; i++) {
                if (!same_char_ignore(text_ptr[i], begin_group[i]))
                    goto backtrack;
            }
            text_ptr += length;

            node = node->next_1.node;
            break;
        }
        case RE_OP_REF_GROUP_IGNORE_REV: /* Reference to a capture group, ignoring case. */
        {
            Py_ssize_t ofs;
            RE_CHAR* begin_group;
            RE_CHAR* end_group;
            size_t length;
            size_t available;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;

            /* Check whether the captured text, if any, exists at this position
             * in the string.
             */
            begin_group = state->data[ofs].group.begin;
            end_group = state->data[ofs].group.end;
            if (!(text_start <= begin_group && begin_group <= end_group &&
              end_group <= text_end))
                goto backtrack;

            length = end_group - begin_group;
            available = text_ptr - slice_start;
            if (length > available)
                goto backtrack;

            text_ptr -= length;
            for (i = 0; i < length; i++) {
                if (!same_char_ignore(text_ptr[i], begin_group[i]))
                    goto backtrack;
            }

            node = node->next_1.node;
            break;
        }
        case RE_OP_REF_GROUP_REV: /* Reference to a capture group. */
        {
            Py_ssize_t ofs;
            RE_CHAR* begin_group;
            RE_CHAR* end_group;
            size_t length;
            size_t available;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))

            /* Get the offset to the group value in the context. Capture group
             * indexes are 1-based (excluding group 0, which is the entire
             * matched string).
             */
            ofs = group_info[node->values[0] - 1].value_offset;

            /* Check whether the captured text, if any, exists at this position
             * in the string.
             */
            begin_group = state->data[ofs].group.begin;
            end_group = state->data[ofs].group.end;
            if (!(text_start <= begin_group && begin_group <= end_group &&
              end_group <= text_end))
                goto backtrack;

            length = end_group - begin_group;
            available = text_ptr - slice_start;
            if (length > available)
                goto backtrack;

            text_ptr -= length;
            for (i = 0; i < length; i++) {
                if (text_ptr[i] != begin_group[i])
                    goto backtrack;
            }

            node = node->next_1.node;
            break;
        }
        case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */
            TRACE(("%s %d\n", re_op_text[node->op], node->values[0]))
            if (text_ptr != state->search_anchor)
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_SET: /* Character set. */
            TRACE(("%s %d\n", re_op_text[node->op], node->match))
            if (text_ptr >= slice_end || !in_set(encoding, node, text_ptr[0]))
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_SET_IGNORE: /* Character set, ignoring case. */
            TRACE(("%s %d\n", re_op_text[node->op], node->match))
            if (text_ptr >= slice_end || !in_set_ignore(encoding, node,
              text_ptr[0]))
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_SET_IGNORE_REV: /* Character set, ignoring case. */
            TRACE(("%s %d\n", re_op_text[node->op], node->match))
            if (text_ptr <= slice_start || !in_set_ignore(encoding, node,
              text_ptr[-1]))
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_SET_REV: /* Character set. */
            TRACE(("%s %d\n", re_op_text[node->op], node->match))
            if (text_ptr <= slice_start || !in_set(encoding, node,
              text_ptr[-1]))
                goto backtrack;
            text_ptr += node->step;
            node = node->next_1.node;
            break;
        case RE_OP_START_OF_LINE: /* At the start of a line. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr != text_start && text_ptr[-1] != '\n')
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_START_OF_STRING: /* At the start of the string. */
            TRACE(("%s\n", re_op_text[node->op]))
            if (text_ptr != text_start)
                goto backtrack;
            node = node->next_1.node;
            break;
        case RE_OP_STRING: /* A string literal. */
        {
            size_t length;
            size_t available;
            RE_CODE* values;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->value_count))

            length = node->value_count;
            available = slice_end - text_ptr;
            if (length > available)
                goto backtrack;

            values = node->values;
            for (i = 0; i < length; i++) {
                if (text_ptr[i] != values[i])
                    goto backtrack;
            }
            text_ptr += length;
            node = node->next_1.node;
            break;
        }
        case RE_OP_STRING_IGNORE: /* A string literal, ignoring case. */
        {
            size_t length;
            size_t available;
            RE_CODE* values;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->value_count))

            length = node->value_count;
            available = slice_end - text_ptr;
            if (length > available)
                goto backtrack;

            values = node->values;
            for (i = 0; i < length; i++) {
                if (!same_char_ignore(text_ptr[i], values[i]))
                    goto backtrack;
            }
            text_ptr += length;
            node = node->next_1.node;
            break;
        }
        case RE_OP_STRING_IGNORE_REV: /* A string literal, ignoring case. */
        {
            size_t length;
            size_t available;
            RE_CODE* values;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->value_count))

            length = node->value_count;
            available = text_ptr - slice_start;
            if (length > available)
                goto backtrack;

            values = node->values;
            text_ptr -= length;
            for (i = 0; i < length; i++) {
                if (!same_char_ignore(text_ptr[i], values[i]))
                    goto backtrack;
            }
            node = node->next_1.node;
            break;
        }
        case RE_OP_STRING_REV: /* A string literal. */
        {
            size_t length;
            size_t available;
            RE_CODE* values;
            size_t i;
            TRACE(("%s %d\n", re_op_text[node->op], node->value_count))

            length = node->value_count;
            available = text_ptr - slice_start;
            if (length > available)
                goto backtrack;

            values = node->values;
            text_ptr -= length;
            for (i = 0; i < length; i++) {
                if (text_ptr[i] != values[i])
                    goto backtrack;
            }
            node = node->next_1.node;
            break;
        }
        case RE_OP_SUCCESS: /* Success. */
            /* Must the match advance past its start? */
            if (text_ptr == state->search_anchor && state->must_advance)
                goto backtrack;

            state->text_ptr = text_ptr;
            return RE_ERROR_SUCCESS;
        default: /* Illegal opcode! */
            TRACE(("UNKNOWN OP %d\n", node->op))
            return RE_ERROR_ILLEGAL;
        }

        /* Should we abort the matching? */
        ++iterations;
        if ((iterations & 0xFFFF) == 0 && safe_check_signals(state))
            return RE_ERROR_INTERRUPTED;
    }

backtrack:
    for (;;) {
        RE_BacktrackData* bt_data;

        bt_data = last_backtrack(state);

        switch (bt_data->op) {
        case RE_OP_BRANCH: /* 2-way branch. */
        {
            pop_groups(state);
            node = bt_data->branch.position.node;
            text_ptr = bt_data->branch.position.text_ptr;
            discard_backtrack(state);
            goto advance;
        }
        case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. */
        {
            RE_RepeatData* rp_data;

            rp_data = &state->data[bt_data->repeat.ofs].repeat;
            rp_data->count = bt_data->repeat.count;
            rp_data->max_count = bt_data->repeat.max_count;
            if (bt_data->repeat.position.node) {
                /* Restore then advance. */
                pop_groups(state);
                node = bt_data->repeat.position.node;
                text_ptr = bt_data->repeat.position.text_ptr;
                discard_backtrack(state);
                goto advance;
            } else
                /* Restore then backtrack. */
                discard_backtrack(state);
            break;
        }
        case RE_OP_END_LAZY_REPEAT: /* End of a lazy repeat. */
        {
            /* Restore then advance. */
            RE_RepeatData* rp_data;

            pop_groups(state);
            rp_data = &state->data[bt_data->repeat.ofs].repeat;
            node = bt_data->repeat.position.node;
            text_ptr = bt_data->repeat.position.text_ptr;
            discard_backtrack(state);
            goto advance;
        }
        case RE_OP_FAILURE:
        {
            RE_CHAR* end_ptr;

            /* Do we have to advance? */
            if (!search)
                return RE_ERROR_FAILURE;

            /* Can we advance? */
            text_ptr = state->match_ptr;
            end_ptr = state->reverse ? slice_start : slice_end;
            if (text_ptr == end_ptr)
                return RE_ERROR_FAILURE;

            /* Skip over any repeated leading characters. */
            switch (start_node->op) {
            case RE_OP_GREEDY_REPEAT_ONE:
            case RE_OP_LAZY_REPEAT_ONE:
            {
                size_t available;
                size_t count;

                available = state->reverse ? text_ptr - slice_start :
                  slice_end - text_ptr;
                count = RE_COUNT_ONE(state, start_node->next_2.node, text_ptr,
                  available);
                if (count > start_node->values[2])
                    count -= start_node->values[2];
                text_ptr += (Py_ssize_t)count * step;
                break;
            }
            }

            /* Advance and try to match again. */
            state->text_ptr = text_ptr + step;
            reload_groups(state);
            goto start_match;
        }
        case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */
        {
            RE_RepeatData* rp_data;

            pop_groups(state);
            rp_data = &state->data[bt_data->repeat.ofs].repeat;
            rp_data->count = bt_data->repeat.count;
            rp_data->max_count = bt_data->repeat.max_count;
            discard_backtrack(state);
            break;
        }
        case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */
        {
            RE_RepeatData* rp_data;
            size_t count;
            Py_ssize_t step;
            RE_CHAR* ptr;
            BOOL match;

            node = bt_data->repeat.position.node;

            rp_data = &state->data[bt_data->repeat.ofs].repeat;

            /* Unmatch one character at a time until the tail could match or we
             * have reached the minimum.
             */
            text_ptr = (RE_CHAR*)rp_data->start;

            count = rp_data->count;
            step = node->step;
            ptr = text_ptr + (Py_ssize_t)count * step;

            switch (node->next_1.test->op) {
            case RE_OP_CHARACTER:
            {
                RE_CODE ch;
                BOOL m;

                ch = node->next_1.test->values[0];
                m = node->next_1.test->match;

                for (;;) {
                    --count;
                    ptr -= step;
                    match = ptr < slice_end && (ptr[0] == ch) == m;
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_CHARACTER_IGNORE:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;
                BOOL m;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);
                m = node->next_1.test->match;

                for (;;) {
                    --count;
                    ptr -= step;
                    match = ptr < slice_end && same_char_ignore_3(ptr[0], ch,
                      ch_lower, ch_upper, ch_title) == m;
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_CHARACTER_IGNORE_REV:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;
                BOOL m;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);
                m = node->next_1.test->match;

                for (;;) {
                    --count;
                    ptr -= step;
                    match = ptr > slice_start && same_char_ignore_3(ptr[-1], ch,
                      ch_lower, ch_upper, ch_title) == m;
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_CHARACTER_REV:
            {
                RE_CODE ch;
                BOOL m;

                ch = node->next_1.test->values[0];
                m = node->next_1.test->match;

                for (;;) {
                    --count;
                    ptr -= step;
                    match = ptr > slice_start && (ptr[-1] == ch) == m;
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_STRING:
            {
                RE_CODE ch;

                ch = node->next_1.test->values[0];

                for (;;) {
                    RE_Position next_position;

                    --count;
                    ptr -= step;
                    match = ptr[0] == ch && RE_TRY_MATCH(state, &node->next_1,
                      ptr, &next_position);
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_STRING_IGNORE:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);

                for (;;) {
                    RE_Position next_position;

                    --count;
                    ptr -= step;
                    match = same_char_ignore_3(ptr[0], ch,
                      ch_lower, ch_upper, ch_title) && RE_TRY_MATCH(state,
                      &node->next_1, ptr, &next_position);
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_STRING_IGNORE_REV:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);

                for (;;) {
                    RE_Position next_position;

                    --count;
                    ptr -= step;
                    match = same_char_ignore_3(ptr[-1], ch,
                      ch_lower, ch_upper, ch_title) && RE_TRY_MATCH(state,
                      &node->next_1, ptr, &next_position);
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            case RE_OP_STRING_REV:
            {
                RE_CODE ch;

                ch = node->next_1.test->values[0];

                for (;;) {
                    RE_Position next_position;

                    --count;
                    ptr -= step;
                    match = ptr[-1] == ch && RE_TRY_MATCH(state, &node->next_1,
                      ptr, &next_position);
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }
            default:
                for (;;) {
                    RE_Position next_position;

                    --count;
                    ptr -= step;
                    match = RE_TRY_MATCH(state, &node->next_1, ptr,
                      &next_position);
                    if (match)
                        break;
                    if (count == node->values[1])
                        break;
                }
                break;
            }

            if (match) {
                /* The tail could match. */
                if (count > node->values[1]) {
                    /* The match is longer than the minimum, so we might need to
                     * backtrack the repeat again to consume less.
                     */
                    rp_data->count = count;
                    reload_groups(state);
                } else {
                    /* We've reached the minimum, so we won't need to backtrack
                     * the repeat again.
                     */
                    pop_groups(state);
                    rp_data->start = bt_data->repeat.start;
                    rp_data->count = bt_data->repeat.count;
                    discard_backtrack(state);
                }

                node = node->next_1.node;
                text_ptr = ptr;
                goto advance;
            } else {
                /* We've backtracked the repeat as far as we can. */
                drop_groups(state);
                rp_data->start = bt_data->repeat.start;
                rp_data->count = bt_data->repeat.count;
                discard_backtrack(state);
            }
            break;
        }
        case RE_OP_LAZY_REPEAT: /* Lazy repeat. */
        {
            RE_RepeatData* rp_data;

            pop_groups(state);
            rp_data = &state->data[bt_data->repeat.ofs].repeat;
            rp_data->count = bt_data->repeat.count;
            rp_data->max_count = bt_data->repeat.max_count;
            discard_backtrack(state);
            break;
        }
        case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */
        {
            RE_RepeatData* rp_data;
            size_t count;
            Py_ssize_t step;
            RE_CHAR* ptr;
            size_t max_count;
            BOOL match;

            node = bt_data->repeat.position.node;

            rp_data = &state->data[bt_data->repeat.ofs].repeat;

            /* Match one character at a time until the tail could match or we
             * have reached the maximum.
             */
            text_ptr = (RE_CHAR*)rp_data->start;
            count = rp_data->count;

            step = node->step;
            ptr = text_ptr + (Py_ssize_t)count * step;
            max_count = step > 0 ? slice_end - text_ptr : text_ptr -
              slice_start;
            if (max_count > node->values[2])
                max_count = node->values[2];

            switch (node->next_1.test->op) {
            case RE_OP_CHARACTER:
            {
                RE_CODE ch;
                BOOL m;

                ch = node->next_1.test->values[0];
                m = node->next_1.test->match;

                for (;;) {
                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr < slice_end && (ptr[0] == ch) == m;
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_CHARACTER_IGNORE:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;
                BOOL m;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);
                m = node->next_1.test->match;

                for (;;) {
                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr < slice_end && same_char_ignore_3(ptr[0], ch,
                      ch_lower, ch_upper, ch_title) == m;
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_CHARACTER_IGNORE_REV:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;
                BOOL m;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);
                m = node->next_1.test->match;

                for (;;) {
                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr > slice_start && same_char_ignore_3(ptr[-1], ch,
                      ch_lower, ch_upper, ch_title) == m;
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_CHARACTER_REV:
            {
                RE_CODE ch;
                BOOL m;

                ch = node->next_1.test->values[0];
                m = node->next_1.test->match;

                for (;;) {
                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr > slice_start && (ptr[-1] == ch) == m;
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_STRING:
            {
                RE_CODE ch;

                ch = node->next_1.test->values[0];

                for (;;) {
                    RE_Position next_position;

                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr < slice_end && (ptr[0] == ch) &&
                      RE_TRY_MATCH(state, &node->next_1, ptr, &next_position);
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_STRING_IGNORE:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;

                ch = node->next_1.test->values[0];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);

                for (;;) {
                    RE_Position next_position;

                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr < slice_end && same_char_ignore_3(ptr[0], ch,
                      ch_lower, ch_upper, ch_title) && RE_TRY_MATCH(state,
                      &node->next_1, ptr, &next_position);
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_STRING_IGNORE_REV:
            {
                RE_CODE ch;
                RE_CODE ch_lower;
                RE_CODE ch_upper;
                RE_CODE ch_title;

                ch = node->next_1.test->values[node->next_1.test->value_count -
                  1];
                ch_lower = encoding->lower(ch);
                ch_upper = encoding->upper(ch);
                ch_title = encoding->title(ch);

                for (;;) {
                    RE_Position next_position;

                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr > slice_start && same_char_ignore_3(ptr[-1], ch,
                      ch_lower, ch_upper, ch_title) && RE_TRY_MATCH(state,
                      &node->next_1, ptr, &next_position);
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            case RE_OP_STRING_REV:
            {
                RE_CODE ch;

                ch = node->next_1.test->values[node->next_1.test->value_count -
                  1];

                for (;;) {
                    RE_Position next_position;

                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = ptr > slice_start && (ptr[-1] == ch) &&
                      RE_TRY_MATCH(state, &node->next_1, ptr, &next_position);
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }
            default:
                for (;;) {
                    RE_Position next_position;

                    match = RE_MATCH_ONE(state, encoding, node->next_2.node,
                      ptr);
                    if (!match)
                        break;
                    ++count;
                    ptr += step;
                    match = RE_TRY_MATCH(state, &node->next_1, ptr,
                      &next_position);
                    if (match)
                        break;
                    if (count == max_count)
                        break;
                }
                break;
            }

            if (match) {
                /* The tail could match. */
                text_ptr = ptr;

                if (count < max_count) {
                    /* The match is shorter than the maximum, so we might need
                     * to backtrack the repeat again to consume more.
                     */
                    rp_data->count = count;
                    reload_groups(state);
                } else {
                    /* We've reached the maximum, so we won't need to backtrack
                     * the repeat again.
                     */
                    pop_groups(state);
                    rp_data->start = bt_data->repeat.start;
                    rp_data->count = bt_data->repeat.count;
                    discard_backtrack(state);
                }

                node = node->next_1.node;
                goto advance;
            } else {
                /* The tail couldn't match. */
                drop_groups(state);
                rp_data->start = bt_data->repeat.start;
                rp_data->count = bt_data->repeat.count;
                discard_backtrack(state);
            }
            break;
        }
        default:
            return RE_ERROR_ILLEGAL;
        }
    }
}

/* Performs a match or search from the current text position.
 *
 * state supplies the text pointers, the compiled pattern and the working
 * storage. search is handed on to the matcher: when it is FALSE a failed
 * attempt at the current position is final, otherwise the matcher advances and
 * tries again.
 *
 * Returns the matcher's status code. On RE_ERROR_SUCCESS the span of every
 * capture group is copied into state->marks and state->lastindex and
 * state->lastgroup are updated. A negative status is reported with
 * set_error() unless a Python exception is already pending.
 */
Py_LOCAL(int) RE_MATCH(RE_State* state, BOOL search) {
    Py_ssize_t remaining;
    size_t available;
    int status;
    TRACE(("<<RE_MATCH>>\n"))

    /* Is there enough to search?
     *
     * The distance is computed as a signed value first. If the current
     * position lies beyond the slice limit in the direction of travel (for
     * example, the slice ends before it starts) then converting the difference
     * directly to size_t would wrap around to a huge value, pass the minimum
     * width test and let the matcher read outside the slice.
     */
    remaining = state->reverse ? (RE_CHAR*)state->text_ptr -
      (RE_CHAR*)state->slice_start : (RE_CHAR*)state->slice_end -
      (RE_CHAR*)state->text_ptr;
    if (remaining < 0)
        return RE_ERROR_FAILURE;

    available = (size_t)remaining;
    if (available < state->min_width)
        return RE_ERROR_FAILURE;

#if defined(RE_MULTITHREADED)
    /* Release the GIL. */
    if (state->is_multithreaded)
        release_GIL(state);

#endif
    /* Initialise the base context. */
    init_match(state);

    /* Perform the match. */
    status = RE_MATCH_CONTEXT(state, state->pattern->start_node, search);
    if (status == RE_ERROR_SUCCESS) {
        Py_ssize_t max_end_index;
        RE_GroupInfo* group_info;
        PatternObject* pattern;
        Py_ssize_t g;

        /* Store the results. */
        state->lastindex = -1;
        state->lastgroup = -1;
        max_end_index = -1;

        /* Store the capture groups. */
        pattern = state->pattern;
        group_info = pattern->group_info;
        for (g = 0; g < pattern->group_count; g++) {
            Py_ssize_t ofs;
            Py_ssize_t m;
            void* begin;
            void* end;

            /* Each group occupies 2 consecutive marks: its begin and end. */
            ofs = group_info[g].value_offset;
            begin = state->data[ofs].group.begin;
            end = state->data[ofs].group.end;
            m = g * 2;
            state->marks[m] = begin;
            state->marks[m + 1] = end;
            TRACE(("group %d at %d from %d to %d\n", g + 1, ofs,
              text_offset(state, begin), text_offset(state, end)))

            /* Among the groups which took part in the match, remember the one
             * with the greatest end_index.
             */
            if (begin && end && group_info[g].end_index > max_end_index) {
                max_end_index = group_info[g].end_index;
                state->lastindex = g + 1;
                if (group_info[g].has_name)
                    state->lastgroup = g + 1;
            }
        }
    }

#if defined(RE_MULTITHREADED)
    /* Re-acquire the GIL. */
    if (state->is_multithreaded)
        acquire_GIL(state);

#endif
    /* Has an error occurred which we haven't reported yet? */
    if (status < 0 && !PyErr_Occurred())
        set_error(status, NULL);

    return status;
}

/* The matcher code above has been expanded using these names, so remove them.
 * On the first pass they are redefined below for the second pass.
 */
#undef RE_CHAR
#undef RE_AT_BOUNDARY
#undef RE_TRY_MATCH
#undef RE_COUNT_ONE
#undef RE_MATCH_ONE
#undef RE_SEARCH_CONTEXT
#undef RE_MATCH_CONTEXT
#undef RE_MATCH

#if !defined(RE_RECURSIVE)
/* Parsed on first (non-recursive) pass only.
 *
 * This causes the recursive (second) pass: the file includes itself with
 * RE_RECURSIVE defined and with the matcher names mapped to their Unicode
 * versions.
 */

#define RE_RECURSIVE

/* Set up for Unicode. */
#define RE_CHAR Py_UNICODE
#define RE_AT_BOUNDARY uat_boundary
#define RE_TRY_MATCH utry_match
#define RE_COUNT_ONE ucount_one
#define RE_MATCH_ONE umatch_one
#define RE_SEARCH_CONTEXT usearch_context
#define RE_MATCH_CONTEXT umatch_context
#define RE_MATCH umatch

#include "_regex.c"

/* Undefine so that the remainder of the code is skipped on the first pass (it
 * will have been done on the recursive pass).
 */
#undef RE_RECURSIVE
#endif

#if defined(RE_RECURSIVE)
/* Parsed on recursive (second) pass only. */

/* Gets a string from a Python object.
 *
 * On success, stores a pointer to the character data in *characters, the
 * length (in characters) in *length and the size of one character (in bytes)
 * in *charsize, then returns TRUE.
 *
 * Returns FALSE with a Python exception set if the object is neither a Unicode
 * string nor a single-segment readable buffer whose size is consistent with
 * 1-byte or Py_UNICODE-sized characters.
 */
Py_LOCAL(BOOL) get_string(PyObject* string, void** characters, Py_ssize_t*
  length, Py_ssize_t* charsize) {
    PyBufferProcs* buffer;
    Py_ssize_t bytes;
    Py_ssize_t size;

    /* Unicode objects don't always support the buffer interface, so get the
     * data directly instead.
     */
    if (PyUnicode_Check(string)) {
        *characters = (void*)PyUnicode_AS_DATA(string);
        *length = PyUnicode_GET_SIZE(string);
        *charsize = sizeof(Py_UNICODE);
        return TRUE;
    }

    /* Get pointer to string buffer. Only single-segment buffers are
     * accepted.
     */
    buffer = string->ob_type->tp_as_buffer;
    if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
      buffer->bf_getsegcount(string, NULL) != 1) {
        PyErr_SetString(PyExc_TypeError, "expected string or buffer");
        return FALSE;
    }

    /* Determine buffer size. */
    bytes = buffer->bf_getreadbuffer(string, 0, characters);

    if (bytes < 0) {
        PyErr_SetString(PyExc_TypeError, "buffer has negative size");
        return FALSE;
    }

    /* Determine character size. */
    size = PyObject_Size(string);

    /* PyObject_Size() returns -1 with an exception set on failure. Report that
     * exception instead of replacing it with a misleading size mismatch.
     */
    if (size < 0)
        return FALSE;

    if (PyString_Check(string) || bytes == size)
        *charsize = 1;
    else if (bytes == (Py_ssize_t)(size * sizeof(Py_UNICODE)))
        *charsize = sizeof(Py_UNICODE);
    else {
        PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
        return FALSE;
    }

    *length = size;

    return TRUE;
}

/* Initialises a state object from an already-extracted string.
 *
 * characters, length and charsize describe the text (as produced by
 * get_string); start and end delimit the slice to examine and may be negative,
 * in which case they are taken relative to the end of the text. Both are
 * clamped to the range 0..length.
 *
 * On success the state holds new references to pattern and string and TRUE is
 * returned. If any allocation fails, whatever was allocated is released, no
 * references are taken and FALSE is returned.
 * NOTE(review): presumably re_alloc() reports the memory error itself --
 * confirm against its definition.
 */
Py_LOCAL(BOOL) state_init_2(RE_State* state, PatternObject* pattern, PyObject*
  string, void* characters, Py_ssize_t length, Py_ssize_t charsize, Py_ssize_t
  start, Py_ssize_t end, BOOL overlapped) {
    BOOL ascii;
    BOOL locale;
    BOOL unicode;

    state->data = NULL;
    state->saved_groups = NULL;
    state->marks = NULL;
    state->backtrack_block.previous = NULL;
    state->backtrack_block.next = NULL;
    state->backtrack_block.capacity = RE_BACKTRACK_BLOCK_SIZE;

    /* The marks for the capture groups. */
    if (pattern->group_count) {
        Py_ssize_t threshold;

        state->marks = (void**)re_alloc(pattern->group_count * 2 *
          sizeof(void*));
        if (!state->marks)
            goto error;

        /* The saved groups capacity is the smallest power-of-2 multiple of 16
         * which is at least 16 entries per group.
         */
        state->saved_groups_capacity = 16;
        threshold = pattern->group_count * 16;
        while (state->saved_groups_capacity < threshold)
            state->saved_groups_capacity *= 2;
        state->saved_groups = (RE_Data*)re_alloc(state->saved_groups_capacity *
          sizeof(RE_Data));
        if (!state->saved_groups)
            goto error;
    } else
        state->saved_groups_capacity = 0;

    /* The working data for the pattern. This is allocated (and checked) here,
     * together with the other allocations, so that nothing after this point
     * can fail and no references have been taken if we have to bail out.
     */
    if (pattern->data_count > 0) {
        state->data = re_alloc(pattern->data_count * sizeof(RE_Data));
        if (!state->data)
            goto error;
    }

    /* Adjust boundaries. */
    if (start < 0)
        start += length;
    if (start < 0)
        start = 0;
    else if (start > length)
        start = length;

    if (end < 0)
        end += length;
    if (end < 0)
        end = 0;
    else if (end > length)
        end = length;

    state->overlapped = overlapped;
    state->min_width = pattern->min_width;

    state->charsize = charsize;

    /* Initialise the character encoding. If the pattern doesn't specify one
     * then it depends on the character size of the text.
     */
    unicode = (pattern->flags & RE_FLAG_UNICODE) != 0;
    locale = (pattern->flags & RE_FLAG_LOCALE) != 0;
    ascii = (pattern->flags & RE_FLAG_ASCII) != 0;
    if (!unicode && !locale && !ascii) {
        if (charsize == 1)
            ascii = TRUE;
        else
            unicode = TRUE;
    }

    /* At least one of the flags is set by now, so the final case is ASCII. */
    if (unicode)
        state->encoding = &unicode_encoding;
    else if (locale)
        state->encoding = &locale_encoding;
    else
        state->encoding = &ascii_encoding;

    /* The state object contains a reference to the string and also a pointer to
     * its contents.
     *
     * The documentation says that the end of the slice behaves like the end of
     * the string.
     */
    state->text_start = characters;
    state->text_end = (void*)((char*)characters + end * state->charsize);

    state->reverse = pattern->flags & RE_FLAG_REVERSE;

    state->slice_start = (void*)((char*)characters + start * state->charsize);
    state->slice_end = state->text_end;

    /* A reverse match starts at the end of the slice. */
    state->text_ptr = state->reverse ? state->slice_end : state->slice_start;

    /* Point to the final newline if it's at the end of the string, otherwise
     * just the end of the string.
     */
    state->final_newline = NULL;
    if (state->text_start < state->text_end) {
        char* final_ptr;
        RE_CODE final_char;

        final_ptr = (char*)state->text_end - charsize;
        if (charsize == 1)
            final_char = *(unsigned char*)final_ptr;
        else
            final_char = *(Py_UNICODE*)final_ptr;
        if (final_char == '\n')
            state->final_newline = final_ptr;
    }

    state->zero_width = (pattern->flags & RE_FLAG_ZEROWIDTH) != 0;
    state->must_advance = FALSE;

    state->pattern = pattern;
    state->string = string;

    Py_INCREF(state->pattern);
    Py_INCREF(state->string);

#if defined(RE_MULTITHREADED)
    /* Multithreading is possible during matching on immutable strings. */
    state->is_multithreaded = PyUnicode_Check(string) ||
      PyString_Check(string);

#endif
    return TRUE;

error:
    /* Release whatever was allocated; the pointers not yet allocated are still
     * NULL.
     */
    re_dealloc(state->data);
    re_dealloc(state->saved_groups);
    re_dealloc(state->marks);
    state->data = NULL;
    state->saved_groups = NULL;
    state->marks = NULL;
    return FALSE;
}

/* Initialises a state object for matching a pattern against a Python string.
 *
 * Extracts the character data from string and then hands over to
 * state_init_2. Returns FALSE if the string can't be used or if the state
 * can't be set up.
 */
Py_LOCAL(BOOL) state_init(RE_State* state, PatternObject* pattern, PyObject*
  string, Py_ssize_t start, Py_ssize_t end, BOOL overlapped) {
    Py_ssize_t char_width;
    Py_ssize_t char_count;
    void* text;

    /* Only continue if we can get at the text to search or match. */
    if (get_string(string, &text, &char_count, &char_width))
        return state_init_2(state, pattern, string, text, char_count,
          char_width, start, end, overlapped);

    return FALSE;
}

/* Finalises a state object, discarding its contents.
 *
 * Frees the marks, every backtrack block chained after the one embedded in the
 * state, the working data and the saved groups, then drops the references to
 * the pattern and the string.
 */
Py_LOCAL(void) state_fini(RE_State* state) {
    RE_BacktrackBlock* block;
    RE_BacktrackBlock* following;

    re_dealloc(state->marks);

    /* Walk the chain of extra backtrack blocks, fetching each link before the
     * block which contains it is freed.
     */
    for (block = state->backtrack_block.next; block; block = following) {
        following = block->next;
        re_dealloc(block);
    }

    re_dealloc(state->data);
    re_dealloc(state->saved_groups);

    Py_DECREF(state->pattern);
    Py_DECREF(state->string);
}

/* Converts a Python object to a string index.
 *
 * None gives the default, def; an int or a long gives its value. Anything else
 * sets an RE_ERROR_INDEX error and gives 0.
 */
Py_LOCAL(long) as_string_index(PyObject* obj, long def) {
    /* No index was supplied. */
    if (obj == Py_None)
        return def;

    if (PyInt_Check(obj))
        return PyInt_AsLong(obj);

    if (PyLong_Check(obj))
        return PyLong_AsLong(obj);

    /* It's not something we can use as an index. */
    set_error(RE_ERROR_INDEX, NULL);
    return 0;
}

/* Deallocates a MatchObject.
 *
 * Drops the references held by the match, frees its marks and finally
 * releases the object itself.
 */
static void match_dealloc(MatchObject* self) {
    /* The string and the regs may be absent, hence the NULL-tolerant form. */
    Py_XDECREF(self->string);
    Py_DECREF(self->pattern);
    re_dealloc(self->marks);
    Py_XDECREF(self->regs);
    /* Nothing in self may be touched after this. */
    PyObject_DEL(self);
}

/* Returns the text captured by a group, selected by group number.
 *
 * Group 0 is the slice of the string from self->pos to self->endpos. For any
 * other group a new reference to 'def' is returned when the string is None or
 * either of the group's marks is negative. A number outside 0..group_count
 * reports RE_ERROR_NO_SUCH_GROUP and returns NULL.
 */
Py_LOCAL(PyObject*) match_get_group_by_index(MatchObject* self, long
  index, PyObject* def) {
    Py_ssize_t* span;

    /* Reject group numbers that are out of range. */
    if (index < 0 || index > self->group_count) {
        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
        return NULL;
    }

    /* Group 0 is the whole match. */
    if (index == 0)
        return PySequence_GetSlice(self->string, self->pos, self->endpos);

    /* Group n keeps its start and end at marks[2n - 2] and marks[2n - 1]. */
    span = &self->marks[index * 2 - 2];

    if (self->string != Py_None && span[0] >= 0 && span[1] >= 0)
        return PySequence_GetSlice(self->string, span[0], span[1]);

    /* The string or the group is undefined, so hand back the default. */
    Py_INCREF(def);
    return def;
}

/* Gets a MatchObject's group index.
 *
 * The supplied index can be an integer or a string (group name) object. Names
 * are looked up in the pattern's groupindex mapping, if it has one.
 *
 * If allow_neg is true, an integer in the range -group_count..-1 is converted
 * to the corresponding positive group number. No other range checking is done
 * here; that is left to the caller.
 *
 * Returns the group number, or -1 if the index could not be resolved. Any
 * Python exception raised while resolving it is cleared.
 */
Py_LOCAL(long) match_get_group_index(MatchObject* self, PyObject* index,
  BOOL allow_neg) {
    long group = -1;

    /* Is the index an integer? */
    if (PyInt_Check(index) || PyLong_Check(index)) {
        if (PyInt_Check(index))
            group = PyInt_AsLong(index);
        else
            group = PyLong_AsLong(index);
        if (PyErr_Occurred())
            goto error;

        /* Adjust negative indices where valid and allowed. */
        if (allow_neg && -self->group_count <= group && group <= -1)
            group += self->group_count + 1;
    } else if (self->pattern->groupindex) {
        /* The index might be a group name, so look it up. */
        PyObject* number;

        number = PyObject_GetItem(self->pattern->groupindex, index);
        if (!number)
            goto error;

        /* Check that we have an integer. */
        if (PyInt_Check(number))
            group = PyInt_AsLong(number);
        else if (PyLong_Check(number))
            group = PyLong_AsLong(number);
        else {
            /* PyObject_GetItem returned a new reference; don't leak it. */
            Py_DECREF(number);
            goto error;
        }

        /* Release the looked-up object before testing for a conversion error
         * so that it is freed on the failure path as well.
         */
        Py_DECREF(number);

        if (PyErr_Occurred())
            goto error;
    }

    return group;

error:
    PyErr_Clear();
    return -1;
}

/* Returns the text of the group selected by an index object.
 *
 * The index must be an int, a long, a unicode string or a byte string;
 * anything else reports RE_ERROR_GROUP_INDEX_TYPE and returns NULL. The index
 * is resolved with match_get_group_index() and the group is fetched with
 * match_get_group_by_index().
 */
Py_LOCAL(PyObject*) match_get_group(MatchObject* self, PyObject* index,
  PyObject* def, BOOL allow_neg) {
    long group;

    if (!(PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index)
      || PyString_Check(index))) {
        set_error(RE_ERROR_GROUP_INDEX_TYPE, index);
        return NULL;
    }

    group = match_get_group_index(self, index, allow_neg);

    return match_get_group_by_index(self, group, def);
}

/* MatchObject's 'group' method.
 *
 * With no arguments the whole match is returned, with one argument that
 * group, and with several arguments a tuple holding one entry per argument.
 * Groups that did not match are reported as None.
 */
static PyObject* match_group(MatchObject* self, PyObject* args) {
    Py_ssize_t count;
    Py_ssize_t slot;
    PyObject* groups;

    count = PyTuple_GET_SIZE(args);

    /* group() */
    if (count == 0)
        return match_get_group_by_index(self, 0, Py_None);

    /* group(x) */
    if (count == 1)
        return match_get_group(self, PyTuple_GET_ITEM(args, 0), Py_None,
          FALSE);

    /* group(x, y, z, ...) */
    groups = PyTuple_New(count);
    if (!groups)
        return NULL;

    for (slot = 0; slot < count; slot++) {
        PyObject* text;

        text = match_get_group(self, PyTuple_GET_ITEM(args, slot), Py_None,
          FALSE);
        if (!text) {
            Py_DECREF(groups);
            return NULL;
        }

        /* The tuple takes over the reference. */
        PyTuple_SET_ITEM(groups, slot, text);
    }

    return groups;
}

/* MatchObject's 'start' method.
 *
 * Takes an optional group (number or name, default 0) and returns the start
 * position of that group as an integer, or -1 if the group is undefined.
 * Reports RE_ERROR_NO_SUCH_GROUP and returns NULL for an unknown group.
 */
static PyObject* match_start(MatchObject* self, PyObject* args) {
    long index;

    PyObject* index_ = Py_False; /* Default index is 0. */
    if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
        return NULL;

    index = match_get_group_index(self, index_, FALSE);
    if (index < 0 || index > self->group_count) {
        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
        return NULL;
    }

    /* Positions are Py_ssize_t values, so they must not be passed through the
     * "i" (C int) format of Py_BuildValue; convert them directly instead.
     */
    if (index == 0)
        return PyInt_FromSsize_t(self->pos);

    /* A mark is -1 if the group is undefined. */
    return PyInt_FromSsize_t(self->marks[index * 2 - 2]);
}

/* MatchObject's 'end' method.
 *
 * Takes an optional group (number or name, default 0) and returns the end
 * position of that group as an integer, or -1 if the group is undefined.
 * Reports RE_ERROR_NO_SUCH_GROUP and returns NULL for an unknown group.
 */
static PyObject* match_end(MatchObject* self, PyObject* args) {
    long index;

    PyObject* index_ = Py_False; /* Default index is 0. */
    if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
        return NULL;

    index = match_get_group_index(self, index_, FALSE);
    if (index < 0 || index > self->group_count) {
        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
        return NULL;
    }

    /* Positions are Py_ssize_t values, so they must not be passed through the
     * "i" (C int) format of Py_BuildValue; convert them directly instead.
     */
    if (index == 0)
        return PyInt_FromSsize_t(self->endpos);

    /* A mark is -1 if the group is undefined. */
    return PyInt_FromSsize_t(self->marks[index * 2 - 1]);
}

/* Creates a pair (2-tuple) of integers.
 *
 * Returns a new reference, or NULL if the tuple or either integer could not
 * be created.
 */
Py_LOCAL(PyObject*) _pair(Py_ssize_t i1, Py_ssize_t i2) {
    PyObject* pair;
    PyObject* first;
    PyObject* second;

    pair = PyTuple_New(2);
    if (!pair)
        return NULL;

    first = PyInt_FromSsize_t(i1);
    if (!first) {
        Py_DECREF(pair);
        return NULL;
    }
    PyTuple_SET_ITEM(pair, 0, first);

    second = PyInt_FromSsize_t(i2);
    if (!second) {
        /* Releasing the tuple also releases the first integer. */
        Py_DECREF(pair);
        return NULL;
    }
    PyTuple_SET_ITEM(pair, 1, second);

    return pair;
}

/* MatchObject's 'span' method.
 *
 * Takes an optional group (number or name, default 0) and returns the
 * (start, end) pair of that group. Reports RE_ERROR_NO_SUCH_GROUP and returns
 * NULL for an unknown group.
 */
static PyObject* match_span(MatchObject* self, PyObject* args) {
    PyObject* group_arg = Py_False; /* Default index is 0. */
    long group;
    long first_mark;

    if (!PyArg_UnpackTuple(args, "span", 0, 1, &group_arg))
        return NULL;

    group = match_get_group_index(self, group_arg, FALSE);
    if (group < 0 || group > self->group_count) {
        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
        return NULL;
    }

    /* Group 0 spans the whole match. */
    if (group == 0)
        return _pair(self->pos, self->endpos);

    /* Both marks are -1 if the group is undefined. */
    first_mark = group * 2 - 2;
    return _pair(self->marks[first_mark], self->marks[first_mark + 1]);
}

/* MatchObject's 'groups' method.
 *
 * Returns a tuple with the text of groups 1..group_count. The optional
 * 'default' argument (None if omitted) is used for groups that are undefined.
 */
static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* kw) {
    static char* kwlist[] = { "default", NULL };
    PyObject* def = Py_None;
    PyObject* groups;
    long group;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
        return NULL;

    groups = PyTuple_New(self->group_count);
    if (!groups)
        return NULL;

    /* Group 0 (the entire match) is left out, so group n goes in slot n - 1.
     */
    for (group = 1; group <= self->group_count; group++) {
        PyObject* text;

        text = match_get_group_by_index(self, group, def);
        if (!text) {
            Py_DECREF(groups);
            return NULL;
        }
        PyTuple_SET_ITEM(groups, group - 1, text);
    }

    return groups;
}

/* MatchObject's 'groupdict' method.
 *
 * Returns a new dict that maps every key of the pattern's groupindex mapping
 * to the text of the corresponding group. The optional 'default' argument
 * (None if omitted) is used for groups that are undefined. If the pattern has
 * no groupindex the dict is returned empty. Returns NULL on failure.
 */
static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject*
  kw) {
    PyObject* result;
    PyObject* keys;
    Py_ssize_t index;

    PyObject* def = Py_None;
    static char* kwlist[] = { "default", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
        return NULL;

    result = PyDict_New();
    if (!result || !self->pattern->groupindex)
        return result;

    keys = PyMapping_Keys(self->pattern->groupindex);
    if (!keys)
        goto failed;

    for (index = 0; index < PyList_GET_SIZE(keys); index++) {
        int status;
        PyObject* key;
        PyObject* value;

        /* PyList_GET_ITEM returns a borrowed reference: the key is owned by
         * the list and must never be released here.
         */
        key = PyList_GET_ITEM(keys, index);
        if (!key)
            goto failed;

        value = match_get_group(self, key, def, FALSE);
        if (!value)
            goto failed;

        /* The dict takes its own references to the key and the value. */
        status = PyDict_SetItem(result, key, value);
        Py_DECREF(value);
        if (status < 0)
            goto failed;
    }

    Py_DECREF(keys);

    return result;

failed:
    Py_XDECREF(keys);
    Py_DECREF(result);
    return NULL;
}

/* Calls a function in a named module.
 *
 * Imports 'module', fetches its attribute 'function' and calls it with the
 * argument tuple 'args'.
 *
 * The reference to 'args' is always consumed, whether the call succeeds or
 * not, so callers can pass a freshly-built tuple directly. A NULL 'args'
 * (e.g. a failed tuple construction) simply yields NULL.
 *
 * Returns the result of the call as a new reference, or NULL on failure.
 */
Py_LOCAL(PyObject*) call(char* module, char* function, PyObject* args) {
    PyObject* name;
    PyObject* mod;
    PyObject* func;
    PyObject* result = NULL;

    if (!args)
        return NULL;

    name = PyString_FromString(module);
    if (!name)
        goto done;

    mod = PyImport_Import(name);
    Py_DECREF(name);
    if (!mod)
        goto done;

    func = PyObject_GetAttrString(mod, function);
    Py_DECREF(mod);
    if (!func)
        goto done;

    result = PyObject_CallObject(func, args);
    Py_DECREF(func);

done:
    /* Release the arguments on every path, not just after a completed call. */
    Py_DECREF(args);
    return result;
}

/* Resolves one item of a compiled replacement list.
 *
 * A string item (unicode or bytes) is a literal and is returned as it is. An
 * integer item is a group reference: 0 gives the whole match, 1..group_count
 * gives that group's text, or None if the group did not match. Always returns
 * a new reference, or NULL with an error reported.
 */
Py_LOCAL(PyObject*) get_match_replacement(MatchObject* self, PyObject* item,
  PyObject* string, Py_ssize_t group_count) {
    long group;
    Py_ssize_t first_mark;

    /* Literals can be used directly. */
    if (PyUnicode_Check(item) || PyString_Check(item)) {
        Py_INCREF(item);
        return item;
    }

    /* Otherwise it has to be a group number. */
    if (PyInt_Check(item))
        group = PyInt_AsLong(item);
    else if (PyLong_Check(item))
        group = PyLong_AsLong(item);
    else {
        set_error(RE_ERROR_REPLACEMENT, NULL);
        return NULL;
    }

    if (PyErr_Occurred()) {
        set_error(RE_ERROR_REPLACEMENT, NULL);
        return NULL;
    }

    /* Group 0 is the entire matched portion of the string. */
    if (group == 0)
        return PySequence_GetSlice(string, self->pos, self->endpos);

    if (group < 1 || group > group_count) {
        /* No such group. */
        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
        return NULL;
    }

    first_mark = group * 2 - 2;
    if (self->marks[first_mark] < 0) {
        /* The group didn't match. */
        Py_INCREF(Py_None);
        return Py_None;
    }

    return PySequence_GetSlice(string, self->marks[first_mark],
      self->marks[first_mark + 1]);
}

/* Joins together a list of strings.
 *
 * 'string' supplies the string type: the joiner is its empty slice [0:0], so
 * the result has the type that the joiner's 'join' method produces.
 *
 * The reference to 'list' is always consumed, on success and on failure.
 * Returns a new reference to the joined string, or NULL on failure.
 */
Py_LOCAL(PyObject*) join_list(PyObject* list, PyObject* string) {
    PyObject* joiner;
    PyObject* function;
    PyObject* args;
    PyObject* result;

    /* A single item of the right type can be returned as it is. The types are
     * compared directly; PyObject_Type() would return new references that
     * would have to be released again.
     */
    if (PyList_GET_SIZE(list) == 1) {
        result = PyList_GET_ITEM(list, 0);
        if (result->ob_type == string->ob_type) {
            Py_INCREF(result);
            Py_DECREF(list);
            return result;
        }
    }

    /* An empty string of the same kind as 'string'. */
    joiner = PySequence_GetSlice(string, 0, 0);
    if (!joiner) {
        Py_DECREF(list);
        return NULL;
    }

    /* Nothing to join, so the empty string is the result. */
    if (PyList_GET_SIZE(list) == 0) {
        Py_DECREF(list);
        return joiner;
    }

    function = PyObject_GetAttrString(joiner, "join");
    if (!function) {
        Py_DECREF(joiner);
        Py_DECREF(list);
        return NULL;
    }

    args = PyTuple_New(1);
    if (!args) {
        Py_DECREF(function);
        Py_DECREF(joiner);
        Py_DECREF(list);
        return NULL;
    }

    /* The tuple takes over the reference to the list. */
    PyTuple_SET_ITEM(args, 0, list);
    result = PyObject_CallObject(function, args);
    Py_DECREF(args); /* Also removes list. */
    Py_DECREF(function);
    Py_DECREF(joiner);

    return result;
}

/* Adds an item to be joined.
 *
 * The first item is held on its own in join_info->item; a list is created
 * only once a second item arrives. join_info keeps its own references, so
 * the caller retains ownership of 'item'.
 *
 * Returns 0 on success or a failure status otherwise. On failure everything
 * held by join_info is released and its members are reset to NULL, so the
 * caller's own cleanup cannot release them a second time.
 */
Py_LOCAL(int) add_item(JoinInfo* join_info, PyObject* item) {
    int status;

    /* If the list already exists then just add the item to it. */
    if (join_info->list) {
        status = PyList_Append(join_info->list, item);
        if (status < 0)
            goto error;

        return status;
    }

    /* If we already have an item then we now have 2(!) and need to put them
     * into a list.
     */
    if (join_info->item) {
        join_info->list = PyList_New(0);
        if (!join_info->list) {
            status = RE_ERROR_MEMORY;
            goto error;
        }

        status = PyList_Append(join_info->list, join_info->item);
        if (status < 0)
            goto error;

        status = PyList_Append(join_info->list, item);
        if (status < 0)
            goto error;

        return 0;
    }

    /* This is the first item. */
    join_info->item = item;
    Py_INCREF(join_info->item);

    return 0;

error:
    /* Py_CLEAR releases the reference (if any) and NULLs the member, leaving
     * no dangling pointers behind.
     */
    Py_CLEAR(join_info->list);
    Py_CLEAR(join_info->item);
    return status;
}

/* Joins together the items collected in a JoinInfo.
 *
 * 'string' supplies the string type of the result. The references held by
 * join_info are consumed. Returns a new reference to the joined string, or
 * NULL on failure.
 */
Py_LOCAL(PyObject*) join_list_info(JoinInfo* join_info, PyObject* string) {
    /* If the list already exists then just do the join. */
    if (join_info->list) {
        /* The list holds its own reference to the first item. */
        Py_DECREF(join_info->item);
        return join_list(join_info->list, string);
    }

    /* If we have only 1 item then we _might_ be able to just return it. */
    if (join_info->item) {
        int status;

        /* We can return the single item only if it's the same type of string as
         * the joiner. The types are compared directly; PyObject_Type() would
         * return new references that would have to be released again.
         */
        if (join_info->item->ob_type == string->ob_type)
            return join_info->item;

        /* We'll default to the normal joining method, which requires the item
         * to be in a list.
         */
        join_info->list = PyList_New(0);
        if (!join_info->list) {
            Py_DECREF(join_info->item);
            return NULL;
        }

        status = PyList_Append(join_info->list, join_info->item);
        if (status < 0) {
            Py_DECREF(join_info->list);
            Py_DECREF(join_info->item);
            return NULL;
        }

        Py_DECREF(join_info->item);
        return join_list(join_info->list, string);
    }

    /* There are no items, so return an empty string. */
    return PySequence_GetSlice(string, 0, 0);
}

/* MatchObject's 'expand' method.
 *
 * The template is compiled by the module's _compile_replacement function into
 * a list of literals and group references. Each entry is resolved against
 * this match and the pieces are joined into a single string. Groups that did
 * not match contribute nothing.
 *
 * Returns a new reference to the expanded string, or NULL on failure.
 */
static PyObject* match_expand(MatchObject* self, PyObject* str_template) {
    PyObject* replacement;
    JoinInfo join_info;
    Py_ssize_t size;
    Py_ssize_t i;

    /* Hand the template to the template compiler. */
    replacement = call(RE_MODULE, "_compile_replacement", PyTuple_Pack(2,
      self->pattern, str_template));
    if (!replacement)
        return NULL;

    join_info.list = NULL;
    join_info.item = NULL;

    /* Add each part of the template to the list. */
    size = PyList_GET_SIZE(replacement);
    for (i = 0; i < size; i++) {
        PyObject* item;
        PyObject* str_item;

        item = PyList_GET_ITEM(replacement, i);
        str_item = get_match_replacement(self, item, self->string,
          self->group_count);
        if (!str_item)
            goto error;

        /* Add to the list. */
        if (str_item == Py_None)
            Py_DECREF(str_item);
        else {
            int status;

            status = add_item(&join_info, str_item);
            Py_DECREF(str_item);
            if (status < 0) {
                /* add_item() releases whatever join_info held when it fails,
                 * so forget those pointers to keep the cleanup below from
                 * releasing them a second time.
                 */
                join_info.list = NULL;
                join_info.item = NULL;
                set_error(status, NULL);
                goto error;
            }
        }
    }

    Py_DECREF(replacement);

    /* Convert the list to a single string (also cleans up join_info). */
    return join_list_info(&join_info, self->string);

error:
    Py_XDECREF(join_info.list);
    Py_XDECREF(join_info.item);
    Py_DECREF(replacement);
    return NULL;
}

/* MatchObject's 'copy' method: copying is not supported, so this always
 * raises TypeError.
 */
static PyObject* match_copy(MatchObject* self, PyObject *unused) {
    static const char reason[] = "cannot copy this MatchObject";

    PyErr_SetString(PyExc_TypeError, reason);
    return NULL;
}

/* MatchObject's 'deepcopy' method: deep copying is not supported, so this
 * always raises TypeError.
 */
static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) {
    static const char reason[] = "cannot deepcopy this MatchObject";

    PyErr_SetString(PyExc_TypeError, reason);
    return NULL;
}

/* Builds the value of MatchObject's 'regs' attribute.
 *
 * The result is a tuple of (start, end) pairs: entry 0 is the whole match and
 * entry n is group n. The tuple is stored in self->regs and a new reference
 * to it is returned. Returns NULL on failure.
 */
static PyObject* match_regs(MatchObject* self) {
    PyObject* spans;
    PyObject* span;
    Py_ssize_t group;

    spans = PyTuple_New(self->group_count + 1);
    if (!spans)
        return NULL;

    /* The whole match comes first. */
    span = _pair(self->pos, self->endpos);
    if (!span)
        goto failed;
    PyTuple_SET_ITEM(spans, 0, span);

    /* Then each group in turn. */
    for (group = 1; group <= self->group_count; group++) {
        span = _pair(self->marks[group * 2 - 2], self->marks[group * 2 - 1]);
        if (!span)
            goto failed;
        PyTuple_SET_ITEM(spans, group, span);
    }

    /* One reference is kept by the match object, the other is returned. */
    self->regs = spans;
    Py_INCREF(spans);

    return spans;

failed:
    Py_DECREF(spans);
    return NULL;
}

/* MatchObject's slice method.
 *
 * Treats the match as a sequence of group_count + 1 groups (group 0 being the
 * whole match) and returns a tuple with the text of the groups selected by
 * the slice. Groups that are undefined are reported as None.
 *
 * Returns a new reference, or NULL on failure.
 */
Py_LOCAL(PyObject*) match_get_group_slice(MatchObject* self, PySliceObject*
  slice) {
    Py_ssize_t start;
    Py_ssize_t end;
    Py_ssize_t step;
    Py_ssize_t slice_length;
    Py_ssize_t cur;
    Py_ssize_t i;
    PyObject* result;

    if (PySlice_GetIndicesEx(slice, self->group_count + 1, &start, &end, &step,
      &slice_length) < 0) {
        return NULL;
    }

    if (slice_length <= 0)
        return PyTuple_New(0);

    result = PyTuple_New(slice_length);
    if (!result)
        return NULL;

    cur = start;
    for (i = 0; i < slice_length; i++) {
        PyObject* item;

        item = match_get_group_by_index(self, (long)cur, Py_None);
        if (!item) {
            /* Don't return a partly-filled tuple with an error pending. */
            Py_DECREF(result);
            return NULL;
        }

        /* The tuple is new, so the slot can be filled directly. */
        PyTuple_SET_ITEM(result, i, item);
        cur += step;
    }

    return result;
}

/* MatchObject's length method: the number of capture groups plus one for
 * group 0, the whole match.
 */
static Py_ssize_t match_length(MatchObject* self) {
    return 1 + self->group_count;
}

/* MatchObject's subscript method.
 *
 * A slice selects a tuple of groups; any other item is treated as a single
 * group index (number or name), with negative numbers allowed.
 */
static PyObject* match_subscript(MatchObject* self, PyObject* item) {
    if (!PySlice_Check(item))
        return match_get_group(self, item, Py_None, TRUE);

    return match_get_group_slice(self, (PySliceObject*)item);
}

/* The documentation of a MatchObject. */

/* Docstring of the 'group' method (referenced from match_methods). */
PyDoc_STRVAR(match_group_doc,
    "group([group1, ...]) --> string or tuple of strings.\n\
    Return one or more subgroups of the match.  If there is a single argument,\n\
    the result is a single string; if there are multiple arguments, the result\n\
    is a tuple with one item per argument; if there are no arguments, the whole\n\
    match is returned. Group 0 is the whole match.");

/* Docstring of the 'start' method (referenced from match_methods). */
PyDoc_STRVAR(match_start_doc,
    "start([group]) --> int.\n\
    Return the index of the start of a subgroup of the match.  Defaults to group\n\
    0 which is the whole match.  Return -1 if the group exists but did not\n\
    contribute to the match.");

/* Docstring of the 'end' method (referenced from match_methods). */
PyDoc_STRVAR(match_end_doc,
    "end([group]) --> int.\n\
    Return the index of the end of a subgroup of the match.  Defaults to group\n\
    0 which is the whole match.  Return -1 if the group exists but did not\n\
    contribute to the match.");

/* Docstring of the 'span' method (referenced from match_methods). */
PyDoc_STRVAR(match_span_doc,
    "span([group]) --> 2-tuple of int.\n\
    Return the 2-tuple of the indices of the start and end of a subgroup of the\n\
    match.  If a group did not contribute to the match, this is (-1, -1).\n\
    Defaults to group 0 which is the entire match.");

/* Docstring of the 'groups' method (referenced from match_methods). */
PyDoc_STRVAR(match_groups_doc,
    "groups(default=None) --> tuple of strings.\n\
    Return a tuple containing all the subgroups of the match.  The argument is\n\
    the default for groups that did not participate in the match.");

/* Docstring of the 'groupdict' method (referenced from match_methods). */
PyDoc_STRVAR(match_groupdict_doc,
    "groupdict(default=None) --> dict.\n\
    Return a dictionary containing all the named subgroups of the match, keyed\n\
    by the subgroup name.  The argument is the default for groups that did not\n\
    participate in the match.");

/* Docstring of the 'expand' method (referenced from match_methods). */
PyDoc_STRVAR(match_expand_doc,
    "expand(template) --> string.\n\
    Return the string obtained by doing backslash substitution on the template,\n\
    as done by the sub() method.");

/* MatchObject's methods.
 *
 * Each entry gives the Python-visible name, the C implementation, the calling
 * convention flags and (where one exists) the docstring. The table is used
 * both by match_getattr() and as Match_Type's tp_methods, and is terminated
 * by an entry whose name is NULL.
 */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction)match_group, METH_VARARGS, match_group_doc},
    {"start", (PyCFunction)match_start, METH_VARARGS, match_start_doc},
    {"end", (PyCFunction)match_end, METH_VARARGS, match_end_doc},
    {"span", (PyCFunction)match_span, METH_VARARGS, match_span_doc},
    {"groups", (PyCFunction)match_groups, METH_VARARGS|METH_KEYWORDS,
      match_groups_doc},
    {"groupdict", (PyCFunction)match_groupdict, METH_VARARGS|METH_KEYWORDS,
      match_groupdict_doc},
    {"expand", (PyCFunction)match_expand, METH_O, match_expand_doc},
    /* Copying is rejected: both of these always raise TypeError. */
    {"__copy__", (PyCFunction)match_copy, METH_NOARGS},
    {"__deepcopy__", (PyCFunction)match_deepcopy, METH_O},
    /* Subscripting, also available through match_as_mapping. */
    {"__getitem__", (PyCFunction)match_subscript, METH_O|METH_COEXIST},
    {NULL, NULL}
};

/* Gets an attribute of a MatchObject.
 *
 * Methods are looked up in match_methods first. Failing that, the data
 * attributes 'lastindex', 'lastgroup', 'regs', 'string', 're', 'pos' and
 * 'endpos' are recognised. Any other name raises AttributeError.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject* match_getattr(MatchObject* self, char* name) {
    PyObject* res;

    res = Py_FindMethod(match_methods, (PyObject*)self, name);
    if (res)
        return res;

    /* It wasn't a method, so discard the error and try the attributes. */
    PyErr_Clear();

    if (!strcmp(name, "lastindex")) {
        if (self->lastindex >= 0)
            return PyInt_FromSsize_t(self->lastindex);

        Py_INCREF(Py_None);
        return Py_None;
    }

    if (!strcmp(name, "lastgroup")) {
        if (self->pattern->indexgroup && self->lastgroup >= 0) {
            PyObject* index;
            PyObject* result;

            index = PyInt_FromSsize_t(self->lastgroup);
            if (!index)
                return NULL;

            result = PyDict_GetItem(self->pattern->indexgroup, index);
            Py_DECREF(index);
            if (result) {
                /* PyDict_GetItem returns a borrowed reference, but the caller
                 * expects to own what it is given.
                 */
                Py_INCREF(result);
                return result;
            }
            PyErr_Clear();
        }

        Py_INCREF(Py_None);
        return Py_None;
    }

    if (!strcmp(name, "regs")) {
        /* The tuple is built on first use and then reused. */
        if (self->regs) {
            Py_INCREF(self->regs);
            return self->regs;
        } else
            return match_regs(self);
    }

    if (!strcmp(name, "string")) {
        if (self->string) {
            Py_INCREF(self->string);
            return self->string;
        } else {
            Py_INCREF(Py_None);
            return Py_None;
        }
    }

    if (!strcmp(name, "re")) {
        Py_INCREF(self->pattern);
        return (PyObject*)self->pattern;
    }

    /* Positions are Py_ssize_t values, so they must not be passed through the
     * "i" (C int) format of Py_BuildValue; convert them directly instead.
     */
    if (!strcmp(name, "pos"))
        return PyInt_FromSsize_t(self->pos);

    if (!strcmp(name, "endpos"))
        return PyInt_FromSsize_t(self->endpos);

    PyErr_SetString(PyExc_AttributeError, name);
    return NULL;
}

/* MatchObject's mapping protocol: len() and read-only subscripting. Item
 * assignment is not provided.
 */
static PyMappingMethods match_as_mapping = {
    (lenfunc)match_length,       /* mp_length */
    (binaryfunc)match_subscript, /* mp_subscript */
    0,                           /* mp_ass_subscript */
};

/* The type object for MatchObject.
 *
 * Attribute access goes through match_getattr (tp_getattr), destruction
 * through match_dealloc, and subscripting/len() through match_as_mapping.
 * The slots are positional, so their order must not be changed.
 *
 * FIXME: implement setattr("string", None) as a special case (to detach the
 * associated string, if any).
 */
static PyTypeObject Match_Type = {
    PyObject_HEAD_INIT(NULL)
    0,
    "_" RE_MODULE "." RE_MODULE_UPPER "_Match",
    sizeof(MatchObject), 0,
    (destructor)match_dealloc,  /* tp_dealloc */
    0,                          /* tp_print */
    (getattrfunc)match_getattr, /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    &match_as_mapping,          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    0,                          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    match_methods,              /* tp_methods */
};

/* Turns the outcome of a match attempt into a Python object.
 *
 * A positive status creates a new MatchObject from the state, a status of 0
 * (no match) gives None, and a negative status is reported with set_error()
 * and gives NULL. NULL is also returned if the MatchObject or its marks array
 * cannot be allocated.
 */
Py_LOCAL(PyObject*) pattern_new_match(PatternObject* pattern, RE_State* state,
  int status) {
    MatchObject* match;
    Py_ssize_t group;

    /* No match. */
    if (status == 0) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    /* Internal error. */
    if (status < 0) {
        set_error(status, NULL);
        return NULL;
    }

    match = PyObject_NEW(MatchObject, &Match_Type);
    if (!match)
        return NULL;

    /* These members must all be valid before the object can be released,
     * because match_dealloc looks at every one of them.
     */
    match->string = state->string;
    Py_INCREF(match->string);
    match->pattern = pattern;
    Py_INCREF(match->pattern);
    match->regs = NULL;

    match->marks = (Py_ssize_t*)re_alloc(pattern->group_count * 2 *
      sizeof(Py_ssize_t));
    if (!match->marks) {
        Py_DECREF(match);
        return NULL;
    }

    match->group_count = pattern->group_count;

    /* The pointer at which matching began is the start of the match when
     * matching forwards and its end when matching in reverse.
     */
    if (state->reverse) {
        match->pos = text_offset(state, state->text_ptr);
        match->endpos = text_offset(state, state->match_ptr);
    } else {
        match->pos = text_offset(state, state->match_ptr);
        match->endpos = text_offset(state, state->text_ptr);
    }

    /* Convert each group's pair of marks into offsets; a group without a
     * valid span is recorded as (-1, -1).
     */
    for (group = 0; group < pattern->group_count; group++) {
        Py_ssize_t* span = &match->marks[group * 2];
        Py_ssize_t first = text_offset(state, state->marks[group * 2]);
        Py_ssize_t last = text_offset(state, state->marks[group * 2 + 1]);

        if (first < 0 || last < first) {
            span[0] = -1;
            span[1] = -1;
        } else {
            span[0] = first;
            span[1] = last;
        }
    }

    match->lastindex = state->lastindex;
    match->lastgroup = state->lastgroup;

    return (PyObject*)match;
}

/* Returns the text of a capture group, taken straight from a state.
 *
 * The group counts as captured when the string isn't None, the index lies in
 * 1..group_count and both of the group's marks are set. Otherwise the result
 * is an empty slice of the string if 'empty' is true, and None if it isn't.
 */
Py_LOCAL(PyObject*) state_get_group(RE_State* state, Py_ssize_t index, PyObject*
  string, BOOL empty) {
    Py_ssize_t first_mark;
    Py_ssize_t start;
    Py_ssize_t end;

    first_mark = index * 2 - 2;

    if (string == Py_None || index < 1 || index >
      state->pattern->group_count || !state->marks[first_mark] ||
      !state->marks[first_mark + 1]) {
        if (!empty) {
            Py_INCREF(Py_None);
            return Py_None;
        }

        /* Want an empty string. */
        return PySequence_GetSlice(string, 0, 0);
    }

    start = text_offset(state, state->marks[first_mark]);
    end = text_offset(state, state->marks[first_mark + 1]);

    return PySequence_GetSlice(string, start, end);
}

/* ScannerObject's 'match' method.
 *
 * Runs the matcher (without searching) from the scanner's current state and
 * returns a MatchObject, None if there was no match, or NULL on error.
 */
static PyObject* scanner_match(ScannerObject* self, PyObject* unused) {
    RE_State* state = &self->state;
    PyObject* match;
    int status;

    /* Pick the matcher for the width of the string's characters. */
    status = state->charsize == 1 ? bmatch(state, FALSE) : umatch(state,
      FALSE);

    if (PyErr_Occurred())
        return NULL;

    match = pattern_new_match(self->pattern, state, status);

    /* The next call resumes here; if this match was zero-width then the next
     * one must not be another zero-width match at the same place.
     */
    state->must_advance = state->text_ptr == state->match_ptr;

    return match;
}

/* ScannerObject's 'search' method.
 *
 * Runs the matcher in search mode from the scanner's current state and
 * returns a MatchObject, None if nothing was found, or NULL on error.
 */
static PyObject* scanner_search(ScannerObject* self, PyObject *unused) {
    RE_State* state = &self->state;
    PyObject* match;
    Py_ssize_t direction;
    int status;

    /* Pick the matcher for the width of the string's characters. */
    status = state->charsize == 1 ? bmatch(state, TRUE) : umatch(state, TRUE);

    if (PyErr_Occurred())
        return NULL;

    match = pattern_new_match(self->pattern, state, status);

    if (!state->overlapped) {
        /* Continue from where we left off, but don't allow a contiguous
         * zero-width match.
         */
        state->must_advance = state->text_ptr == state->match_ptr;
        return match;
    }

    /* Overlapped: resume one character beyond the search anchor, in the
     * direction of the search.
     */
    direction = state->reverse ? -1 : 1;
    state->text_ptr = (void*)((char*)state->search_anchor + state->charsize *
      direction);
    state->must_advance = FALSE;

    return match;
}

/* ScannerObject's methods.
 *
 * Both methods take no arguments. The table is consulted by
 * scanner_getattr() and is terminated by an entry whose name is NULL.
 */
static PyMethodDef scanner_methods[] = {
    {"match", (PyCFunction)scanner_match, METH_NOARGS},
    {"search", (PyCFunction)scanner_search, METH_NOARGS},
    {NULL, NULL}
};

/* Destroys a ScannerObject: finalises its matching state, drops its reference
 * to the pattern and frees the object.
 */
static void scanner_dealloc(ScannerObject* self) {
    PatternObject* pattern = self->pattern;

    state_fini(&self->state);
    Py_DECREF(pattern);
    PyObject_DEL(self);
}

/* Gets an attribute of a ScannerObject.
 *
 * Methods are looked up in scanner_methods first; apart from those only the
 * 'pattern' attribute exists. Any other name raises AttributeError.
 */
static PyObject* scanner_getattr(ScannerObject* self, char* name) {
    PyObject* method;

    method = Py_FindMethod(scanner_methods, (PyObject*)self, name);
    if (method)
        return method;

    /* It wasn't a method, so discard the error and try the attributes. */
    PyErr_Clear();

    if (strcmp(name, "pattern") != 0) {
        PyErr_SetString(PyExc_AttributeError, name);
        return NULL;
    }

    Py_INCREF(self->pattern);
    return (PyObject*)self->pattern;
}

/* The type object for ScannerObject.
 *
 * Only the destructor and the attribute getter are filled in; the slots that
 * are not listed are left zero-initialised. The slots are positional, so
 * their order must not be changed.
 */
static PyTypeObject Scanner_Type = {
    PyObject_HEAD_INIT(NULL)
    0,
    "_" RE_MODULE "." RE_MODULE_UPPER "_Scanner",
    sizeof(ScannerObject), 0,
    (destructor)scanner_dealloc,  /* tp_dealloc */
    0,                            /* tp_print */
    (getattrfunc)scanner_getattr, /* tp_getattr */
};

/* Creates a new ScannerObject.
 *
 * Python signature: scanner(string, pos=None, endpos=None, overlapped=0).
 * 'pos' and 'endpos' may be integers or None; None stands for the start of
 * the string and PY_SSIZE_T_MAX respectively.
 *
 * Returns a new reference to the scanner, or NULL with an exception set.
 */
static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args,
  PyObject* kw) {
    /* Create search state object. */
    ScannerObject* self;
    Py_ssize_t start;
    Py_ssize_t end;

    PyObject* string;
    PyObject* pos = Py_None;
    PyObject* endpos = Py_None;
    Py_ssize_t overlapped = FALSE;
    static char* kwlist[] = { "string", "pos", "endpos", "overlapped", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|OOn:scanner", kwlist, &string,
      &pos, &endpos, &overlapped))
        return NULL;

    start = as_string_index(pos, 0);
    end = as_string_index(endpos, PY_SSIZE_T_MAX);
    if (PyErr_Occurred())
        return NULL;

    /* Create a scanner object. */
    self = PyObject_NEW(ScannerObject, &Scanner_Type);
    if (!self)
        return NULL;

    self->pattern = pattern;
    Py_INCREF(self->pattern);

    /* 'overlapped' is parsed as an integer; reduce it to a proper BOOL so that
     * any non-zero value counts as true.
     */
    if (!state_init(&self->state, pattern, string, start, end, overlapped ?
      TRUE : FALSE)) {
        /* The state was never initialised, so scanner_dealloc (which would
         * finalise it) must not run. Release the pattern reference taken
         * above by hand, then free the bare object.
         */
        Py_DECREF(self->pattern);
        PyObject_DEL(self);
        return NULL;
    }

    return (PyObject*) self;
}

/* SplitterObject's 'split' method.
 *
 * Each call returns the next item of the split:
 *
 *   - when self->index is 0, the pattern is searched for again and the text
 *     between the previous split point and the new match is returned (or,
 *     if there's no further match or maxsplit has been reached, the remaining
 *     text, after which the splitter is marked as finished);
 *   - otherwise, the capture group numbered self->index of the latest match
 *     is returned.
 *
 * self->index is then incremented and wraps back to 0 once it exceeds the
 * pattern's group count.
 *
 * Once finished, every call returns Py_False.
 *
 * Returns a new reference, or NULL on error.
 */
static PyObject* splitter_split(SplitterObject* self, PyObject *unused) {
    RE_State* state;
    PyObject* result;
    int status;

    if (self->finished) {
        /* Nothing left to return. */
        result = Py_False;
        Py_INCREF(result);
        return result;
    }

    state = &self->state;

    if (self->index == 0) {
        if (self->split_count < self->maxsplit) {
            Py_ssize_t step;
            void* end_ptr;

            /* Direction of travel, and the end of the slice in that
             * direction.
             */
            if (state->reverse) {
                step = -1;
                end_ptr = state->slice_start;
            } else {
                step = 1;
                end_ptr = state->slice_end;
            }

retry:
            /* Search for the next match. */
            if (state->charsize == 1)
                status = bmatch(state, TRUE);
            else
                status = umatch(state, TRUE);

            if (PyErr_Occurred())
                goto error;

            if (status == RE_ERROR_SUCCESS) {
                if (!state->zero_width) {
                    /* The current behaviour is to advance one character if the
                     * split was zero-width. Unfortunately, this can give an
                     * incorrect result. GvR wants this behaviour to be retained
                     * so as not to break any existing software which might rely
                     * on it. The correct behaviour is enabled by setting the
                     * 'zero_width' flag.
                     */
                     if (state->text_ptr == state->match_ptr) {
                         /* A zero-width match when the previous split point
                          * is already at the end: there's nothing more to
                          * split.
                          */
                         if (self->last == end_ptr)
                             goto no_match;

                         /* Advance one character. */
                         state->text_ptr = (char*)state->text_ptr +
                           state->charsize * step;
                         state->must_advance = FALSE;
                         goto retry;
                     }
                }

                ++self->split_count;

                /* Get segment before this match. */
                if (state->reverse)
                    result = PySequence_GetSlice(state->string,
                      text_offset(state, state->match_ptr), text_offset(state,
                      self->last));
                else
                    result = PySequence_GetSlice(state->string,
                      text_offset(state, self->last), text_offset(state,
                      state->match_ptr));
                if (!result)
                    goto error;

                /* The next segment will start where this match ended. */
                self->last = state->text_ptr;

                /* The correct behaviour is to reject a zero-width match just
                 * after a split point. The current behaviour is to advance one
                 * character if the match was zero-width. Unfortunately, this
                 * can give an incorrect result. GvR wants this behaviour to be
                 * retained so as not to break any existing software which might
                 * rely on it. The correct behaviour is enabled by setting the
                 * 'zero_width' flag.
                 */
                if (state->zero_width)
                    /* Continue from where we left off, but don't allow a
                     * contiguous zero-width match.
                     */
                    state->must_advance = TRUE;
                else {
                    if (state->text_ptr == state->match_ptr)
                        /* Advance one character. */
                        state->text_ptr = (char*)state->text_ptr +
                          state->charsize * step;

                    state->must_advance = FALSE;
                }
            }
        } else
            /* The maximum number of splits has been reached. 'status' hasn't
             * been set on this path, so jump straight into the block below.
             */
            goto no_match;

        if (status == RE_ERROR_FAILURE) {
no_match:
            /* Get segment following last match (even if empty). */
            if (state->reverse)
                result = PySequence_GetSlice(state->string,
                  text_offset(state, state->text_start), text_offset(state,
                  self->last));
            else
                result = PySequence_GetSlice(state->string,
                  text_offset(state, self->last), text_offset(state,
                  state->text_end));
            if (!result)
                goto error;

            self->finished = TRUE;
        }
    } else {
        /* Add group. */
        result = state_get_group(state, self->index, state->string, FALSE);
        if (!result)
            goto error;
    }

    /* Step on to the next group, or back to searching after the last one. */
    ++self->index;
    if (self->index > state->pattern->group_count)
        self->index = 0;

    return result;

error:
    /* NOTE(review): splitter_dealloc also calls state_fini on this state, so
     * it is presumably safe to call twice -- confirm against state_fini.
     */
    state_fini(state);
    return NULL;
}

/* Method table for SplitterObject: a single no-argument 'split' method,
 * without a docstring.
 */
static PyMethodDef splitter_methods[] = {
    {"split", (PyCFunction)splitter_split, METH_NOARGS, NULL},
    {NULL, NULL, 0, NULL}
};

/* Destructor for a SplitterObject: finalises the embedded split state, drops
 * the reference held on the pattern, then frees the object itself.
 */
static void splitter_dealloc(SplitterObject* self) {
    PyObject* owner;

    /* The state is finalised before the pattern reference is released. */
    state_fini(&self->state);

    owner = (PyObject*)self->pattern;
    Py_DECREF(owner);

    PyObject_DEL(self);
}

/* Attribute lookup for a SplitterObject.
 *
 * The entries of splitter_methods are tried first. If that lookup fails, the
 * only other attribute recognised is "pattern", which yields a new reference
 * to the owning pattern. Any other name raises AttributeError.
 */
static PyObject* splitter_getattr(SplitterObject* self, char* name) {
    PyObject* method = Py_FindMethod(splitter_methods, (PyObject*)self, name);

    if (method != NULL)
        return method;

    /* Clear whatever error the failed method lookup left set. */
    PyErr_Clear();

    if (strcmp(name, "pattern") != 0) {
        PyErr_SetString(PyExc_AttributeError, name);
        return NULL;
    }

    Py_INCREF(self->pattern);
    return (PyObject*)self->pattern;
}

/* Type object for SplitterObject.
 *
 * Only the leading slots are listed; the remaining slots are left to C's
 * default (zero) initialisation. Attribute access goes through the
 * tp_getattr hook.
 */
static PyTypeObject Splitter_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                            /* ob_size */
    /* tp_name */
    "_" RE_MODULE "." RE_MODULE_UPPER "_Splitter",
    sizeof(SplitterObject), 0,    /* tp_basicsize, tp_itemsize */
    (destructor)splitter_dealloc,  /* tp_dealloc */
    0,                            /* tp_print */
    (getattrfunc)splitter_getattr, /* tp_getattr */
};

/* Creates a new SplitterObject.
 *
 * Python signature: splitter(string[, maxsplit]).
 *
 * A 'maxsplit' of 0 (the default) means that there's no limit on the number
 * of splits.
 *
 * Returns a new reference to the splitter, or NULL if the arguments can't be
 * parsed, the object can't be allocated or the split state can't be
 * initialised.
 */
static PyObject* pattern_splitter(PatternObject* pattern, PyObject* args,
  PyObject* kw) {
    /* Create split state object. */
    SplitterObject* self;
    RE_State* state;

    PyObject* string;
    Py_ssize_t maxsplit = 0;
    static char* kwlist[] = { "string", "maxsplit", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:splitter", kwlist, &string,
      &maxsplit))
        return NULL;

    /* Create a splitter object. */
    self = PyObject_NEW(SplitterObject, &Splitter_Type);
    if (!self)
        return NULL;

    /* The splitter keeps its own reference to the pattern. */
    self->pattern = pattern;
    Py_INCREF(self->pattern);

    if (maxsplit == 0)
        maxsplit = PY_SSIZE_T_MAX;

    state = &self->state;

    if (!state_init(state, pattern, string, 0, PY_SSIZE_T_MAX, FALSE)) {
        /* Freeing the object directly bypasses splitter_dealloc, so the
         * reference taken on the pattern must be released here or it leaks.
         */
        Py_DECREF(self->pattern);
        PyObject_DEL(self);
        return NULL;
    }

    self->maxsplit = maxsplit;
    /* Splitting starts from the end of the text when searching backwards. */
    self->last = state->reverse ? state->text_end : state->text_start;
    self->split_count = 0;
    self->index = 0;
    self->finished = FALSE;

    return (PyObject*) self;
}

/* PatternObject's 'match' method.
 *
 * Python signature: match(string[, pos[, endpos]]).
 *
 * Runs the matcher with its search flag off and hands the outcome to
 * pattern_new_match. Returns that result, or NULL on error.
 */
static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject*
  kw) {
    static char* kwlist[] = { "string", "pos", "endpos", NULL };
    PyObject* string;
    PyObject* pos = Py_None;
    PyObject* endpos = Py_None;
    Py_ssize_t start;
    Py_ssize_t end;
    RE_State state;
    int status;
    PyObject* match;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|OO:match", kwlist, &string,
      &pos, &endpos))
        return NULL;

    /* Convert the optional bounds, defaulting to the whole string. */
    start = as_string_index(pos, 0);
    end = as_string_index(endpos, PY_SSIZE_T_MAX);
    if (PyErr_Occurred())
        return NULL;

    if (!state_init(&state, self, string, start, end, FALSE))
        return NULL;

    /* Pick the matcher which suits the width of the characters. */
    status = state.charsize == 1 ? bmatch(&state, FALSE) : umatch(&state,
      FALSE);

    /* Only build a result if the matcher didn't raise an exception. */
    match = PyErr_Occurred() ? NULL : pattern_new_match(self, &state, status);

    state_fini(&state);

    return match;
}

/* PatternObject's 'search' method.
 *
 * Python signature: search(string[, pos[, endpos]]).
 *
 * Returns None straight away if the slice is shorter than the pattern's
 * minimum width. Otherwise runs the matcher with its search flag on and hands
 * the outcome to pattern_new_match. Returns NULL on error.
 */
static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject*
  kw) {
    static char* kwlist[] = { "string", "pos", "endpos", NULL };
    PyObject* string;
    PyObject* pos = Py_None;
    PyObject* endpos = Py_None;
    Py_ssize_t start;
    Py_ssize_t end;
    void* characters;
    Py_ssize_t length;
    Py_ssize_t charsize;
    RE_State state;
    int status;
    PyObject* match;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|OO:search", kwlist, &string,
      &pos, &endpos))
        return NULL;

    /* Convert the optional bounds, defaulting to the whole string. */
    start = as_string_index(pos, 0);
    end = as_string_index(endpos, PY_SSIZE_T_MAX);
    if (PyErr_Occurred())
        return NULL;

    /* Fetch the characters of the string, their count and their width. */
    if (!get_string(string, &characters, &length, &charsize))
        return NULL;

    /* A negative bound counts back from the end of the string; after that,
     * both bounds are clamped to the range 0..length.
     */
    if (start < 0)
        start += length;
    if (start < 0)
        start = 0;
    else if (start > length)
        start = length;

    if (end < 0)
        end += length;
    if (end < 0)
        end = 0;
    else if (end > length)
        end = length;

    /* The slice can't hold even the shortest possible match. */
    if (end - start < (Py_ssize_t)self->min_width) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    if (!state_init_2(&state, self, string, characters, length, charsize, start,
      end, FALSE))
        return NULL;

    /* Pick the matcher which suits the width of the characters. */
    status = state.charsize == 1 ? bmatch(&state, TRUE) : umatch(&state, TRUE);

    /* Only build a result if the matcher didn't raise an exception. */
    match = PyErr_Occurred() ? NULL : pattern_new_match(self, &state, status);

    state_fini(&state);

    return match;
}

/* Resolves one item of a compiled replacement list for the current match.
 *
 * A string item is a literal and is returned as it is. An integer item is a
 * group reference: 0 is the whole match, 1..group_count is a capture group.
 *
 * Returns a new reference. None stands in for an empty string (an empty
 * match, or a group which is empty or didn't take part). Returns NULL with an
 * error set if the item is of the wrong type or refers to a group which
 * doesn't exist.
 */
Py_LOCAL(PyObject*) get_sub_replacement(PyObject* item, PyObject* string,
  RE_State* state, Py_ssize_t group_count) {
    long group;
    Py_ssize_t m;
    void* from;
    void* to;

    /* Literals can be used directly. */
    if (PyString_Check(item) || PyUnicode_Check(item)) {
        Py_INCREF(item);
        return item;
    }

    /* Anything else has to be an integer group reference. */
    if (PyInt_Check(item))
        group = PyInt_AsLong(item);
    else if (PyLong_Check(item))
        group = PyLong_AsLong(item);
    else {
        set_error(RE_ERROR_REPLACEMENT, NULL);
        return NULL;
    }

    /* The conversion to a C long may have failed. */
    if (PyErr_Occurred()) {
        set_error(RE_ERROR_REPLACEMENT, NULL);
        return NULL;
    }

    /* Reject references to groups which don't exist. */
    if (group != 0 && (group < 1 || group > group_count)) {
        set_error(RE_ERROR_INVALID_GROUP_REF, NULL);
        return NULL;
    }

    if (group == 0) {
        /* The whole of the match; its ends swap over for a reverse match. */
        if (state->match_ptr == state->text_ptr) {
            Py_INCREF(Py_None);
            return Py_None;
        }

        if (state->reverse) {
            from = state->text_ptr;
            to = state->match_ptr;
        } else {
            from = state->match_ptr;
            to = state->text_ptr;
        }
    } else {
        /* A capture group; each group owns a pair of consecutive marks. */
        m = group * 2 - 2;
        if (!state->marks[m] || state->marks[m] == state->marks[m + 1]) {
            Py_INCREF(Py_None);
            return Py_None;
        }

        from = state->marks[m];
        to = state->marks[m + 1];
    }

    return PySequence_GetSlice(string, text_offset(state, from),
      text_offset(state, to));
}

/* Checks whether a template is a literal.
 *
 * To keep it simple we'll say that a literal is a string which can be used
 * as-is, so no backslashes.
 *
 * Returns its length if it is a literal, otherwise -1. It also returns -1 if
 * get_string can't get the characters of the template.
 *
 * The result is a Py_ssize_t, the same type as the length reported by
 * get_string, so that a very long template isn't truncated on platforms where
 * int is narrower than Py_ssize_t.
 */
Py_LOCAL(Py_ssize_t) check_template(PyObject* str_template) {
    void* characters;
    Py_ssize_t length;
    Py_ssize_t charsize;

    if (!get_string(str_template, &characters, &length, &charsize))
        return -1;

    if (charsize == 1) {
        /* Single-byte characters. */
        char* c_ptr;
        char* end_ptr;

        c_ptr = (char*)characters;
        end_ptr = (char*)characters + length;

        while (c_ptr < end_ptr) {
            if (*c_ptr == '\\')
                return -1;

            ++c_ptr;
        }
    } else {
        /* Wide characters. */
        Py_UNICODE* c_ptr;
        Py_UNICODE* end_ptr;

        c_ptr = (Py_UNICODE*)characters;
        end_ptr = (Py_UNICODE*)characters + length;

        while (c_ptr < end_ptr) {
            if (*c_ptr == '\\')
                return -1;

            ++c_ptr;
        }
    }

    return length;
}

/* PatternObject's 'subx' method, which implements both 'sub' and 'subn'.
 *
 * str_template is either a callable, which is passed a MatchObject for each
 * match and whose result is used as the replacement (None meaning ""), or a
 * string template. A template without backslashes is inserted as-is; any other
 * template is handed to _compile_replacement, whose result is treated as a
 * list of literals and group references.
 *
 * maxsub is the maximum number of substitutions; 0 means no limit.
 *
 * Returns the new string or, if subn is non-zero, a tuple of the new string
 * and the number of substitutions made. Returns NULL on error.
 */
Py_LOCAL(PyObject*) pattern_subx(PatternObject* self, PyObject* str_template,
  PyObject* string, Py_ssize_t maxsub, Py_ssize_t subn) {
    void* characters;
    Py_ssize_t length;
    Py_ssize_t charsize;
    BOOL is_callable;
    BOOL is_literal;
    PyObject* replacement;
    RE_State state;
    JoinInfo join_info;
    Py_ssize_t sub_count;
    void* last;
    PyObject* item;
    void* end_ptr;

    /* Get the string. */
    if (!get_string(string, &characters, &length, &charsize))
        return NULL;

    /* If the pattern is too long for the string, then take a shortcut. */
    if ((Py_ssize_t)self->min_width > length) {
        Py_INCREF(string);

        if (subn)
            return Py_BuildValue("Ni", string, 0);

        return string;
    }

    if (maxsub == 0)
        maxsub = PY_SSIZE_T_MAX;

    is_callable = FALSE;
    is_literal = FALSE;

    /* sub/subn takes either a function or a string template. */
    if (PyCallable_Check(str_template)) {
        /* It's callable. */
        is_callable = TRUE;

        replacement = str_template;
        Py_INCREF(replacement);
    } else {
        /* The template isn't callable, so is it a literal?
         *
         * To keep it simple we'll say that a literal is a string which can be
         * used as-is, so no backslashes.
         */
        Py_ssize_t literal_length;

        literal_length = check_template(str_template);
        if (literal_length >= 0) {
            /* It's a literal. */
            is_literal = TRUE;

            /* We'll represent "" with None. */
            replacement = literal_length > 0 ? str_template : Py_None;
            Py_INCREF(replacement);
        } else {
            /* The template isn't a literal either, so hand it over to the
             * template compiler.
             *
             * NOTE(review): this relies on call() coping with a NULL argument
             * tuple and taking ownership of it -- confirm against call().
             */
            replacement = call(RE_MODULE, "_compile_replacement",
              PyTuple_Pack(2, self, str_template));
            if (!replacement)
                return NULL;
        }
    }

    if (!state_init_2(&state, self, string, characters, length, charsize, 0,
      PY_SSIZE_T_MAX, FALSE)) {
        Py_DECREF(replacement);
        return NULL;
    }

    join_info.item = NULL;
    join_info.list = NULL;

    sub_count = 0;
    last = state.reverse ? state.text_end : state.text_start;
    while (sub_count < maxsub) {
        int status;

        if (state.charsize == 1)
            status = bmatch(&state, TRUE);
        else
            status = umatch(&state, TRUE);

        if (PyErr_Occurred())
            goto error;

        if (status == 0)
            break;

        /* Append the segment before this match. */
        if (state.match_ptr != last) {
            if (state.reverse)
                item = PySequence_GetSlice(string, text_offset(&state,
                  state.match_ptr), text_offset(&state, last));
            else
                item = PySequence_GetSlice(string, text_offset(&state, last),
                  text_offset(&state, state.match_ptr));
            if (!item)
                goto error;

            /* Add to the list. */
            status = add_item(&join_info, item);
            Py_DECREF(item);
            if (status < 0) {
                set_error(status, NULL);
                goto error;
            }
        }

        /* Add this match. */
        if (is_callable) {
            /* Pass the MatchObject to the replacement function. */
            PyObject* match;
            PyObject* args;

            match = pattern_new_match(self, &state, 1);
            if (!match)
                goto error;
            args = PyTuple_Pack(1, match);
            if (!args) {
                Py_DECREF(match);
                goto error;
            }
            item = PyObject_CallObject(replacement, args);
            Py_DECREF(args);
            Py_DECREF(match);
            if (!item)
                goto error;

            /* Add to the list, unless it's None, which represents "".
             *
             * The call returned a new reference, which must be released in
             * both cases, otherwise a reference to None would be leaked
             * every time that the function returned it.
             */
            if (item == Py_None)
                Py_DECREF(item);
            else {
                status = add_item(&join_info, item);
                Py_DECREF(item);
                if (status < 0) {
                    set_error(status, NULL);
                    goto error;
                }
            }
        } else if (is_literal) {
            /* The replacement is a literal string.
             *
             * None represents "", otherwise add it to the list.
             */
            if (replacement != Py_None) {
                status = add_item(&join_info, replacement);
                if (status < 0) {
                    set_error(status, NULL);
                    goto error;
                }
            }
        } else {
            /* The replacement is a list template. */
            Py_ssize_t size;
            Py_ssize_t i;

            /* Add each part of the template to the list. */
            size = PyList_GET_SIZE(replacement);
            for (i = 0; i < size; i++) {
                PyObject* part;
                PyObject* str_item;

                /* A borrowed reference to the template item. */
                part = PyList_GET_ITEM(replacement, i);
                str_item = get_sub_replacement(part, string, &state,
                  self->group_count);
                if (!str_item)
                    goto error;

                /* Add to the list. */
                if (str_item == Py_None)
                    /* None for "". */
                    Py_DECREF(str_item);
                else {
                    status = add_item(&join_info, str_item);
                    Py_DECREF(str_item);
                    if (status < 0) {
                        set_error(status, NULL);
                        goto error;
                    }
                }
            }
        }

        ++sub_count;

        /* Continue from where we left off, but don't allow a contiguous
         * zero-width match.
         */
        last = state.text_ptr;
        state.must_advance = TRUE;
    }

    /* Get the segment following the last match. */
    end_ptr = state.reverse ? state.text_start : state.text_end;
    if (last != end_ptr) {
        int status;

        /* The segment is part of the original string. */
        if (state.reverse)
            item = PySequence_GetSlice(string, text_offset(&state,
              state.text_start), text_offset(&state, last));
        else
            item = PySequence_GetSlice(string, text_offset(&state, last),
              text_offset(&state, state.text_end));
        if (!item)
            goto error;
        status = add_item(&join_info, item);
        Py_DECREF(item);
        if (status < 0) {
            set_error(status, NULL);
            goto error;
        }
    }

    state_fini(&state);

    Py_DECREF(replacement);

    /* Convert the list to a single string (also cleans up join_info). */
    item = join_list_info(&join_info, string);
    if (!item)
        return NULL;

    if (subn)
        /* sub_count is a Py_ssize_t, so it needs the "n" format unit; "i"
         * would read it as an int from the variable argument list.
         */
        return Py_BuildValue("Nn", item, sub_count);

    return item;

error:
    Py_XDECREF(join_info.list);
    Py_XDECREF(join_info.item);
    state_fini(&state);
    Py_DECREF(replacement);
    return NULL;
}

/* PatternObject's 'sub' method.
 *
 * Python signature: sub(repl, string[, count]).
 *
 * Parses the arguments and defers to pattern_subx with the 'subn' flag off.
 */
static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
  {
    static char* kwlist[] = { "repl", "string", "count", NULL };
    PyObject* repl;
    PyObject* text;
    Py_ssize_t limit = 0;

    if (PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist, &repl, &text,
      &limit))
        return pattern_subx(self, repl, text, limit, 0);

    return NULL;
}

/* PatternObject's 'subn' method.
 *
 * Python signature: subn(repl, string[, count]).
 *
 * Parses the arguments and defers to pattern_subx with the 'subn' flag on.
 */
static PyObject* pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
  {
    static char* kwlist[] = { "repl", "string", "count", NULL };
    PyObject* repl;
    PyObject* text;
    Py_ssize_t limit = 0;

    if (PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist, &repl,
      &text, &limit))
        return pattern_subx(self, repl, text, limit, 1);

    return NULL;
}

/* PatternObject's 'split' method.
 *
 * Python signature: split(string[, maxsplit]).
 *
 * Builds a list which holds, for each match of the pattern, the text between
 * the previous split point and the match, followed by each of the pattern's
 * capture groups. The text after the last split is always appended, even if
 * it's empty.
 *
 * A 'maxsplit' of 0 (the default) means that there's no limit on the number
 * of splits.
 *
 * Returns a new list, or NULL on error.
 */
static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject*
  kw) {
    RE_State state;
    PyObject* list;
    PyObject* item;
    int status;
    Py_ssize_t split_count;
    Py_ssize_t i;
    void* start_ptr;
    void* end_ptr;
    Py_ssize_t step;
    void* last;

    PyObject* string;
    Py_ssize_t maxsplit = 0;
    static char* kwlist[] = { "string", "maxsplit", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist, &string,
      &maxsplit))
        return NULL;

    if (maxsplit == 0)
        maxsplit = PY_SSIZE_T_MAX;

    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX, FALSE))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* Work out where to start and stop, and the direction of travel. */
    split_count = 0;
    if (state.reverse) {
        start_ptr = state.text_end;
        end_ptr = state.text_start;
        step = -1;
    } else {
        start_ptr = state.text_start;
        end_ptr = state.text_end;
        step = 1;
    }
    /* 'last' is the split point from which the next segment will start. */
    last = start_ptr;
    while (split_count < maxsplit) {
        if (state.charsize == 1)
            status = bmatch(&state, TRUE);
        else
            status = umatch(&state, TRUE);

        if (PyErr_Occurred())
            goto error;

        if (status == 0)
            /* No more matches. */
            break;

        if (!state.zero_width) {
            /* The current behaviour is to advance one character if the split
             * was zero-width. Unfortunately, this can give an incorrect result.
             * GvR wants this behaviour to be retained so as not to break any
             * existing software which might rely on it. The correct behaviour
             * is enabled by setting the 'zero_width' flag.
             */
            if (state.text_ptr == state.match_ptr) {
                /* A zero-width match when the previous split point is already
                 * at the end: there's nothing more to split.
                 */
                if (last == end_ptr)
                    break;

                /* Advance one character. */
                state.text_ptr = (char*)state.text_ptr + state.charsize * step;
                state.must_advance = FALSE;
                continue;
            }
        }

        /* Get segment before this match. */
        if (state.reverse)
            item = PySequence_GetSlice(string, text_offset(&state,
              state.match_ptr), text_offset(&state, last));
        else
            item = PySequence_GetSlice(string, text_offset(&state, last),
              text_offset(&state, state.match_ptr));
        if (!item)
            goto error;
        /* The list takes its own reference, so ours is released. */
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* Add groups (if any). */
        for (i = 1; i <= self->group_count; i++) {
            item = state_get_group(&state, i, string, FALSE);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        ++split_count;
        /* The next segment will start where this match ended. */
        last = state.text_ptr;

        /* The correct behaviour is to reject a zero-width match just after a
         * split point. The current behaviour is to advance one character if the
         * match was zero-width. Unfortunately, this can give an incorrect
         * result. GvR wants this behaviour to be retained so as not to break
         * any existing software which might rely on it. The correct behaviour
         * is enabled by setting the 'zero_width' flag.
         */
        if (state.zero_width)
            /* Continue from where we left off, but don't allow a contiguous
             * zero-width match.
             */
            state.must_advance = TRUE;
        else {
            if (state.text_ptr == state.match_ptr)
                /* Advance one character. */
                state.text_ptr = (char*)state.text_ptr + state.charsize * step;

            state.must_advance = FALSE;
        }
    }

    /* Get segment following last match (even if empty). */
    if (state.reverse)
        item = PySequence_GetSlice(string, text_offset(&state,
          state.text_start), text_offset(&state, last));
    else
        item = PySequence_GetSlice(string, text_offset(&state, last),
          text_offset(&state, state.text_end));
    if (!item)
        goto error;
    status = PyList_Append(list, item);
    Py_DECREF(item);
    if (status < 0)
        goto error;

    state_fini(&state);

    return list;

error:
    /* Discard the partial result and release the state. */
    Py_DECREF(list);
    state_fini(&state);
    return NULL;
}

/* PatternObject's 'splititer' method.
 *
 * Creates a splitter for the arguments and wraps its bound 'split' method in
 * a callable-iterator which stops when that method returns False.
 *
 * Returns the new iterator, or NULL on error.
 */
static PyObject* pattern_splititer(PatternObject* pattern, PyObject* args,
  PyObject* kw) {
    PyObject* splitter;
    PyObject* bound_split;
    PyObject* result;

    splitter = pattern_splitter(pattern, args, kw);
    if (splitter == NULL)
        return NULL;

    /* Our own reference to the splitter is dropped once its method has been
     * looked up.
     */
    bound_split = PyObject_GetAttrString(splitter, "split");
    Py_DECREF(splitter);
    if (bound_split == NULL)
        return NULL;

    /* False is the sentinel which ends the iteration. */
    result = PyCallIter_New(bound_split, Py_False);
    Py_DECREF(bound_split);

    return result;
}

/* PatternObject's 'findall' method.
 *
 * Python signature: findall(string[, pos[, endpos[, overlapped]]]).
 *
 * Builds a list with one entry per match. What an entry is depends on the
 * number of capture groups in the pattern: with none, it's the matched text;
 * with one, it's that group; with more, it's a tuple of all of the groups.
 *
 * Returns a new list, or NULL on error.
 */
static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject*
  kw) {
    static char* kwlist[] = { "string", "pos", "endpos", "overlapped", NULL };
    PyObject* string;
    PyObject* pos = Py_None;
    PyObject* endpos = Py_None;
    Py_ssize_t overlapped = FALSE;
    Py_ssize_t start;
    Py_ssize_t end;
    RE_State state;
    PyObject* results;
    Py_ssize_t direction;
    int status;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|OOn:findall", kwlist, &string,
      &pos, &endpos, &overlapped))
        return NULL;

    /* Convert the optional bounds, defaulting to the whole string. */
    start = as_string_index(pos, 0);
    end = as_string_index(endpos, PY_SSIZE_T_MAX);
    if (PyErr_Occurred())
        return NULL;

    if (!state_init(&state, self, string, start, end, overlapped))
        return NULL;

    results = PyList_New(0);
    if (results == NULL) {
        state_fini(&state);
        return NULL;
    }

    direction = state.reverse ? -1 : 1;

    /* Keep searching for as long as the text position is within the slice. */
    while (state.slice_start <= state.text_ptr && state.text_ptr <=
      state.slice_end) {
        PyObject* entry;

        status = state.charsize == 1 ? bmatch(&state, TRUE) : umatch(&state,
          TRUE);

        if (PyErr_Occurred())
            goto error;

        if (status == 0)
            break;

        /* Build the entry directly, without creating a MatchObject. */
        if (self->group_count == 0) {
            /* No groups, so use the matched text. */
            Py_ssize_t from;
            Py_ssize_t to;

            if (state.reverse) {
                from = text_offset(&state, state.text_ptr);
                to = text_offset(&state, state.match_ptr);
            } else {
                from = text_offset(&state, state.match_ptr);
                to = text_offset(&state, state.text_ptr);
            }
            entry = PySequence_GetSlice(string, from, to);
            if (entry == NULL)
                goto error;
        } else if (self->group_count == 1) {
            /* One group, so use just that. */
            entry = state_get_group(&state, 1, string, TRUE);
            if (entry == NULL)
                goto error;
        } else {
            /* Several groups, so collect them into a tuple. */
            Py_ssize_t g;

            entry = PyTuple_New(self->group_count);
            if (entry == NULL)
                goto error;
            for (g = 0; g < self->group_count; g++) {
                PyObject* value = state_get_group(&state, g + 1, string, TRUE);
                if (value == NULL) {
                    Py_DECREF(entry);
                    goto error;
                }
                /* The tuple takes over the reference. */
                PyTuple_SET_ITEM(entry, g, value);
            }
        }

        /* The list takes its own reference, so ours is released. */
        status = PyList_Append(results, entry);
        Py_DECREF(entry);
        if (status < 0)
            goto error;

        if (state.overlapped) {
            /* Resume one character on from where this search was anchored. */
            state.text_ptr = (char*)state.search_anchor + state.charsize *
              direction;
            state.must_advance = FALSE;
        } else
            /* Resume from the end of the match, but if the match was
             * zero-width then the next one must not be found at the same
             * place.
             */
            state.must_advance = state.text_ptr == state.match_ptr;
    }

    state_fini(&state);

    return results;

error:
    Py_DECREF(results);
    state_fini(&state);
    return NULL;
}

/* PatternObject's 'finditer' method.
 *
 * Creates a scanner for the arguments and wraps its bound 'search' method in
 * a callable-iterator which stops when that method returns None.
 *
 * Returns the new iterator, or NULL on error.
 */
static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args,
  PyObject* kw) {
    PyObject* scanner;
    PyObject* bound_search;
    PyObject* result;

    scanner = pattern_scanner(pattern, args, kw);
    if (scanner == NULL)
        return NULL;

    /* Our own reference to the scanner is dropped once its method has been
     * looked up.
     */
    bound_search = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);
    if (bound_search == NULL)
        return NULL;

    /* None is the sentinel which ends the iteration. */
    result = PyCallIter_New(bound_search, Py_None);
    Py_DECREF(bound_search);

    return result;
}

/* PatternObject's '__copy__' hook.
 *
 * Copying isn't supported, so this always raises TypeError and returns NULL.
 */
static PyObject* pattern_copy(PatternObject* self, PyObject *unused) {
    static const char reason[] = "cannot copy this PatternObject";

    PyErr_SetString(PyExc_TypeError, reason);

    return NULL;
}

/* PatternObject's '__deepcopy__' hook.
 *
 * Deep-copying isn't supported, so this always raises TypeError and returns
 * NULL; 'memo' is ignored.
 */
static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) {
    static const char reason[] = "cannot deepcopy this PatternObject";

    PyErr_SetString(PyExc_TypeError, reason);

    return NULL;
}

/* The docstrings of a PatternObject's methods.
 *
 * Each one is written as a series of adjacent string literals, one per line
 * of the docstring; the lines after the first are indented by 4 spaces.
 */
PyDoc_STRVAR(pattern_match_doc,
    "match(string[, pos[, endpos]]) --> MatchObject or None.\n"
    "    Matches zero or more characters at the beginning of the string.");

PyDoc_STRVAR(pattern_search_doc,
    "search(string[, pos[, endpos]]) --> MatchObject or None.\n"
    "    Scan through string looking for a match, and return a corresponding\n"
    "    MatchObject instance.  Return None if no position in the string matches.");

PyDoc_STRVAR(pattern_sub_doc,
    "sub(repl, string[, count=0]) --> newstring\n"
    "    Return the string obtained by replacing the leftmost non-overlapping\n"
    "    occurrences of pattern in string by the replacement repl.");

PyDoc_STRVAR(pattern_subn_doc,
    "subn(repl, string[, count=0]) --> (newstring, number of subs)\n"
    "    Return the tuple (new_string, number_of_subs_made) found by replacing the\n"
    "    leftmost non-overlapping occurrences of pattern with the replacement repl.");

PyDoc_STRVAR(pattern_split_doc,
    "split(string[, maxsplit=0]) --> list.\n"
    "    Split string by the occurrences of pattern.");

PyDoc_STRVAR(pattern_splititer_doc,
    "splititer(string[, maxsplit=0]) --> iterator.\n"
    "    Return an iterator yielding the parts of a split string.");

PyDoc_STRVAR(pattern_findall_doc,
    "findall(string[, pos[, endpos[, overlapped=False]]]) --> list.\n"
    "    Return a list of all matches of pattern in string.  The matches may be\n"
    "    overlapped if overlapped is True.");

PyDoc_STRVAR(pattern_finditer_doc,
    "finditer(string[, pos[, endpos[, overlapped=False]]]) --> iterator.\n"
    "    Return an iterator over all matches for the RE pattern in string.  The\n"
    "    matches may be overlapped if overlapped is True.  For each match, the\n"
    "    iterator returns a MatchObject.");

/* Method table for PatternObject.
 *
 * 'scanner' and the two copy hooks have no docstring. The table ends with an
 * all-zero sentinel entry.
 */
static PyMethodDef pattern_methods[] = {
    {"match", (PyCFunction)pattern_match, METH_VARARGS | METH_KEYWORDS,
      pattern_match_doc},
    {"search", (PyCFunction)pattern_search, METH_VARARGS | METH_KEYWORDS,
      pattern_search_doc},
    {"sub", (PyCFunction)pattern_sub, METH_VARARGS | METH_KEYWORDS,
      pattern_sub_doc},
    {"subn", (PyCFunction)pattern_subn, METH_VARARGS | METH_KEYWORDS,
      pattern_subn_doc},
    {"split", (PyCFunction)pattern_split, METH_VARARGS | METH_KEYWORDS,
      pattern_split_doc},
    {"splititer", (PyCFunction)pattern_splititer, METH_VARARGS | METH_KEYWORDS,
      pattern_splititer_doc},
    {"findall", (PyCFunction)pattern_findall, METH_VARARGS | METH_KEYWORDS,
      pattern_findall_doc},
    {"finditer", (PyCFunction)pattern_finditer, METH_VARARGS | METH_KEYWORDS,
      pattern_finditer_doc},
    {"scanner", (PyCFunction)pattern_scanner, METH_VARARGS | METH_KEYWORDS,
      NULL},
    {"__copy__", (PyCFunction)pattern_copy, METH_NOARGS, NULL},
    {"__deepcopy__", (PyCFunction)pattern_deepcopy, METH_O, NULL},
    {NULL, NULL, 0, NULL}
};

/* The docstring of the PatternObject type itself. */
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");

/* Deallocates a PatternObject.
 *
 * Frees every compiled node together with its values, then the node list, the
 * bad_character_offset and good_suffix_offset storage, the group info and the
 * repeat info. After that any weak references are cleared, the references
 * held on the Python objects are dropped and the object itself is freed.
 */
static void pattern_dealloc(PatternObject* self) {
    Py_ssize_t n;

    /* Discard the nodes, then the list which held them. */
    for (n = 0; n < self->node_count; n++) {
        RE_Node* doomed = self->node_list[n];

        re_dealloc(doomed->values);
        re_dealloc(doomed);
    }
    re_dealloc(self->node_list);

    /* Discard the offset storage. */
    re_dealloc(self->bad_character_offset);
    re_dealloc(self->good_suffix_offset);

    /* Discard the group info and the repeat info. */
    re_dealloc(self->group_info);
    re_dealloc(self->repeat_info);

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject*)self);

    /* Release the Python objects owned by the pattern. */
    Py_XDECREF(self->pattern);
    Py_XDECREF(self->groupindex);
    Py_XDECREF(self->indexgroup);

    PyObject_DEL(self);
}

/* Gets an attribute of a PatternObject.
 *
 * The methods in pattern_methods are tried first; failing that, the data
 * attributes 'pattern', 'flags', 'groups' and 'groupindex' are recognised.
 *
 * Returns a new reference, or NULL with AttributeError set (the message is
 * the attribute name) if the name isn't recognised.
 */
static PyObject* pattern_getattr(PatternObject* self, char* name) {
    PyObject* res;

    res = Py_FindMethod(pattern_methods, (PyObject*)self, name);
    if (res)
        return res;

    /* It's not a method, so discard that error and try the data attributes. */
    PyErr_Clear();

    if (!strcmp(name, "pattern")) {
        Py_INCREF(self->pattern);
        return (PyObject*)self->pattern;
    }

    /* The numeric attributes are converted explicitly to Py_ssize_t and built
     * with the "n" format code, so the type passed through the varargs always
     * matches what Py_BuildValue reads. ("i" reads an int, which is a
     * mismatch if the field is wider than an int.)
     */
    if (!strcmp(name, "flags"))
        return Py_BuildValue("n", (Py_ssize_t)self->flags);

    if (!strcmp(name, "groups"))
        return Py_BuildValue("n", (Py_ssize_t)self->group_count);

    if (!strcmp(name, "groupindex")) {
        Py_INCREF(self->groupindex);
        return (PyObject*)self->groupindex;
    }

    PyErr_SetString(PyExc_AttributeError, name);
    return NULL;
}

/* The type object of a PatternObject.
 *
 * Only deallocation, attribute lookup (through the tp_getattr slot, which
 * takes the attribute name as a C string), the docstring and weak reference
 * support are provided; every other slot is left empty.
 */
static PyTypeObject Pattern_Type = {
    PyObject_HEAD_INIT(NULL)
    0,
    "_" RE_MODULE "." RE_MODULE_UPPER "_Pattern",
    sizeof(PatternObject), 0,
    (destructor)pattern_dealloc,            /* tp_dealloc */
    0,                                      /* tp_print */
    (getattrfunc)pattern_getattr,           /* tp_getattr */
    0,                                      /* tp_setattr */
    0,                                      /* tp_compare */
    0,                                      /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    0,                                      /* tp_hash */
    0,                                      /* tp_call */
    0,                                      /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_HAVE_WEAKREFS,               /* tp_flags */
    pattern_doc,                            /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    offsetof(PatternObject, weakreflist),   /* tp_weaklistoffset */
};

/* Bypasses the 1-way branches in the pattern.
 *
 * Building the nodes is made simpler by allowing branches to have a single
 * exit, ie a BRANCH node with no second destination. Every reference to such
 * a branch is redirected here to the branch's own destination, which leaves
 * the branch itself unreferenced.
 */
Py_LOCAL(void) skip_one_way_branches(PatternObject* pattern) {
    BOOL changed;
    RE_Node* start;

    /* Keep sweeping over all the nodes until a complete sweep changes
     * nothing. Each redirection shortens a chain of 1-way branches by one
     * link, so several sweeps might be needed.
     */
    for (;;) {
        Py_ssize_t n;

        changed = FALSE;

        for (n = 0; n < pattern->node_count; n++) {
            RE_Node* source = pattern->node_list[n];
            RE_Node* target;

            /* Does the first destination lead to a 1-way branch? */
            target = source->next_1.node;
            if (target && target->op == RE_OP_BRANCH && !target->next_2.node)
            {
                source->next_1.node = target->next_1.node;
                changed = TRUE;
            }

            /* Does the second destination lead to a 1-way branch? */
            target = source->next_2.node;
            if (target && target->op == RE_OP_BRANCH && !target->next_2.node)
            {
                source->next_2.node = target->next_1.node;
                changed = TRUE;
            }
        }

        if (!changed)
            break;
    }

    /* Nothing refers to the start node, so it has to be handled separately.
     * It might be a 1-way branch, possibly the first in a chain of them, so
     * step along until something else is found.
     */
    start = pattern->start_node;
    while (start->op == RE_OP_BRANCH && !start->next_2.node)
        start = start->next_1.node;
    pattern->start_node = start;
}

/* Discards any unused nodes.
 *
 * Optimising the nodes might result in some nodes no longer being used. A
 * node is used if it's the start node or if a used node refers to it. Used
 * nodes are given a non-negative index in the order in which they're found;
 * the others keep their negative index and are freed.
 */
Py_LOCAL(void) discard_unused_nodes(PatternObject* pattern) {
    size_t used;
    size_t previously_used;
    size_t kept;
    Py_ssize_t n;

    /* At first only the start node is known to be used. */
    pattern->start_node->index = 0;
    used = 1;

    /* Mark the destinations of the used nodes, sweeping repeatedly until a
     * sweep finds nothing new.
     */
    do {
        previously_used = used;

        for (n = 0; n < pattern->node_count; n++) {
            RE_Node* node = pattern->node_list[n];
            RE_Node* dest;

            if (node->index < 0)
                /* Not (yet) known to be used. */
                continue;

            /* Check the first destination. */
            dest = node->next_1.node;
            if (dest && dest->index < 0)
                dest->index = used++;

            /* Check the second destination. */
            dest = node->next_2.node;
            if (dest && dest->index < 0)
                dest->index = used++;
        }
    } while (used > previously_used);

    /* Close up the node list over the unused nodes, freeing them. */
    kept = 0;

    for (n = 0; n < pattern->node_count; n++) {
        RE_Node* node = pattern->node_list[n];

        if (node->index < 0) {
            re_dealloc(node->values);
            re_dealloc(node);
            continue;
        }

        pattern->node_list[kept++] = node;
    }

    pattern->node_count = kept;
}

/* Assigns offsets for the groups into the values array.
 *
 * Entries for groups come before those for repeats, so the groups are given
 * the offsets 0, 1, 2, ... in order.
 *
 * Returns FALSE, after reporting RE_ERROR_ILLEGAL, if any entry has an id
 * less than 1, ie it's for a group which was never recorded.
 */
Py_LOCAL(BOOL) assign_group_offsets(PatternObject* pattern) {
    Py_ssize_t g;

    for (g = 0; g < pattern->group_count; g++) {
        RE_GroupInfo* info = &pattern->group_info[g];

        if (info->id < 1) {
            /* There's an entry for a non-existent group. */
            set_error(RE_ERROR_ILLEGAL, NULL);
            return FALSE;
        }

        /* The offset of a group is simply its position in the group info. */
        info->value_offset = g;
    }

    return TRUE;
}

/* Marks all the groups which are named.
 *
 * A group is treated as named if its number (counting from 1) is a key of
 * the pattern's 'indexgroup' dict.
 *
 * This function has no way of reporting a failure, so if the key can't be
 * built or the lookup fails then the Python error is cleared and the group is
 * treated as unnamed.
 */
Py_LOCAL(void) mark_named_groups(PatternObject* pattern) {
    Py_ssize_t i;

    for (i = 0; i < pattern->group_count; i++) {
        RE_GroupInfo* group_info;
        PyObject* index;
        int found;

        group_info = &pattern->group_info[i];

        /* The group number is a Py_ssize_t, for which the format code is "n".
         * ("i" would read an int from the varargs.)
         */
        index = Py_BuildValue("n", i + 1);
        if (!index) {
            /* Don't pass NULL on to PyDict_Contains or Py_DECREF. */
            PyErr_Clear();
            group_info->has_name = FALSE;
            continue;
        }

        found = PyDict_Contains(pattern->indexgroup, index);
        Py_DECREF(index);

        if (found < 0) {
            /* -1 means that the lookup failed, not that the key is present. */
            PyErr_Clear();
            found = 0;
        }

        group_info->has_name = found > 0 ? TRUE : FALSE;
    }
}

#if defined(VERBOSE)
static int compare_nodes(const void* elem1, const void* elem2) {
    RE_Node** e1 = (RE_Node**)elem1;
    RE_Node** e2 = (RE_Node**)elem2;
    return (int)((*e1)->index - (*e2)->index);
}

#endif
/* Sets the test node of a link to a next node.
 *
 * The 'test', 'match_next' and 'match_step' fields of the link are filled in
 * from the link's destination:
 *
 * - By default 'test' and 'match_next' are the destination itself and
 *   'match_step' is 0. (If there's no destination they're all left like
 *   that, with NULL for the nodes.)
 *
 * - If the destination is a group marker then 'test' is the first node
 *   after the run of group markers and nothing else is changed.
 *
 * - Otherwise, if the destination is one of the simple tests listed below,
 *   'match_next' is the node which follows it and 'match_step' is its step.
 */
Py_LOCAL(void) set_test_node(RE_NextNode* next) {
    RE_Node* target;
    RE_Node* probe;

    target = next->node;

    /* Start with the defaults. */
    next->test = target;
    next->match_next = target;
    next->match_step = 0;

    if (target == NULL)
        return;

    /* Look beyond any group markers for the node to test. */
    for (probe = target; probe->op == RE_OP_BEGIN_GROUP || probe->op ==
      RE_OP_END_GROUP; probe = probe->next_1.node)
        ;

    next->test = probe;

    if (probe != target)
        /* There were group markers, so that's as much as can be done. */
        return;

    switch (probe->op) {
    case RE_OP_ANY: /* Any character, except a newline. */
    case RE_OP_ANY_ALL: /* Any character at all. */
    case RE_OP_ANY_ALL_REV: /* Any character at all. */
    case RE_OP_ANY_REV: /* Any character, except a newline. */
    case RE_OP_BOUNDARY: /* At a word boundary. */
    case RE_OP_CATEGORY: /* A character category. */
    case RE_OP_CATEGORY_REV: /* A character category. */
    case RE_OP_CHARACTER: /* A character literal. */
    case RE_OP_CHARACTER_IGNORE: /* A character literal, ignoring case. */
    case RE_OP_CHARACTER_IGNORE_REV: /* A character literal, ignoring case. */
    case RE_OP_CHARACTER_REV: /* A character literal. */
    case RE_OP_END_OF_LINE: /* At the end of a line. */
    case RE_OP_END_OF_STRING: /* At the end of the string. */
    case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */
    case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */
    case RE_OP_SET: /* Character set. */
    case RE_OP_SET_IGNORE: /* Character set, ignoring case. */
    case RE_OP_SET_IGNORE_REV: /* Character set, ignoring case. */
    case RE_OP_SET_REV: /* Character set. */
    case RE_OP_START_OF_LINE: /* At the start of a line. */
    case RE_OP_START_OF_STRING: /* At the start of the string. */
    case RE_OP_STRING: /* A string literal. */
    case RE_OP_STRING_IGNORE: /* A string literal, ignoring case. */
    case RE_OP_STRING_IGNORE_REV: /* A string literal, ignoring case. */
    case RE_OP_STRING_REV: /* A string literal. */
        /* Record where a successful test leads and how far it steps. */
        next->match_next = probe->next_1.node;
        next->match_step = probe->step;
        break;
    case RE_OP_GREEDY_REPEAT_ONE:
    case RE_OP_LAZY_REPEAT_ONE:
        /* 'test' already refers to this node, so this assignment doesn't
         * change anything.
         */
        if (probe->values[1] > 0)
            next->test = probe;
        break;
    }
}

/* Sets the test nodes.
 *
 * Both of the links (next_1 and next_2) of every node in the pattern are
 * processed by set_test_node.
 */
Py_LOCAL(void) set_test_nodes(PatternObject* pattern) {
    Py_ssize_t n;

    for (n = 0; n < pattern->node_count; n++) {
        RE_Node* node = pattern->node_list[n];

        set_test_node(&node->next_1);
        set_test_node(&node->next_2);
    }
}

/* Assigns offsets for the repeats into the values array.
 *
 * Entries for groups come before those for repeats, so the first repeat is
 * given the offset 'group_count' and the rest follow on from there.
 */
Py_LOCAL(void) assign_repeat_offsets(PatternObject* pattern) {
    Py_ssize_t base;
    Py_ssize_t r;

    /* The repeats start where the groups finish. */
    base = pattern->group_count;

    for (r = 0; r < pattern->repeat_count; r++)
        pattern->repeat_info[r].value_offset = base + r;
}

/* Optimises the pattern.
 *
 * The steps are, in order: bypass the 1-way branches, discard the nodes
 * which are no longer used, set the test nodes, assign the offsets of the
 * groups and then of the repeats into the values array, and mark the named
 * groups.
 *
 * Returns FALSE if the group offsets can't be assigned (or, when VERBOSE is
 * defined, if the storage for the debugging dump can't be allocated),
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) optimise_pattern(PatternObject* pattern) {
    /* Building the nodes is made simpler by allowing branches to have a single
     * exit. These need to be removed.
     */
    skip_one_way_branches(pattern);

    /* Discard any unused nodes. */
    discard_unused_nodes(pattern);

    /* Set the test nodes. */
    set_test_nodes(pattern);

#if defined(VERBOSE)
    {
        /* Dump the nodes in index order. */
        RE_Node** sorted_nodes;
        Py_ssize_t i;

        sorted_nodes = (RE_Node**)re_alloc(pattern->node_count *
          sizeof(*sorted_nodes));
        if (!sorted_nodes)
            return FALSE;

        for (i = 0; i < pattern->node_count; i++)
            sorted_nodes[i] = pattern->node_list[i];
        qsort(sorted_nodes, pattern->node_count, sizeof(*sorted_nodes),
          compare_nodes);
        for (i = 0; i < pattern->node_count; i++) {
            RE_Node* node = sorted_nodes[i];

            /* The indexes are cast to int to match the "%d" format. */
            printf("node->index is %d, node->op is %s", (int)node->index,
              re_op_text[node->op]);

            /* next_1 and next_2 are links, not nodes; the destination is the
             * 'node' member of the link.
             */
            if (node->next_1.node)
                printf(", node->next_1 is %d", (int)node->next_1.node->index);
            if (node->next_2.node)
                printf(", node->next_2 is %d", (int)node->next_2.node->index);
            printf("\n");
        }
        re_dealloc(sorted_nodes);
    }

#endif
    /* Assign group offsets into the values array. */
    if (!assign_group_offsets(pattern))
        return FALSE;

    /* Assign repeat offsets into the values array. */
    assign_repeat_offsets(pattern);

    /* Mark all the groups that are named. */
    mark_named_groups(pattern);

    return TRUE;
}

/* Creates a new pattern node.
 *
 * The node is given storage for 'value_count' values (which the caller must
 * fill in), an index of -1, no destinations and no test nodes. It's recorded
 * in the pattern's node list, growing the list if necessary, so the pattern
 * owns it from then on.
 *
 * Returns the new node, or NULL if storage couldn't be allocated, in which
 * case the pattern is left unchanged.
 */
Py_LOCAL(RE_Node*) create_node(PatternObject* pattern, RE_CODE op, BOOL match,
  Py_ssize_t step, Py_ssize_t value_count) {
    RE_Node* node;

    node = (RE_Node*)re_alloc(sizeof(*node));
    if (!node)
        return NULL;

    node->value_capacity = value_count;
    node->value_count = value_count;
    node->values = (RE_CODE*)re_alloc(node->value_capacity * sizeof(RE_CODE));
    if (!node->values)
        goto error;

    node->index = -1;
    node->op = op;
    node->match = match;
    node->step = step;
    node->next_1.node = NULL;
    node->next_1.test = NULL;
    node->next_2.node = NULL;
    node->next_2.test = NULL;

    /* Ensure that there's enough storage to record the new node. */
    if (pattern->node_count >= pattern->node_capacity) {
        Py_ssize_t new_capacity;
        RE_Node** new_node_list;

        /* The new capacity is recorded only once the list has actually been
         * grown. If it were recorded first and the reallocation failed, the
         * pattern would claim room which it doesn't have and a later call
         * could write beyond the end of the list.
         */
        new_capacity = pattern->node_capacity + 16;
        new_node_list = (RE_Node**)re_realloc(pattern->node_list,
          new_capacity * sizeof(RE_Node*));
        if (!new_node_list)
            goto error;
        pattern->node_list = new_node_list;
        pattern->node_capacity = new_capacity;
    }

    /* Record the new node. */
    pattern->node_list[pattern->node_count++] = node;

    return node;

error:
    /* The node hasn't been recorded yet, so it must be freed here. */
    re_dealloc(node->values);
    re_dealloc(node);
    return NULL;
}

/* Adds a node as a next node for another node.
 *
 * 'node_2' becomes the first destination of 'node_1' if that's still empty,
 * otherwise it becomes the second destination (replacing whatever was there).
 */
Py_LOCAL(void) add_node(RE_Node* node_1, RE_Node* node_2) {
    RE_NextNode* slot;

    slot = node_1->next_1.node ? &node_1->next_2 : &node_1->next_1;
    slot->node = node_2;
}

/* Ensures that the entry for a group's details actually exists.
 *
 * If 'group' is greater than the current group count then the group info is
 * grown (in steps of 16 entries, with the new entries zeroed) as far as is
 * needed and 'group' becomes the new group count.
 *
 * Returns FALSE if the storage couldn't be grown, otherwise TRUE.
 */
Py_LOCAL(BOOL) ensure_group(PatternObject* pattern, Py_ssize_t group) {
    Py_ssize_t current;
    Py_ssize_t wanted;

    if (pattern->group_count >= group)
        /* We already have an entry for the group. */
        return TRUE;

    /* Work out how much capacity is needed to include the new entry. */
    current = pattern->group_info_capacity;
    for (wanted = current; wanted < group; wanted += 16)
        ;

    if (wanted > current) {
        RE_GroupInfo* grown;

        grown = (RE_GroupInfo*)re_realloc(pattern->group_info, wanted *
          sizeof(RE_GroupInfo));
        if (!grown)
            return FALSE;

        /* Only the entries which have just been added need to be cleared. */
        memset(&grown[current], 0, (wanted - current) * sizeof(RE_GroupInfo));

        pattern->group_info = grown;
        pattern->group_info_capacity = wanted;
    }

    pattern->group_count = group;

    return TRUE;
}

/* Records that there's a reference to a group.
 *
 * The group's entry is created if it doesn't exist yet and is then marked as
 * referenced. Group numbers less than 1 have no entry, so nothing is marked
 * for them, as in record_group and record_group_end.
 *
 * Returns FALSE if storage for the entry couldn't be allocated, otherwise
 * TRUE.
 */
Py_LOCAL(BOOL) record_ref_group(PatternObject* pattern, Py_ssize_t group) {
    if (!ensure_group(pattern, group))
        return FALSE;

    /* ensure_group succeeds without doing anything when 'group' is less than
     * 1, so the index must be checked before it's used.
     */
    if (group >= 1)
        pattern->group_info[group - 1].referenced = TRUE;

    return TRUE;
}

/* Records that there's a new group.
 *
 * The group's entry is created if it doesn't exist yet, then its id is set to
 * the group number and its end index to the current group count. Nothing is
 * recorded for a group number less than 1.
 *
 * Returns FALSE if storage for the entry couldn't be allocated, otherwise
 * TRUE.
 */
Py_LOCAL(BOOL) record_group(PatternObject* pattern, Py_ssize_t group) {
    RE_GroupInfo* info;

    if (!ensure_group(pattern, group))
        return FALSE;

    if (group < 1)
        /* There's no entry for this group. */
        return TRUE;

    info = &pattern->group_info[group - 1];
    info->id = group;
    info->end_index = pattern->group_count;

    return TRUE;
}

/* Records that a group has closed.
 *
 * The pattern's group end index is advanced and its new value is stored as
 * the end index of the group. Nothing is done for a group number less than 1.
 */
Py_LOCAL(void) record_group_end(PatternObject* pattern, Py_ssize_t group) {
    if (group < 1)
        return;

    ++pattern->group_end_index;
    pattern->group_info[group - 1].end_index = pattern->group_end_index;
}

Py_LOCAL(BOOL) build_sequence(RE_CompileArgs* args);

/* Checks whether a node matches one and only one character.
 *
 * Any leading 1-way branches are skipped first. The result is TRUE only if
 * what remains is a single node, with nothing following it, which is one of
 * the single-character tests listed below. An empty sequence (there's no
 * node left after the 1-way branches) gives FALSE.
 */
Py_LOCAL(BOOL) sequence_matches_one(RE_Node* node) {
    /* A 1-way branch might not lead anywhere (nodes are created without any
     * destinations), so the node has to be checked before it's used.
     */
    while (node && node->op == RE_OP_BRANCH && !node->next_2.node)
        node = node->next_1.node;

    if (!node)
        return FALSE;

    if (node->next_1.node)
        return FALSE;

    switch (node->op) {
    case RE_OP_ANY:
    case RE_OP_ANY_ALL:
    case RE_OP_ANY_ALL_REV:
    case RE_OP_ANY_REV:
    case RE_OP_CATEGORY:
    case RE_OP_CATEGORY_REV:
    case RE_OP_CHARACTER:
    case RE_OP_CHARACTER_IGNORE:
    case RE_OP_CHARACTER_IGNORE_REV:
    case RE_OP_CHARACTER_REV:
    case RE_OP_SET:
    case RE_OP_SET_IGNORE:
    case RE_OP_SET_IGNORE_REV:
    case RE_OP_SET_REV:
        return TRUE;
    default:
        return FALSE;
    }
}

/* Records a repeat.
 *
 * The repeat info is grown (in steps of 16 entries, with the new entries
 * zeroed) until it has an entry for 'id', and the repeat count is raised to
 * 'id' + 1 if it's lower than that.
 *
 * Returns FALSE if the storage couldn't be grown, otherwise TRUE.
 */
Py_LOCAL(BOOL) record_repeat(PatternObject* pattern, int id) {
    Py_ssize_t current;
    Py_ssize_t wanted;

    /* Work out how much capacity is needed to include the new entry. */
    current = pattern->repeat_info_capacity;
    wanted = current;
    while (wanted <= id)
        wanted += 16;

    if (wanted > current) {
        RE_RepeatInfo* grown;

        grown = (RE_RepeatInfo*)re_realloc(pattern->repeat_info, wanted *
          sizeof(RE_RepeatInfo));
        if (!grown)
            return FALSE;

        /* Only the entries which have just been added need to be cleared. */
        memset(&grown[current], 0, (wanted - current) *
          sizeof(RE_RepeatInfo));

        pattern->repeat_info = grown;
        pattern->repeat_info_capacity = wanted;
    }

    if (pattern->repeat_count <= id)
        pattern->repeat_count = id + 1;

    return TRUE;
}

/* Builds a simple operation.
 *
 * The code is just the opcode. A node with the given op and step (and no
 * values) is appended to the sequence and the minimum width is increased by
 * 1.
 *
 * Returns FALSE if the node couldn't be created, otherwise TRUE.
 */
Py_LOCAL(BOOL) build_simple(RE_CompileArgs* args, RE_CODE op, Py_ssize_t step) {
    RE_Node* simple;

    simple = create_node(args->pattern, op, TRUE, step, 0);
    if (simple == NULL)
        return FALSE;

    /* Step over the opcode and count the item in the minimum width. */
    args->code += 1;
    args->min_width += 1;

    /* The new node is now the end of the sequence. */
    add_node(args->end, simple);
    args->end = simple;

    return TRUE;
}

/* Builds a character set.
 *
 * The code is: opcode, flags, members..., END. Bit 0 of the flags is the
 * 'match' value of the set node and bit 1 marks the set as zero-width (its
 * step is 0 and it adds nothing to the minimum width).
 *
 * The first member hangs off the second destination of the set node and the
 * members are chained together through their first destinations.
 *
 * Returns FALSE if the code is malformed or a node couldn't be created,
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) build_set(RE_CompileArgs* args, RE_CODE op, Py_ssize_t step) {
    RE_CODE flags;
    RE_Node* node;
    RE_Node* previous;

    /* opcode, match | zerowidth, members. */
    flags = args->code[1];
    if (flags & ~3)
        return FALSE;

    if (flags & 2)
        step = 0;
    else
        ++args->min_width;

    node = create_node(args->pattern, op, flags & 1, step, 0);
    if (!node)
        return FALSE;
    args->code += 2;

    previous = NULL;

    /* Compile the character set. */
    do {
        RE_CODE member_op;
        RE_Node* member;

        member_op = args->code[0];
        switch (member_op) {
        case RE_OP_ANY:
            /* opcode. */
            member = create_node(args->pattern, member_op, TRUE, 1, 0);
            if (!member)
                return FALSE;
            ++args->code;
            break;
        case RE_OP_BIG_BITSET:
        {
            /* opcode, max_char, indexes, subsets. */
            RE_CODE max_char;
            size_t index_count;
            RE_CODE* cur_ptr;
            size_t subset_count;
            size_t i;
            size_t size;

            max_char = args->code[1];

            /* There's one index per block of 256 characters. The subsets
             * must be introduced in order, so an index can be at most 1 more
             * than the highest index seen so far.
             */
            index_count = max_char / 256 + 1;
            cur_ptr = args->code + 2;
            subset_count = 0;
            for (i = 0; i < index_count; i++) {
                RE_CODE idx;

                idx = cur_ptr[i / INDEXES_PER_CODE];
                idx >>= BITS_PER_INDEX * (i % INDEXES_PER_CODE);
                idx &= (1 << BITS_PER_INDEX) - 1;
                if (idx > subset_count)
                    return FALSE;
                if (idx == subset_count)
                    ++subset_count;
            }

            /* Step over the packed indexes and then the subsets. */
            cur_ptr += (index_count + INDEXES_PER_CODE - 1) /
              INDEXES_PER_CODE;
            cur_ptr += (256 / BITS_PER_CODE) * subset_count;
            if (cur_ptr > args->end_code)
                return FALSE;

            /* The values are max_char followed by the indexes and subsets. */
            size = cur_ptr - (args->code + 2);
            member = create_node(args->pattern, member_op, TRUE, 1, 1 + size);
            if (!member)
                return FALSE;
            member->values[0] = max_char;
            for (i = 0; i < size; i++)
                member->values[1 + i] = args->code[2 + i];
            args->code += 2 + size;
            break;
        }
        case RE_OP_CATEGORY:
        case RE_OP_CHARACTER:
        {
            /* opcode, flags, value. */
            RE_CODE member_flags;
            RE_CODE value;

            member_flags = args->code[1];
            value = args->code[2];
            if (member_flags & ~1)
                return FALSE;

            member = create_node(args->pattern, member_op, TRUE, 1, 1);
            if (!member)
                return FALSE;
            member->match = member_flags & 1;
            member->values[0] = value;
            args->code += 3;
            break;
        }
        case RE_OP_RANGE:
        {
            /* opcode, flags, min_char, max_char. */
            RE_CODE member_flags;
            RE_CODE min_value;
            RE_CODE max_value;

            member_flags = args->code[1];
            min_value = args->code[2];
            max_value = args->code[3];
            if (member_flags & ~1)
                return FALSE;
            if (min_value > max_value)
                return FALSE;

            member = create_node(args->pattern, member_op, TRUE, 1, 2);
            if (!member)
                return FALSE;
            member->match = member_flags & 1;
            member->values[0] = min_value;
            member->values[1] = max_value;
            args->code += 4;
            break;
        }
        case RE_OP_SMALL_BITSET:
        {
            /* opcode, top_bits, bitset. */
            size_t size;
            size_t i;

            size = 256 / BITS_PER_CODE;
            if (args->code + 2 + size > args->end_code)
                return FALSE;

            member = create_node(args->pattern, member_op, TRUE, 1, size + 1);
            if (!member)
                return FALSE;
            member->values[0] = args->code[1];
            for (i = 0; i < size; i++)
                member->values[1 + i] = args->code[2 + i];
            args->code += 2 + size;
            break;
        }
        case RE_OP_STRING:
        {
            /* opcode, count, characters. Actually a character set. */
            size_t count;
            size_t i;

            count = args->code[1];
            if (args->code + 2 + count > args->end_code)
                return FALSE;

            member = create_node(args->pattern, member_op, TRUE, 1, count);
            if (!member)
                return FALSE;

            /* The characters follow the opcode and the count, so the first
             * one is at offset 2.
             */
            for (i = 0; i < count; i++)
                member->values[i] = args->code[2 + i];
            args->code += 2 + count;
            break;
        }
        default:
            /* Illegal opcode for a character set. */
            return FALSE;
        }

        /* Chain the member onto the previous one, or onto the set node if
         * it's the first.
         */
        if (previous)
            previous->next_1.node = member;
        else
            node->next_2.node = member;
        previous = member;
    } while (args->code < args->end_code && args->code[0] != RE_OP_END);

    /* Check that we've reached the end correctly. (The last opcode should be
     * 'END'.)
     */
    if (args->code >= args->end_code || args->code[0] != RE_OP_END)
        return FALSE;
    ++args->code;

    /* Append the character set. */
    add_node(args->end, node);
    args->end = node;

    return TRUE;
}

/* Builds an atomic sequence.
 *
 * The code is: opcode, subpattern..., END. The subpattern hangs off the
 * second destination of the ATOMIC node and is terminated by a SUCCESS node
 * of its own. Its minimum width is added to that of the enclosing sequence.
 *
 * Returns FALSE if the code is malformed or a node couldn't be created,
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) build_atomic(RE_CompileArgs* args) {
    RE_Node* node;
    RE_CompileArgs subargs;
    RE_Node* success_node;

    node = create_node(args->pattern, RE_OP_ATOMIC, FALSE, 0, 0);
    if (!node)
        return FALSE;
    ++args->code;

    /* Compile the sequence and check that we've reached the end of the
     * subpattern. The subpattern starts with a copy of the current minimum
     * width, so its final minimum width is the new total.
     */
    subargs = *args;
    if (!build_sequence(&subargs))
        return FALSE;
    args->code = subargs.code;
    args->min_width = subargs.min_width;

    /* Don't look for the 'END' beyond the end of the code. */
    if (args->code >= args->end_code || args->code[0] != RE_OP_END)
        return FALSE;
    ++args->code;

    /* Create the success node to terminate the subpattern. */
    success_node = create_node(args->pattern, RE_OP_SUCCESS, FALSE, 0, 0);
    if (!success_node)
        return FALSE;

    /* Insert the subpattern. */
    node->next_2.node = subargs.start;
    add_node(subargs.end, success_node);

    /* Append the atomic node. */
    add_node(args->end, node);
    args->end = node;

    return TRUE;
}

/* Builds a word boundary node.
 *
 * The code is: opcode, flags. Only bit 0 of the flags may be set; it becomes
 * the 'match' value of the node. The node has a step of 0 and no values.
 *
 * Returns FALSE if the flags are invalid or the node couldn't be created,
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) build_boundary(RE_CompileArgs* args) {
    RE_CODE flags;
    RE_Node* boundary;

    flags = args->code[1];
    if ((flags & ~1) != 0)
        return FALSE;

    boundary = create_node(args->pattern, RE_OP_BOUNDARY, flags & 1, 0, 0);
    if (boundary == NULL)
        return FALSE;

    /* Step over the opcode and the flags. */
    args->code += 2;

    /* The new node is now the end of the sequence. */
    add_node(args->end, boundary);
    args->end = boundary;

    return TRUE;
}

/* Builds a zero-width node.
 *
 * The code is just the opcode. A node with the given op, a step of 0 and no
 * values is appended to the sequence; the minimum width isn't changed.
 *
 * Returns FALSE if the node couldn't be created, otherwise TRUE.
 */
Py_LOCAL(BOOL) build_zerowidth(RE_CompileArgs* args, RE_CODE op) {
    RE_Node* zerowidth;

    zerowidth = create_node(args->pattern, op, TRUE, 0, 0);
    if (zerowidth == NULL)
        return FALSE;

    /* Step over the opcode. */
    args->code += 1;

    /* The new node is now the end of the sequence. */
    add_node(args->end, zerowidth);
    args->end = zerowidth;

    return TRUE;
}

/* Builds a 2-way branch.
 *
 * The code is: BRANCH, sequence, (NEXT, sequence)*, END. Each alternative is
 * attached to a BRANCH node of its own, whose other destination is the BRANCH
 * node of the following alternative, and all of the alternatives lead to a
 * common join node, which becomes the end of the enclosing sequence.
 *
 * The minimum width of the branch is that of its narrowest alternative.
 *
 * Returns FALSE if the code is malformed or a node couldn't be created,
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) build_branch(RE_CompileArgs* args) {
    RE_Node* branch_node;
    RE_Node* join_node;
    size_t min_width;

    /* Create nodes for the start and end of the branch sequence. */
    branch_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
    join_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
    if (!branch_node || !join_node)
        return FALSE;

    /* Append the start node. */
    add_node(args->end, branch_node);
    args->end = join_node;

    /* Start with the largest possible value so that the width of the first
     * alternative always replaces it.
     */
    min_width = ~(size_t)0;

    /* A branch in the regular expression is compiled into a series of
     * 2-way branches.
     */
    do {
        RE_CompileArgs subargs;
        RE_Node* next_branch_node;

        /* Skip over the 'BRANCH' or 'NEXT' opcode. */
        ++args->code;

        /* Compile the sequence until the next 'BRANCH' or 'NEXT'
         * opcode.
         */
        subargs = *args;
        subargs.min_width = 0;
        if (!build_sequence(&subargs))
            return FALSE;
        args->code = subargs.code;

        if (subargs.min_width < min_width)
            min_width = subargs.min_width;

        /* Append the sequence. */
        add_node(branch_node, subargs.start);
        add_node(subargs.end, join_node);

        /* Create a start node for the next sequence and append it. After the
         * last alternative this node is left without any destinations.
         */
        next_branch_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0,
          0);
        if (!next_branch_node)
            return FALSE;

        add_node(branch_node, next_branch_node);
        branch_node = next_branch_node;
    } while (args->code < args->end_code && args->code[0] ==
      RE_OP_NEXT);

    args->min_width += min_width;

    /* We should have reached the end of the branch. (The loop might have
     * stopped because the code ran out, so check for that before looking at
     * the opcode.)
     */
    if (args->code >= args->end_code || args->code[0] != RE_OP_END)
        return FALSE;

    ++args->code;

    return TRUE;
}

/* Builds a repeated sequence.
 *
 * The code is: opcode, min_count, max_count, body..., END, whatever the
 * counts are. 'op' says whether the repeat is greedy (RE_OP_GREEDY_REPEAT) or
 * lazy (anything else).
 *
 * Three forms are built:
 *
 * - {0,1}: an optional body, built from a 2-way branch and a join node, with
 *   no repeat counting.
 *
 * - {1,1}: just the body, spliced directly into the sequence.
 *
 * - anything else: a counted repeat. If the body matches exactly one
 *   character then a single GREEDY/LAZY_REPEAT_ONE node is used, with the
 *   body on its second destination; otherwise the body is bracketed by a
 *   repeat node and an end-of-repeat node.
 *
 * Returns FALSE if the code is malformed or storage couldn't be allocated,
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) build_repeat(RE_CompileArgs* args, RE_CODE op) {
    BOOL greedy;
    RE_CODE min_count;
    RE_CODE max_count;

    /* This includes special cases such as optional items, which we'll
     * check for and treat specially. They don't need repeat counts,
     * which helps us avoid unnecessary work when matching.
     */
    greedy = op == RE_OP_GREEDY_REPEAT;
    min_count = args->code[1];
    max_count = args->code[2];
    if (min_count > max_count)
        return FALSE;
    args->code += 3;

    if (min_count == 0 && max_count == 1) {
        /* Optional sequence. */
        RE_Node* branch_node;
        RE_Node* join_node;
        RE_CompileArgs subargs;

        /* Create the start and end nodes. */
        branch_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
        join_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
        if (!branch_node || !join_node)
            return FALSE;

        /* Compile the sequence and check that we've reached the end of it.
         * The body is optional, so its width isn't added to the minimum
         * width.
         */
        subargs = *args;
        if (!build_sequence(&subargs))
            return FALSE;
        args->code = subargs.code;

        if (args->code >= args->end_code || args->code[0] != RE_OP_END)
            return FALSE;
        ++args->code;

        /* The order of the destinations of the branch decides which is
         * tried first.
         */
        if (greedy) {
            /* It's a greedy option. */
            add_node(branch_node, subargs.start);
            add_node(branch_node, join_node);
        } else {
            /* It's a lazy option. */
            add_node(branch_node, join_node);
            add_node(branch_node, subargs.start);
        }
        add_node(subargs.end, join_node);

        /* Append the optional sequence. */
        add_node(args->end, branch_node);
        args->end = join_node;
    } else if (min_count == 1 && max_count == 1) {
        /* Singly-repeated sequence. */
        RE_CompileArgs subargs;

        subargs = *args;
        subargs.min_width = 0;
        if (!build_sequence(&subargs))
            return FALSE;
        args->code = subargs.code;
        args->min_width += subargs.min_width;

        /* The body is terminated by 'END' here just as it is in the other
         * forms, so it must be checked for and stepped over, otherwise it
         * would be taken for the end of the enclosing sequence.
         */
        if (args->code >= args->end_code || args->code[0] != RE_OP_END)
            return FALSE;
        ++args->code;

        /* Append the sequence. */
        add_node(args->end, subargs.start);
        args->end = subargs.end;
    } else {
        RE_Node* repeat_node;
        RE_CompileArgs subargs;

        /* Create the nodes for the repeat. The values are: the id of the
         * repeat, the minimum count, the maximum count, the direction.
         */
        repeat_node = create_node(args->pattern, greedy ? RE_OP_GREEDY_REPEAT :
          RE_OP_LAZY_REPEAT, FALSE, args->forward ? 1 : -1, 4);
        if (!repeat_node || !record_repeat(args->pattern,
          args->pattern->repeat_count))
            return FALSE;

        /* NOTE(review): record_repeat has already raised repeat_count past
         * the id which it was given, so the id stored here is 1 more than
         * that one and the count is then raised again: the ids come out as
         * 1, 3, 5, ... This looks unintended (half of the repeat info goes
         * unused) but it's been left alone because the ids are used
         * elsewhere -- confirm before changing it.
         */
        repeat_node->values[0] = args->pattern->repeat_count++;
        repeat_node->values[1] = min_count;
        repeat_node->values[2] = max_count;
        repeat_node->values[3] = args->forward;

        /* Compile the 'body' and check that we've reached the end of it. */
        subargs = *args;
        subargs.min_width = 0;
        if (!build_sequence(&subargs))
            return FALSE;
        args->code = subargs.code;
        args->min_width += min_count * subargs.min_width;

        if (args->code >= args->end_code || args->code[0] != RE_OP_END)
            return FALSE;
        ++args->code;

        if (sequence_matches_one(subargs.start)) {
            /* The body is a single character test, so the repeat node can
             * do all of the work itself.
             */
            repeat_node->op = greedy ? RE_OP_GREEDY_REPEAT_ONE :
              RE_OP_LAZY_REPEAT_ONE;

            /* Append the new sequence. */
            add_node(args->end, repeat_node);
            repeat_node->next_2.node = subargs.start;
            args->end = repeat_node;
        } else {
            RE_Node* end_repeat_node;
            RE_Node* end_node;

            /* The end-of-repeat node carries the same values as the repeat
             * node.
             */
            end_repeat_node = create_node(args->pattern, greedy ?
              RE_OP_END_GREEDY_REPEAT : RE_OP_END_LAZY_REPEAT, FALSE,
              args->forward ? 1 : -1, 4);
            if (!end_repeat_node)
                return FALSE;
            end_repeat_node->values[0] = repeat_node->values[0];
            end_repeat_node->values[1] = repeat_node->values[1];
            end_repeat_node->values[2] = repeat_node->values[2];
            end_repeat_node->values[3] = args->forward;

            end_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
            if (!end_node)
                return FALSE;

            /* Append the new sequence. Both the repeat node and the
             * end-of-repeat node lead first to the body and second to the
             * node which follows the repeat.
             */
            add_node(args->end, repeat_node);
            add_node(repeat_node, subargs.start);
            add_node(repeat_node, end_node);
            add_node(subargs.end, end_repeat_node);
            add_node(end_repeat_node, subargs.start);
            add_node(end_repeat_node, end_node);
            args->end = end_node;
        }
    }

    return TRUE;
}

/* Builds a category.
 *
 * The code is: opcode, flags, category. Bit 0 of the flags becomes the
 * 'match' value of the node. If bit 1 is set then the node is given a step of
 * 0 and adds nothing to the minimum width; otherwise the given step is used
 * and the minimum width is increased by 1.
 *
 * Returns FALSE if the flags are invalid or the node couldn't be created,
 * otherwise TRUE.
 */
Py_LOCAL(BOOL) build_category(RE_CompileArgs* args, RE_CODE op, Py_ssize_t step)
  {
    RE_CODE flags;
    RE_CODE category;
    RE_Node* category_node;

    flags = args->code[1];
    category = args->code[2];

    if ((flags & ~3) != 0)
        return FALSE;

    if ((flags & 2) != 0)
        step = 0;
    else
        args->min_width += 1;

    /* The only value is the category. */
    category_node = create_node(args->pattern, op, flags & 1, step, 1);
    if (category_node == NULL)
        return FALSE;
    category_node->values[0] = category;

    /* Step over the opcode, the flags and the category. */
    args->code += 3;

    /* The new node is now the end of the sequence. */
    add_node(args->end, category_node);
    args->end = category_node;

    return TRUE;
}

/* Builds a character node.
 *
 * The code consumed is 3 codewords: [op, flags, character].
 *
 * args: the compile state. 'code' must point at the opcode. On success 'code'
 *   is advanced past the 3 codewords and the new node becomes 'end'.
 * op: the opcode stored in the new node.
 * step: the step for the node; build_sequence passes 1 for the forward
 *   opcodes and -1 for the _REV opcodes. It's replaced by 0 if bit 1 of the
 *   flags is set.
 *
 * Only bits 0 and 1 of the flags are accepted. Bit 0 is forwarded to
 * create_node (presumably the positive/negative match flag - TODO confirm).
 * Bit 1 marks the node as zero-width: the step is 0 and 'min_width' isn't
 * incremented.
 *
 * Returns FALSE if the code is truncated, if unknown flag bits are set or if
 * the node can't be created.
 */
Py_LOCAL(BOOL) build_character(RE_CompileArgs* args, RE_CODE op, Py_ssize_t
  step) {
    /* values are: character. */
    RE_CODE flags;
    RE_CODE value;
    RE_Node* node;

    /* The opcode must be followed by 2 codewords; don't read past the end of
     * the code if it has been truncated.
     */
    if (args->end_code - args->code < 3)
        return FALSE;

    flags = args->code[1];
    value = args->code[2];
    if (flags & ~3)
        return FALSE;

    if (flags & 2)
        step = 0;
    else
        ++args->min_width;

    node = create_node(args->pattern, op, flags & 1, step, 1);
    if (!node)
        return FALSE;
    node->values[0] = value;
    args->code += 3;

    /* Append the character. */
    add_node(args->end, node);
    args->end = node;

    return TRUE;
}

/* Builds a capture group.
 *
 * The code consumed is: [GROUP, group] <sequence> [END].
 *
 * args: the compile state. 'code' must point at the GROUP opcode. On success
 *   'code' is advanced past the closing END, 'min_width' grows by the
 *   minimum width of the enclosed sequence and the node which closes the
 *   group becomes 'end'.
 *
 * When matching forwards the sequence is wrapped as BEGIN_GROUP ... END_GROUP;
 * when matching backwards the two marker nodes are swapped.
 *
 * Returns FALSE if the code is truncated or malformed, or if a node or the
 * group record can't be created.
 */
Py_LOCAL(BOOL) build_group(RE_CompileArgs* args) {
    RE_CODE group;
    RE_Node* start_node;
    RE_Node* end_node;
    RE_CompileArgs subargs;

    /* The opcode must be followed by the group number; don't read past the
     * end of the code if it has been truncated.
     */
    if (args->end_code - args->code < 2)
        return FALSE;

    group = args->code[1];
    args->code += 2;

    /* Create nodes for the start and end of the capture group. */
    if (args->forward) {
        start_node = create_node(args->pattern, RE_OP_BEGIN_GROUP, FALSE, 0, 1);
        end_node = create_node(args->pattern, RE_OP_END_GROUP, FALSE, 0, 1);
    } else {
        start_node = create_node(args->pattern, RE_OP_END_GROUP, FALSE, 0, 1);
        end_node = create_node(args->pattern, RE_OP_BEGIN_GROUP, FALSE, 0, 1);
    }
    if (!start_node || !end_node)
        return FALSE;
    start_node->values[0] = group;
    end_node->values[0] = group;

    /* Record that we have a new capture group. */
    if (!record_group(args->pattern, group))
        return FALSE;

    /* Compile the sequence and check that we've reached the end of the
     * capture group. The sequence's width is measured separately so that it
     * can be added to that of the enclosing sequence.
     */
    subargs = *args;
    subargs.min_width = 0;
    if (!build_sequence(&subargs))
        return FALSE;
    args->code = subargs.code;
    args->min_width += subargs.min_width;

    /* Record that the capture group has closed. */
    record_group_end(args->pattern, group);

    if (args->code[0] != RE_OP_END)
        return FALSE;
    ++args->code;

    /* Append the capture group. */
    add_node(args->end, start_node);
    add_node(start_node, subargs.start);
    add_node(subargs.end, end_node);
    args->end = end_node;

    return TRUE;
}

/* Builds a conditional group.
 *
 * The code consumed is:
 *
 *     [GROUP_EXISTS, group] <sequence> [END]
 *
 * or:
 *
 *     [GROUP_EXISTS, group] <sequence> [NEXT] <sequence> [END]
 *
 * args: the compile state. 'code' must point at the GROUP_EXISTS opcode. On
 *   success 'code' is advanced past the closing END and a BRANCH node which
 *   joins the alternatives becomes 'end'.
 *
 * The first sequence is attached to the GROUP_EXISTS node first and the
 * second sequence (or, if there isn't one, the joining node itself) is
 * attached second.
 *
 * 'min_width' grows by the smaller of the minimum widths of the 2 sequences,
 * or by nothing if there's no second sequence.
 *
 * Returns FALSE if the code is truncated or malformed, or if a node can't be
 * created.
 */
Py_LOCAL(BOOL) build_group_exists(RE_CompileArgs* args) {
    RE_CODE group;
    RE_Node* start_node;
    RE_Node* end_node;
    RE_CompileArgs subargs;
    size_t min_width;

    /* The opcode must be followed by the group number; don't read past the
     * end of the code if it has been truncated.
     */
    if (args->end_code - args->code < 2)
        return FALSE;

    group = args->code[1];
    args->code += 2;

    /* Create nodes for the start and end of the structure. */
    start_node = create_node(args->pattern, RE_OP_GROUP_EXISTS, FALSE, 0, 1);
    end_node = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
    if (!start_node || !end_node)
        return FALSE;
    start_node->values[0] = group;

    /* Compile the first sequence. */
    subargs = *args;
    subargs.min_width = 0;
    if (!build_sequence(&subargs))
        return FALSE;
    args->code = subargs.code;
    min_width = subargs.min_width;

    /* Append the start node. */
    add_node(args->end, start_node);
    add_node(start_node, subargs.start);
    add_node(subargs.end, end_node);

    if (args->code[0] == RE_OP_NEXT) {
        /* There's a second sequence. */
        ++args->code;

        subargs.code = args->code;
        subargs.min_width = 0;
        if (!build_sequence(&subargs))
            return FALSE;
        args->code = subargs.code;

        /* Either sequence could be taken, so keep the smaller width. */
        if (subargs.min_width < min_width)
            min_width = subargs.min_width;

        add_node(start_node, subargs.start);
        add_node(subargs.end, end_node);
    } else {
        /* There's no second sequence, so the alternative is to match nothing.
         */
        add_node(start_node, end_node);

        min_width = 0;
    }

    args->min_width += min_width;

    if (args->code[0] != RE_OP_END)
        return FALSE;
    ++args->code;

    args->end = end_node;

    return TRUE;
}

/* Builds a lookaround.
 *
 * The code consumed is: [LOOKAROUND, flags, forward] <sequence> [END].
 *
 * args: the compile state. 'code' must point at the LOOKAROUND opcode. On
 *   success 'code' is advanced past the closing END and the LOOKAROUND node
 *   becomes 'end'.
 *
 * Only bit 0 of the flags is accepted; it's forwarded to create_node
 * (presumably it distinguishes positive from negative lookarounds - TODO
 * confirm). The 'forward' codeword sets the direction in which the enclosed
 * sequence is compiled.
 *
 * The enclosed sequence hangs off 'next_2' of the LOOKAROUND node and is
 * terminated by its own SUCCESS node. Its width isn't added to 'min_width'.
 *
 * Returns FALSE if the code is truncated or malformed, or if a node can't be
 * created.
 */
Py_LOCAL(BOOL) build_lookaround(RE_CompileArgs* args) {
    RE_CODE flags;
    RE_CODE forward;
    RE_Node* lookaround_node;
    RE_Node* success_node;
    RE_CompileArgs subargs;

    /* The opcode must be followed by 2 codewords; don't read past the end of
     * the code if it has been truncated.
     */
    if (args->end_code - args->code < 3)
        return FALSE;

    flags = args->code[1];
    forward = args->code[2];
    if (flags & ~1)
        return FALSE;

    /* Create a node for the lookaround. */
    lookaround_node = create_node(args->pattern, RE_OP_LOOKAROUND, flags & 1, 0,
      0);
    if (!lookaround_node)
        return FALSE;
    args->code += 3;

    /* Compile the sequence and check that we've reached the end of the
     * subpattern.
     */
    subargs = *args;
    subargs.forward = forward;
    subargs.min_width = 0;
    if (!build_sequence(&subargs))
        return FALSE;
    args->code = subargs.code;

    if (args->code[0] != RE_OP_END)
        return FALSE;
    ++args->code;

    /* Create the 'SUCCESS' node and append it to the subpattern. */
    success_node = create_node(args->pattern, RE_OP_SUCCESS, FALSE, 0, 0);
    if (!success_node)
        return FALSE;

    /* Insert the subpattern into the node. */
    lookaround_node->next_2.node = subargs.start;
    add_node(subargs.end, success_node);

    /* Append the lookaround. */
    add_node(args->end, lookaround_node);
    args->end = lookaround_node;

    return TRUE;
}

/* Builds a reference to a group.
 *
 * The code consumed is 2 codewords: [op, group].
 *
 * args: the compile state. 'code' must point at the opcode. On success 'code'
 *   is advanced past the 2 codewords and the new node becomes 'end'.
 * op: the opcode stored in the new node.
 *
 * 'min_width' isn't changed.
 *
 * Returns FALSE if the code is truncated, or if the node or the reference
 * record can't be created.
 */
Py_LOCAL(BOOL) build_ref_group(RE_CompileArgs* args, RE_CODE op) {
    Py_ssize_t group;
    RE_Node* node;

    /* The opcode must be followed by the group number; don't read past the
     * end of the code if it has been truncated.
     */
    if (args->end_code - args->code < 2)
        return FALSE;

    group = args->code[1];
    node = create_node(args->pattern, op, FALSE, 0, 1);
    if (!node)
        return FALSE;
    node->values[0] = group;
    args->code += 2;

    /* Record that we have a reference to a group. */
    if (!record_ref_group(args->pattern, group))
        return FALSE;

    /* Append the reference. */
    add_node(args->end, node);
    args->end = node;

    return TRUE;
}

/* Builds a string literal.
 *
 * The code consumed is: [op, length, value_0, ..., value_(length-1)].
 *
 * args: the compile state. 'code' must point at the opcode. On success 'code'
 *   is advanced past the last value, 'min_width' grows by the length of the
 *   string and the new node becomes 'end'.
 * op: the opcode stored in the new node.
 * step: the step per character; build_sequence passes 1 for the forward
 *   opcodes and -1 for the _REV opcodes. The node's step is length * step.
 *
 * Returns FALSE if the code is too short to hold the length or all of the
 * values, or if the node can't be created.
 */
Py_LOCAL(BOOL) build_string(RE_CompileArgs* args, RE_CODE op, Py_ssize_t step) {
    Py_ssize_t available;
    Py_ssize_t length;
    RE_Node* node;
    Py_ssize_t i;

    /* How many codewords are left, including the opcode? There must be at
     * least the opcode and the length.
     */
    available = args->end_code - args->code;
    if (available < 2)
        return FALSE;

    /* The values occupy code[2] to code[length + 1] inclusive, and all of them
     * must lie within the code. Comparing counts instead of pointers avoids
     * forming a pointer beyond the end of the code when the length is bogus.
     */
    length = args->code[1];
    if (length < 0 || length > available - 2)
        return FALSE;

    node = create_node(args->pattern, op, TRUE, length * step, length);
    if (!node)
        return FALSE;
    args->code += 2;

    for (i = 0; i < length; i++)
        node->values[i] = args->code[i];
    args->code += length;

    args->min_width += length;

    /* Append the string. */
    add_node(args->end, node);
    args->end = node;

    return TRUE;
}

/* Builds a sequence of nodes from regular expression code.
 *
 * args: the compile state. Codewords are consumed from 'code' (bounded by
 *   'end_code'). On return 'start' is a new BRANCH node at the head of the
 *   sequence and 'end' is the last node appended (the same node if nothing
 *   was appended). 'min_width' is updated by the individual builders.
 *
 * Building stops at the first opcode which isn't handled here (such as the
 * terminator of the enclosing structure); 'code' is left pointing at it for
 * the caller to check.
 *
 * Returns TRUE if such an opcode was reached. Returns FALSE if a builder
 * failed, if the initial node couldn't be created or if the code ran out
 * first.
 */
Py_LOCAL(BOOL) build_sequence(RE_CompileArgs* args) {
    /* Guarantee that there's something to attach to. */
    args->start = create_node(args->pattern, RE_OP_BRANCH, FALSE, 0, 0);
    args->end = args->start;
    if (!args->start)
        /* The builders append to 'end', so we can't continue without it. */
        return FALSE;

    /* The sequence should end with an opcode we don't understand. If it doesn't
     * then the code is illegal.
     */
    while (args->code < args->end_code) {
        RE_CODE op;

        /* The following code groups opcodes by format, not function. */
        op = args->code[0];
        switch (op) {
        case RE_OP_ANY:
        case RE_OP_ANY_ALL:
            /* A simple opcode with no trailing codewords and width of 1. */
            if (!build_simple(args, op, 1))
                return FALSE;
            break;
        case RE_OP_ANY_REV:
        case RE_OP_ANY_ALL_REV:
            /* A simple opcode with no trailing codewords and width of 1. */
            if (!build_simple(args, op, -1))
                return FALSE;
            break;
        case RE_OP_ATOMIC:
            /* An atomic sequence. */
            if (!build_atomic(args))
                return FALSE;
            break;
        case RE_OP_BOUNDARY:
            /* A word boundary. */
            if (!build_boundary(args))
                return FALSE;
            break;
        case RE_OP_BRANCH:
            /* A 2-way branch. */
            if (!build_branch(args))
                return FALSE;
            break;
        case RE_OP_CATEGORY:
            /* A category. */
            if (!build_category(args, op, 1))
                return FALSE;
            break;
        case RE_OP_CATEGORY_REV:
            /* A category. */
            if (!build_category(args, op, -1))
                return FALSE;
            break;
        case RE_OP_CHARACTER:
        case RE_OP_CHARACTER_IGNORE:
            /* A character literal. */
            if (!build_character(args, op, 1))
                return FALSE;
            break;
        case RE_OP_CHARACTER_REV:
        case RE_OP_CHARACTER_IGNORE_REV:
            /* A character literal. */
            if (!build_character(args, op, -1))
                return FALSE;
            break;
        case RE_OP_END_OF_LINE:
        case RE_OP_END_OF_STRING:
        case RE_OP_END_OF_STRING_LINE:
        case RE_OP_SEARCH_ANCHOR:
        case RE_OP_START_OF_LINE:
        case RE_OP_START_OF_STRING:
            /* A simple opcode with no trailing codewords and width of 0. */
            if (!build_zerowidth(args, op))
                return FALSE;
            break;
        case RE_OP_GREEDY_REPEAT:
        case RE_OP_LAZY_REPEAT:
            /* A repeated sequence. */
            if (!build_repeat(args, op))
                return FALSE;
            break;
        case RE_OP_GROUP:
            /* A capture group. */
            if (!build_group(args))
                return FALSE;
            break;
        case RE_OP_GROUP_EXISTS:
            /* A conditional sequence. */
            if (!build_group_exists(args))
                return FALSE;
            break;
        case RE_OP_LOOKAROUND:
            /* A lookaround. */
            if (!build_lookaround(args))
                return FALSE;
            break;
        case RE_OP_REF_GROUP:
        case RE_OP_REF_GROUP_IGNORE:
        case RE_OP_REF_GROUP_REV:
        case RE_OP_REF_GROUP_IGNORE_REV:
            /* A reference to a group. */
            if (!build_ref_group(args, op))
                return FALSE;
            break;
        case RE_OP_SET:
        case RE_OP_SET_IGNORE:
            /* A character set. */
            if (!build_set(args, op, 1))
                return FALSE;
            break;
        case RE_OP_SET_REV:
        case RE_OP_SET_IGNORE_REV:
            /* A character set. */
            if (!build_set(args, op, -1))
                return FALSE;
            break;
        case RE_OP_STRING:
        case RE_OP_STRING_IGNORE:
            /* A string literal. */
            if (!build_string(args, op, 1))
                return FALSE;
            break;
        case RE_OP_STRING_REV:
        case RE_OP_STRING_IGNORE_REV:
            /* A string literal. */
            if (!build_string(args, op, -1))
                return FALSE;
            break;
        default:
            /* We've found an opcode which we don't recognise. We'll leave it
             * for the caller. (The loop condition has already established that
             * it lies within the code.)
             */
            return TRUE;
        }
    }

    /* If we get here then we're past the end of the regular expression code,
     * but the code should end with 'SUCCESS' (which isn't recognised by this
     * function), so we have an error.
     */
    return FALSE;
}

/* Prepares some search tables if there's an initial string prefix.
 *
 * The tables are for a Boyer-Moore fast string search.
 *
 * We have to cater for case-insensitive and Unicode matching.
 *
 * For Unicode the 'bad_character_offset' table would take up too much space,
 * so we'll restrict the character codes to the lower 8 bits. This will mean
 * that the tables will be less 'accurate' and the search might be slower, but
 * hopefully still acceptable.
 */
Py_LOCAL(BOOL) prepare_prefix(PatternObject* pattern) {
    RE_Node* node;

    /* Look for the string prefix, if any. */
    node = pattern->start_node;
    while (node->op == RE_OP_BEGIN_GROUP || node->op == RE_OP_END_GROUP)
        node = node->next_1.node;

    /* We won't bother if the prefix is very short. */
    if (node->value_count < MIN_FAST_PREFIX)
        return TRUE;

    switch (node->op) {
    case RE_OP_STRING:
    {
        Py_ssize_t length;
        RE_CODE* values;
        Py_ssize_t* bad_character_offset;
        Py_ssize_t* good_suffix_offset;
        RE_CODE ch;
        Py_ssize_t last_pos;
        Py_ssize_t pos;
        Py_ssize_t suffix_len;
        BOOL saved_start;
        Py_ssize_t s;
        Py_ssize_t i;
        Py_ssize_t s_start;

        length = node->value_count;
        values = node->values;

        bad_character_offset = (Py_ssize_t*)re_alloc(256 *
          sizeof(bad_character_offset[0]));
        good_suffix_offset = (Py_ssize_t*)re_alloc(length *
          sizeof(good_suffix_offset[0]));

        if (!bad_character_offset || !good_suffix_offset) {
            /* At least one allocation failed, but we'll try to continue. */
            re_dealloc(bad_character_offset);
            re_dealloc(good_suffix_offset);

            return TRUE;
        }

        for (ch = 0; ch < 256; ch++)
            bad_character_offset[ch] = length;

        last_pos = length - 1;

        for (pos = 0; pos < last_pos; pos++) {
            Py_ssize_t offset;

            offset = last_pos - pos;
            ch = values[pos] & 0xFF;
            bad_character_offset[ch] = offset;
        }

        suffix_len = 2;
        pos = length - suffix_len;
        saved_start = FALSE;
        s = pos - 1;
        i = suffix_len - 1;
        while (pos >= 0) {
            /* Look for another occurrence of the suffix. */
            while (i > 0) {
                /* Have we dropped off the end of the string? */
                if (s + i < 0)
                    break;

                if (values[s + i] == values[pos + i])
                    /* It still matches. */
                    --i;
                else {
                    /* Start again further along. */
                    --s;
                    i = suffix_len - 1;
                }
            }

            if (s >= 0 && values[s] == values[pos]) {
                /* We haven't dropped off the end of the string, and the suffix
                 * has matched this far, so this is a good starting point for
                 * the next iteration.
                 */
                --s;
                if (!saved_start) {
                    s_start = s;
                    saved_start = TRUE;
                }
            } else {
                /* Calculate the suffix offset. */
                good_suffix_offset[pos] = pos - s;

                /* Extend the suffix and start searching for _this_ one. */
                --pos;
                ++suffix_len;

                /* Where's a good place to start searching? */
                if (saved_start) {
                    s = s_start;
                    saved_start = FALSE;
                } else
                    --s;

                /* Can we short-circuit the searching? */
                if (s < 0)
                    break;
            }

            i = suffix_len - 1;
        }

        /* Fill-in any remaining entries. */
        while (pos >= 0) {
            good_suffix_offset[pos] = pos - s;
            --pos;
            --s;
        }

        pattern->bad_character_offset = bad_character_offset;
        pattern->good_suffix_offset = good_suffix_offset;
        break;
    }
    case RE_OP_STRING_IGNORE:
    {
        BOOL ascii;
        BOOL locale;
        BOOL unicode;
        RE_EncodingTable* encoding;
        Py_ssize_t length;
        RE_CODE* values;
        Py_ssize_t* bad_character_offset;
        Py_ssize_t* good_suffix_offset;
        RE_CODE ch;
        Py_ssize_t last_pos;
        Py_ssize_t pos;
        BOOL (*same_char_ignore)(RE_CODE ch1, RE_CODE ch2);
        Py_ssize_t suffix_len;
        BOOL saved_start;
        Py_ssize_t s;
        Py_ssize_t i;
        Py_ssize_t s_start;

        /* We want to prepare the tables, but we also need to make it
         * case-insensitive. We can do this only if the regex has a specific
         * encoding.
         */
        unicode = pattern->flags & RE_FLAG_UNICODE;
        locale = pattern->flags & RE_FLAG_LOCALE;
        ascii = pattern->flags & RE_FLAG_ASCII;
        if (!unicode && !locale && !ascii)
            /* No specific encoding, so no fast search. */
            return TRUE;

        if (unicode)
            encoding = &unicode_encoding;
        else if (locale)
            encoding = &locale_encoding;
        else if (ascii)
            encoding = &ascii_encoding;

        length = node->value_count;
        values = node->values;

        bad_character_offset = (Py_ssize_t*)re_alloc(256 *
          sizeof(bad_character_offset[0]));
        good_suffix_offset = (Py_ssize_t*)re_alloc(length *
          sizeof(good_suffix_offset[0]));

        if (!bad_character_offset || !good_suffix_offset) {
            /* At least one allocation failed, but we'll try to continue. */
            re_dealloc(bad_character_offset);
            re_dealloc(good_suffix_offset);

            return TRUE;
        }

        for (ch = 0; ch < 256; ch++)
            bad_character_offset[ch] = length;

        last_pos = length - 1;

        for (pos = 0; pos < last_pos; pos++) {
            Py_ssize_t offset;

            offset = last_pos - pos;
            ch = encoding->lower(values[pos]) & 0xFF;
            bad_character_offset[ch] = offset;
            ch = encoding->upper(values[pos]) & 0xFF;
            bad_character_offset[ch] = offset;
            ch = encoding->title(values[pos]) & 0xFF;
            bad_character_offset[ch] = offset;
        }

        same_char_ignore = encoding->same_char_ignore;

        suffix_len = 2;
        pos = length - suffix_len;
        saved_start = FALSE;
        s = pos - 1;
        i = suffix_len - 1;
        while (pos >= 0) {
            /* Look for another occurrence of the suffix. */
            while (i > 0) {
                /* Have we dropped off the end of the string? */
                if (s + i < 0)
                    break;

                if (same_char_ignore(values[s + i], values[pos + i]))
                    /* It still matches. */
                    --i;
                else {
                    /* Start again further along. */
                    --s;
                    i = suffix_len - 1;
                }
            }

            if (s >= 0 && same_char_ignore(values[s], values[pos])) {
                /* We haven't dropped off the end of the string, and the suffix
                 * has matched this far, so this is a good starting point for
                 * the next iteration.
                 */
                --s;
                if (!saved_start) {
                    s_start = s;
                    saved_start = TRUE;
                }
            } else {
                /* Calculate the suffix offset. */
                good_suffix_offset[pos] = pos - s;

                /* Extend the suffix and start searching for _this_ one. */
                --pos;
                ++suffix_len;

                /* Where's a good place to start searching? */
                if (saved_start) {
                    s = s_start;
                    saved_start = FALSE;
                } else
                    --s;

                /* Can we short-circuit the searching? */
                if (s < 0)
                    break;
            }

            i = suffix_len - 1;
        }

        /* Fill-in any remaining entries. */
        while (pos >= 0) {
            good_suffix_offset[pos] = pos - s;
            --pos;
            --s;
        }

        pattern->bad_character_offset = bad_character_offset;
        pattern->good_suffix_offset = good_suffix_offset;
        break;
    }
    case RE_OP_STRING_IGNORE_REV:
    {
        BOOL ascii;
        BOOL locale;
        BOOL unicode;
        RE_EncodingTable* encoding;
        Py_ssize_t length;
        RE_CODE* values;
        Py_ssize_t* bad_character_offset;
        Py_ssize_t* good_suffix_offset;
        RE_CODE ch;
        Py_ssize_t pos;
        BOOL (*same_char_ignore)(RE_CODE ch1, RE_CODE ch2);
        Py_ssize_t suffix_len;
        BOOL saved_start;
        Py_ssize_t s;
        Py_ssize_t i;
        Py_ssize_t s_start;

        /* We want to prepare the tables, but we also need to make it
         * case-insensitive. We can do this only if the regex has a specific
         * encoding.
         */
        unicode = pattern->flags & RE_FLAG_UNICODE;
        locale = pattern->flags & RE_FLAG_LOCALE;
        ascii = pattern->flags & RE_FLAG_ASCII;
        if (!unicode && !locale && !ascii)
            /* No specific encoding, so no fast search. */
            return TRUE;

        if (unicode)
            encoding = &unicode_encoding;
        else if (locale)
            encoding = &locale_encoding;
        else if (ascii)
            encoding = &ascii_encoding;

        length = node->value_count;
        values = node->values;

        bad_character_offset = (Py_ssize_t*)re_alloc(256 *
          sizeof(bad_character_offset[0]));
        good_suffix_offset = (Py_ssize_t*)re_alloc(length *
          sizeof(good_suffix_offset[0]));

        if (!bad_character_offset || !good_suffix_offset) {
            /* At least one allocation failed, but we'll try to continue. */
            re_dealloc(bad_character_offset);
            re_dealloc(good_suffix_offset);

            return TRUE;
        }

        for (ch = 0; ch < 256; ch++)
            bad_character_offset[ch] = -length;

        for (pos = length - 1; pos >= 1; pos--) {
            Py_ssize_t offset;

            offset = -pos;
            ch = encoding->lower(values[pos]) & 0xFF;
            bad_character_offset[ch] = offset;
            ch = encoding->upper(values[pos]) & 0xFF;
            bad_character_offset[ch] = offset;
            ch = encoding->title(values[pos]) & 0xFF;
            bad_character_offset[ch] = offset;
        }

        same_char_ignore = encoding->same_char_ignore;

        suffix_len = 2;
        pos = suffix_len - 1;
        saved_start = FALSE;
        s = pos + 1;
        i = suffix_len - 1;
        while (pos < length) {
            /* Look for another occurrence of the suffix. */
            while (i > 0) {
                /* Have we dropped off the end of the string? */
                if (s - i >= length)
                    break;

                if (same_char_ignore(values[s - i], values[pos - i]))
                    /* It still matches. */
                    --i;
                else {
                    /* Start again further along. */
                    ++s;
                    i = suffix_len - 1;
                }
            }

            if (s < length && same_char_ignore(values[s], values[pos])) {
                /* We haven't dropped off the end of the string, and the suffix
                 * has matched this far, so this is a good starting point for
                 * the next iteration.
                 */
                ++s;
                if (!saved_start) {
                    s_start = s;
                    saved_start = TRUE;
                }
            } else {
                /* Calculate the suffix offset. */
                good_suffix_offset[pos] = pos - s;

                /* Extend the suffix and start searching for _this_ one. */
                ++pos;
                ++suffix_len;

                /* Where's a good place to start searching? */
                if (saved_start) {
                    s = s_start;
                    saved_start = FALSE;
                } else
                    ++s;

                /* Can we short-circuit the searching? */
                if (s >= length)
                    break;
            }

            i = suffix_len - 1;
        }

        /* Fill-in any remaining entries. */
        while (pos < length) {
            good_suffix_offset[pos] = pos - s;
            ++pos;
            ++s;
        }

        pattern->bad_character_offset = bad_character_offset;
        pattern->good_suffix_offset = good_suffix_offset;
        break;
    }
    case RE_OP_STRING_REV:
    {
        Py_ssize_t length;
        RE_CODE* values;
        Py_ssize_t* bad_character_offset;
        Py_ssize_t* good_suffix_offset;
        RE_CODE ch;
        Py_ssize_t pos;
        Py_ssize_t suffix_len;
        BOOL saved_start;
        Py_ssize_t s;
        Py_ssize_t i;
        Py_ssize_t s_start;

        length = node->value_count;
        values = node->values;

        bad_character_offset = (Py_ssize_t*)re_alloc(256 *
          sizeof(bad_character_offset[0]));
        good_suffix_offset = (Py_ssize_t*)re_alloc(length *
          sizeof(good_suffix_offset[0]));

        if (!bad_character_offset || !good_suffix_offset) {
            /* At least one allocation failed, but we'll try to continue. */
            re_dealloc(bad_character_offset);
            re_dealloc(good_suffix_offset);

            return TRUE;
        }

        for (ch = 0; ch < 256; ch++)
            bad_character_offset[ch] = -length;

        for (pos = length - 1; pos >= 1; pos--) {
            Py_ssize_t offset;

            offset = -pos;
            ch = values[pos] & 0xFF;
            bad_character_offset[ch] = offset;
        }

        suffix_len = 2;
        pos = suffix_len - 1;
        saved_start = FALSE;
        s = pos + 1;
        i = suffix_len - 1;
        while (pos < length) {
            /* Look for another occurrence of the suffix. */
            while (i > 0) {
                /* Have we dropped off the end of the string? */
                if (s - i >= length)
                    break;

                if (values[s - i] == values[pos - i])
                    /* It still matches. */
                    --i;
                else {
                    /* Start again further along. */
                    ++s;
                    i = suffix_len - 1;
                }
            }

            if (s < length && values[s] == values[pos]) {
                /* We haven't dropped off the end of the string, and the suffix
                 * has matched this far, so this is a good starting point for
                 * the next iteration.
                 */
                ++s;
                if (!saved_start) {
                    s_start = s;
                    saved_start = TRUE;
                }
            } else {
                /* Calculate the suffix offset. */
                good_suffix_offset[pos] = pos - s;

                /* Extend the suffix and start searching for _this_ one. */
                ++pos;
                ++suffix_len;

                /* Where's a good place to start searching? */
                if (saved_start) {
                    s = s_start;
                    saved_start = FALSE;
                } else
                    ++s;

                /* Can we short-circuit the searching? */
                if (s >= length)
                    break;
            }

            i = suffix_len - 1;
        }

        /* Fill-in any remaining entries. */
        while (pos < length) {
            good_suffix_offset[pos] = pos - s;
            ++pos;
            ++s;
        }

        pattern->bad_character_offset = bad_character_offset;
        pattern->good_suffix_offset = good_suffix_offset;
        break;
    }
    }

    return TRUE;
}

/* Compiles the regular expression code to 'nodes'.
 *
 * code, end_code: the first codeword and one past the last codeword.
 * pattern: the PatternObject which receives the nodes. Its 'min_width',
 *   'start_node', 'success_node' and 'data_count' members are set here; other
 *   details are recorded by the functions which are called.
 *
 * The code must consist of a single sequence followed by 'SUCCESS' as the
 * very last codeword.
 *
 * Returns FALSE if the code is illegal or if any step fails.
 */
Py_LOCAL(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, PatternObject*
  pattern) {
    RE_CompileArgs state;
    RE_Node* final_node;

    /* Set up the state for compiling the top-level sequence. The direction
     * comes from the pattern's flags.
     */
    state.pattern = pattern;
    state.code = code;
    state.end_code = end_code;
    state.min_width = 0;
    state.forward = !(pattern->flags & RE_FLAG_REVERSE);

    /* If this succeeds then 'start' and 'end' in the state refer to the first
     * and last nodes of the sequence.
     */
    if (!build_sequence(&state))
        return FALSE;

    /* The only thing left should be the final 'SUCCESS'. */
    if (!(state.code + 1 == end_code && state.code[0] == RE_OP_SUCCESS))
        return FALSE;

    pattern->min_width = state.min_width;

    /* Terminate the sequence with a 'SUCCESS' node. */
    final_node = create_node(pattern, RE_OP_SUCCESS, FALSE, 0, 0);
    if (!final_node)
        return FALSE;

    add_node(state.end, final_node);
    pattern->start_node = state.start;
    pattern->success_node = final_node;

    pattern->data_count = pattern->group_count + pattern->repeat_count;

    /* Optimise the pattern and then prepare for a fast search of any string
     * prefix.
     */
    if (optimise_pattern(pattern) && prepare_prefix(pattern))
        return TRUE;

    return FALSE;
}

/* Compiles regular expression code to a PatternObject.
 *
 * The regular expression code is provided as a list and is then compiled to
 * 'nodes'. Various details about the regular expression are discovered during
 * compilation and stored in the PatternObject.
 *
 * The Python-level arguments are (pattern, flags, code_list, groupindex,
 * indexgroup). 'flags' must be an integer and 'code_list' must be a list of
 * non-negative integers, each of which fits in a codeword.
 *
 * Returns a new PatternObject, or NULL with an exception set if the arguments
 * are unsuitable or the code is illegal.
 */
static PyObject* re_compile(PyObject* self_, PyObject* args) {
    PyObject* pattern;
    Py_ssize_t flags = 0;
    PyObject* code_list;
    PyObject* groupindex;
    PyObject* indexgroup;
    Py_ssize_t code_len;
    RE_CODE* code;
    Py_ssize_t i;
    PatternObject* self;
    BOOL ok;

    /* 'n' matches the Py_ssize_t which receives the flags. 'O!' ensures that
     * the code really is a list before the PyList_GET_* macros, which don't
     * check, are applied to it.
     */
    if (!PyArg_ParseTuple(args, "OnO!OO", &pattern, &flags, &PyList_Type,
      &code_list, &groupindex, &indexgroup))
        return NULL;

    /* Read the regular expression code. */
    code_len = PyList_GET_SIZE(code_list);
    if (code_len == 0) {
        /* There must be at least a 'SUCCESS' opcode. Reporting it here also
         * avoids a zero-sized allocation.
         */
        set_error(RE_ERROR_ILLEGAL, NULL);
        return NULL;
    }

    code = (RE_CODE*)re_alloc(code_len * sizeof(RE_CODE));
    if (!code)
        /* NOTE(review): this relies on re_alloc having set the exception -
         * confirm.
         */
        return NULL;

    for (i = 0; i < code_len; i++) {
        PyObject* o = PyList_GET_ITEM(code_list, i);
        unsigned long value;

        if (PyInt_Check(o)) {
            long svalue;

            svalue = PyInt_AsLong(o);
            if (svalue < 0)
                goto error;

            value = svalue;
        } else if (PyLong_Check(o)) {
            value = PyLong_AsUnsignedLong(o);
            if (PyErr_Occurred())
                goto error;
        } else
            goto error;

        /* Reject a value which doesn't survive the conversion to a codeword.
         */
        code[i] = (RE_CODE)value;
        if (code[i] != value)
            goto error;
    }

    /* Create the PatternObject. */
    self = PyObject_NEW(PatternObject, &Pattern_Type);
    if (!self) {
        set_error(RE_ERROR_MEMORY, NULL);
        re_dealloc(code);
        return NULL;
    }

    /* Initialise the PatternObject. */
    self->pattern = pattern;
    self->flags = flags;
    self->weakreflist = NULL;
    self->start_node = NULL;
    self->success_node = NULL;
    self->repeat_count = 0;
    self->group_count = 0;
    self->group_end_index = 0;
    self->groupindex = groupindex;
    self->indexgroup = indexgroup;
    self->node_capacity = 0;
    self->node_count = 0;
    self->node_list = NULL;
    self->group_info_capacity = 0;
    self->group_info = NULL;
    self->repeat_info_capacity = 0;
    self->repeat_info = NULL;
    /* The PatternObject keeps its own references to these. */
    Py_INCREF(self->pattern);
    Py_INCREF(self->groupindex);
    Py_INCREF(self->indexgroup);
    self->bad_character_offset = NULL;
    self->good_suffix_offset = NULL;

    /* Compile the regular expression code to nodes. */
    ok = compile_to_nodes(code, code + code_len, self);

    /* We no longer need the regular expression code. */
    re_dealloc(code);

    if (!ok) {
        /* Report illegal code unless something more specific has already been
         * raised.
         */
        if (!PyErr_Occurred())
            set_error(RE_ERROR_ILLEGAL, NULL);

        Py_DECREF(self);
        return NULL;
    }

    return (PyObject*)self;

error:
    /* An item of the code list wasn't a suitable integer. */
    re_dealloc(code);
    set_error(RE_ERROR_ILLEGAL, NULL);
    return NULL;
}

/* Gets the size of the codewords.
 *
 * Returns sizeof(RE_CODE) as a Python integer.
 */
static PyObject* re_get_code_size(PyObject* self, PyObject* unused) {
    /* The 'l' format unit reads a long from the variable arguments, so pass
     * one; sizeof yields a size_t, which needn't have the same width.
     */
    return Py_BuildValue("l", (long)sizeof(RE_CODE));
}

/* Sets the exception to return on error.
 *
 * The single Python-level argument is stored in 'error_exception' and a
 * reference to it is taken. Returns None, or NULL if the arguments couldn't be
 * parsed.
 */
static PyObject* re_set_exception(PyObject* self_, PyObject* args) {
    PyObject* exception;

    if (PyArg_ParseTuple(args, "O", &exception)) {
        /* The parsed reference is borrowed, so take one of our own. */
        error_exception = exception;
        Py_INCREF(error_exception);

        Py_INCREF(Py_None);
        return Py_None;
    }

    return NULL;
}

/* The table of the module's functions.
 *
 * Each entry gives the Python-level name, the C function, the calling
 * convention and the docstring (none are provided). The table is terminated
 * by an entry with a NULL name.
 */
static PyMethodDef _functions[] = {
    {"compile", (PyCFunction)re_compile, METH_VARARGS, NULL},
    {"get_code_size", (PyCFunction)re_get_code_size, METH_NOARGS, NULL},
    {"set_exception", (PyCFunction)re_set_exception, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL}
};

/* Initialises the module.
 *
 * Completes the type objects, sets the default error exception, creates the
 * module with its function table, and then adds the 'MAGIC', 'CODE_SIZE' and
 * 'copyright' attributes to it. An attribute whose value can't be created is
 * left out.
 */
PyMODINIT_FUNC init_regex(void) {
    PyObject* module;
    PyObject* dict;
    PyObject* value;
#if defined(VERBOSE)
    /* Unbuffered in case it crashes! */
    setvbuf(stdout, NULL, _IONBF, 0);
#endif

    /* Patch object types. */
    Splitter_Type.ob_type = &PyType_Type;
    Scanner_Type.ob_type = &PyType_Type;
    Match_Type.ob_type = &PyType_Type;
    Pattern_Type.ob_type = &PyType_Type;

    /* This is used until re_set_exception provides another. */
    error_exception = PyExc_RuntimeError;

    module = Py_InitModule("_" RE_MODULE, _functions);
    if (!module)
        return;
    dict = PyModule_GetDict(module);

    /* The magic number. */
    value = PyInt_FromLong(RE_MAGIC);
    if (value) {
        PyDict_SetItemString(dict, "MAGIC", value);
        Py_DECREF(value);
    }

    /* The size of a codeword. */
    value = PyInt_FromLong(sizeof(RE_CODE));
    if (value) {
        PyDict_SetItemString(dict, "CODE_SIZE", value);
        Py_DECREF(value);
    }

    /* The copyright notice. */
    value = PyString_FromString(copyright);
    if (value) {
        PyDict_SetItemString(dict, "copyright", value);
        Py_DECREF(value);
    }
}
#endif

/* vim:ts=4:sw=4:et */
