view src/regexp.h @ 32721:94f4a488412e v9.0.1683

Updated runtime files Commit: https://github.com/vim/vim/commit/6efb1980336ff324e9c57a4e282530b952fca816 Author: Christian Brabandt <cb@256bit.org> Date: Thu Aug 10 05:44:25 2023 +0200 Updated runtime files This is a collection of various PRs from github that all require a minor patch number: 1) https://github.com/vim/vim/pull/12612 Do not conflate dictionary key with end of block 2) https://github.com/vim/vim/pull/12729: When saving and restoring 'undolevels', the constructs `&undolevels` and `:set undolevels` are problematic. The construct `&undolevels` reads an unpredictable value; it will be the local option value (if one has been set), or the global option value (otherwise), making it unsuitable for saving a value for later restoration. Similarly, if a local option value has been set for 'undolevels', temporarily modifying the option via `:set undolevels` changes the local value as well as the global value, requiring extra work to restore both values. Saving and restoring the option value in one step via the construct `:let &undolevels = &undolevels` appears to make no changes to the 'undolevels' option, but if a local option has been set to a different value than the global option, it has the unintended effect of changing the global 'undolevels' value to the local value. Update the documentation to explain these issues and recommend explicit use of global and local option values when saving and restoring. Update some unit tests to use `g:undolevels`. 3) https://github.com/vim/vim/pull/12702: Problem: Pip requirements files are not recognized. Solution: Add a pattern to match pip requirements files. 4) https://github.com/vim/vim/pull/12688: Add indent file and tests for ABB Rapid 5) https://github.com/vim/vim/pull/12668: Use Lua 5.1 numeric escapes in tests and add to CI Only Lua 5.2+ and LuaJIT understand hexadecimal escapes in strings. 
Lua 5.1 only supports decimal escapes: > A character in a string can also be specified by its numerical value > using the escape sequence \ddd, where ddd is a sequence of up to three > decimal digits. (Note that if a numerical escape is to be followed by a > digit, it must be expressed using exactly three digits.) Strings in Lua > can contain any 8-bit value, including embedded zeros, which can be > specified as '\0'. To make sure this works with Lua 5.4 and Lua 5.1 change the Vim CI to run with Lua 5.1 as well as Lua 5.4 6) https://github.com/vim/vim/pull/12631: Add hurl filetype detection 7) https://github.com/vim/vim/pull/12573: Problem: Files for haskell persistent library are not recognized Solution: Add pattern persistentmodels for haskell persistent library closes: #12612 closes: #12729 closes: #12702 closes: #12688 closes: #12668 closes: #12631 closes: #12573 Co-authored-by: lacygoill <lacygoill@lacygoill.me> Co-authored-by: Michael Henry <drmikehenry@drmikehenry.com> Co-authored-by: ObserverOfTime <chronobserver@disroot.org> Co-authored-by: KnoP-01 <knosowski@graeffrobotics.de> Co-authored-by: James McCoy <jamessan@jamessan.com> Co-authored-by: Jacob Pfeifer <jacob@pfeifer.dev> Co-authored-by: Borys Lykah <lykahb@fastmail.com>
author Christian Brabandt <cb@256bit.org>
date Thu, 10 Aug 2023 06:30:06 +0200
parents 7fb4e244b16e
children 90063f44c99a
line wrap: on
line source

/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 *
 * This is NOT the original regular expression code as written by Henry
 * Spencer.  This code has been modified specifically for use with Vim, and
 * should not be used apart from compiling Vim.  If you want a good regular
 * expression library, get the original code.
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 */

#ifndef _REGEXP_H
#define _REGEXP_H

/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 * NSUBEXP is used as the array size of startp[]/endp[] in regmatch_T,
 * startpos[]/endpos[] in regmmatch_T and matches[] in reg_extmatch_T.
 */
#define NSUBEXP  10

/*
 * In the NFA engine: how many braces are allowed.
 * TODO(RE): Use dynamic memory allocation instead of a static limit like the
 * one here.
 */
#define NFA_MAX_BRACES 20

/*
 * In the NFA engine: how many states are allowed
 */
#define NFA_MAX_STATES 100000
// NOTE(review): presumably a special (negative) result value used by the NFA
// engine when matching is considered too costly - confirm against the NFA
// engine code, its use is not visible in this header.
#define NFA_TOO_EXPENSIVE (-1)

// Which regexp engine to use? Needed for vim_regcomp().
// Must match with 'regexpengine'.
// The "re_engine" member of regprog_T is documented as holding one of these
// three choices.
#define	    AUTOMATIC_ENGINE	0
#define	    BACKTRACKING_ENGINE	1
#define	    NFA_ENGINE		2

// Forward declaration only.  "struct regengine" itself is defined further
// down, after regmatch_T and regmmatch_T, because its function pointer
// members use those types in their signatures.
typedef struct regengine regengine_T;

/*
 * Structure returned by vim_regcomp() to pass on to vim_regexec().
 * This is the general structure. For the actual matcher, two specific
 * structures are used: bt_regprog_T and nfa_regprog_T, see below.
 * Both of them start with the same five members as this structure, in the
 * same order.  When a member is added or changed here, the two specific
 * structures need the same change.
 */
typedef struct regprog
{
    regengine_T		*engine;     // function table of the engine in use
    unsigned		regflags;
    unsigned		re_engine;   // automatic, backtracking or nfa engine
    unsigned		re_flags;    // second argument for vim_regcomp()
    int			re_in_use;   // prog is being executed
} regprog_T;

/*
 * Structure used by the back track matcher.
 * These fields are only to be used in regexp.c!
 * See regexp.c for an explanation.
 */
typedef struct
{
    // These five members implement regprog_T.  They must stay the same as,
    // and in the same order as, the members of regprog_T.
    regengine_T		*engine;
    unsigned		regflags;
    unsigned		re_engine;
    unsigned		re_flags;
    int			re_in_use;

    // Members specific to the backtracking matcher start here.
    int			regstart;
    char_u		reganch;
    char_u		*regmust;
    int			regmlen;
#ifdef FEAT_SYN_HL
    // Only present when compiled with FEAT_SYN_HL.
    char_u		reghasz;
#endif
    // Declared with one element, the data stored here is longer than that.
    char_u		program[1];	// actually longer..
} bt_regprog_T;

/*
 * One state of the NFA.
 * A state refers to other states through "out" and "out1".  It may also have
 * no outgoing edge at all, which is the case for a NFA_MATCH state.
 */
struct nfa_state
{
    int			c;
    struct nfa_state	*out;
    struct nfa_state	*out1;
    int			id;
    int			lastlist[2]; // 0: normal, 1: recursive
    int			val;
};
typedef struct nfa_state nfa_state_T;

/*
 * Structure used by the NFA matcher.
 */
typedef struct
{
    // These five members implement regprog_T.  They must stay the same as,
    // and in the same order as, the members of regprog_T.
    regengine_T		*engine;
    unsigned		regflags;
    unsigned		re_engine;
    unsigned		re_flags;
    int			re_in_use;

    // Members specific to the NFA matcher start here.
    nfa_state_T		*start;		// points into state[]

    int			reganch;	// pattern starts with ^
    int			regstart;	// char at start of pattern
    char_u		*match_text;	// plain text to match with

    int			has_zend;	// pattern contains \ze
    int			has_backref;	// pattern contains \1 .. \9
#ifdef FEAT_SYN_HL
    // Only present when compiled with FEAT_SYN_HL.
    int			reghasz;
#endif
    char_u		*pattern;
    int			nsubexp;	// number of ()
    int			nstate;
    // Declared with one element, the array stored here is longer than that.
    nfa_state_T		state[1];	// actually longer..
} nfa_regprog_T;

/*
 * Structure to be used for single-line matching.
 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
 * When there is no match, the pointer is NULL.
 * Both arrays have NSUBEXP entries: index 0 is for the whole match, the
 * other indexes are for the sub-matches.
 */
typedef struct
{
    regprog_T		*regprog;	// the compiled pattern
    char_u		*startp[NSUBEXP];
    char_u		*endp[NSUBEXP];

    colnr_T		rm_matchcol;   // match start without "\zs"
    // NOTE(review): presumably non-zero when ignoring case - confirm at the
    // places where this is set.
    int			rm_ic;
} regmatch_T;

/*
 * Structure to be used for multi-line matching.
 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
 * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
 * The line numbers are relative to the first line, thus startpos[0].lnum is
 * always 0.
 * When there is no match, the line number is -1.
 * Both arrays have NSUBEXP entries: index 0 is for the whole match, the
 * other indexes are for the sub-matches.
 */
typedef struct
{
    regprog_T		*regprog;	// the compiled pattern
    lpos_T		startpos[NSUBEXP];
    lpos_T		endpos[NSUBEXP];

    colnr_T		rmm_matchcol;   // match start without "\zs"
    // NOTE(review): presumably non-zero when ignoring case - confirm at the
    // places where this is set.
    int			rmm_ic;
    colnr_T		rmm_maxcol;	// when not zero: maximum column
} regmmatch_T;

/*
 * Structure used to store external references: "\z\(\)" to "\z\1".
 * Use a reference count to avoid the need to copy this around.  When it goes
 * from 1 to zero the matches need to be freed.
 */
typedef struct
{
    short		refcnt;		    // number of references to this
    char_u		*matches[NSUBEXP];  // one entry per sub-match index
} reg_extmatch_T;

/*
 * The functions that make up one regexp engine.
 * The "engine" member of regprog_T (and of the two specific regprog
 * structures) points to one of these.  The comment above each member names
 * the function used for the backtracking engine and for the NFA engine.
 */
struct regengine
{
    // bt_regcomp or nfa_regcomp
    regprog_T	*(*regcomp)(char_u*, int);
    // bt_regfree or nfa_regfree
    void	(*regfree)(regprog_T *);
    // bt_regexec_nl or nfa_regexec_nl
    int		(*regexec_nl)(regmatch_T *, char_u *, colnr_T, int);
    // bt_regexec_mult or nfa_regexec_mult
    long	(*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, int *);
    //char_u	*expr;
};

// Flags used by vim_regsub() and vim_regsub_both()
// Each flag has its own bit, thus they can be combined with "|".
#define REGSUB_COPY	    1
#define REGSUB_MAGIC	    2
#define REGSUB_BACKSLASH    4

#endif	// _REGEXP_H