Mercurial > vim
changeset 8995:3cf6704d6efc v7.4.1783
commit https://github.com/vim/vim/commit/af98a49dd0ef1661b4998f118151fddbf6e4df75
Author: Bram Moolenaar <Bram@vim.org>
Date: Sun Apr 24 14:40:12 2016 +0200
patch 7.4.1783
Problem: The old regexp engine doesn't handle character classes correctly.
(Manuel Ortega)
Solution: Use regmbc() instead of regc(). Add a test.
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sun, 24 Apr 2016 14:45:04 +0200 |
parents | c48891ecfef2 |
children | d25ba6b80388 |
files | src/regexp.c src/testdir/test_regexp_utf8.vim src/version.c |
diffstat | 3 files changed, 68 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -2544,12 +2544,12 @@ collection:
                     case CLASS_ALNUM:
                         for (cu = 1; cu <= 255; cu++)
                             if (isalnum(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_ALPHA:
                         for (cu = 1; cu <= 255; cu++)
                             if (isalpha(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_BLANK:
                         regc(' ');
@@ -2558,32 +2558,32 @@ collection:
                     case CLASS_CNTRL:
                         for (cu = 1; cu <= 255; cu++)
                             if (iscntrl(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_DIGIT:
                         for (cu = 1; cu <= 255; cu++)
                             if (VIM_ISDIGIT(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_GRAPH:
                         for (cu = 1; cu <= 255; cu++)
                             if (isgraph(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_LOWER:
                         for (cu = 1; cu <= 255; cu++)
                             if (MB_ISLOWER(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_PRINT:
                         for (cu = 1; cu <= 255; cu++)
                             if (vim_isprintc(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_PUNCT:
                         for (cu = 1; cu <= 255; cu++)
                             if (ispunct(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_SPACE:
                         for (cu = 9; cu <= 13; cu++)
@@ -2593,12 +2593,12 @@ collection:
                     case CLASS_UPPER:
                         for (cu = 1; cu <= 255; cu++)
                             if (MB_ISUPPER(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_XDIGIT:
                         for (cu = 1; cu <= 255; cu++)
                             if (vim_isxdigit(cu))
-                                regc(cu);
+                                regmbc(cu);
                         break;
                     case CLASS_TAB:
                         regc('\t');
```
```diff
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -33,3 +33,59 @@ func Test_equivalence_re2()
   set re=2
   call s:equivalence_test()
 endfunc
+
+func s:classes_test()
+  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
+
+  let alphachars = ''
+  let lowerchars = ''
+  let upperchars = ''
+  let alnumchars = ''
+  let printchars = ''
+  let punctchars = ''
+  let xdigitchars = ''
+  let i = 1
+  while i <= 255
+    let c = nr2char(i)
+    if c =~ '[[:alpha:]]'
+      let alphachars .= c
+    endif
+    if c =~ '[[:lower:]]'
+      let lowerchars .= c
+    endif
+    if c =~ '[[:upper:]]'
+      let upperchars .= c
+    endif
+    if c =~ '[[:alnum:]]'
+      let alnumchars .= c
+    endif
+    if c =~ '[[:print:]]'
+      let printchars .= c
+    endif
+    if c =~ '[[:punct:]]'
+      let punctchars .= c
+    endif
+    if c =~ '[[:xdigit:]]'
+      let xdigitchars .= c
+    endif
+    let i += 1
+  endwhile
+
+  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
+  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
+  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
+  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
+  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
+  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
+endfunc
+
+func Test_classes_re1()
+  set re=1
+  call s:classes_test()
+endfunc
+
+func Test_classes_re2()
+  set re=2
+  call s:classes_test()
+endfunc
```