view src/testdir/test95.in @ 5899:60cdaa05a6ad v7.4.292

updated for version 7.4.292 Problem: Searching for "a" does not match accented "a" with new regexp engine, does match with old engine. (David B?rgin) "ca" does not match "ca" with accented "a" with either engine. Solution: Change the old engine, check for following composing character also for single-byte patterns.
author Bram Moolenaar <bram@vim.org>
date Tue, 13 May 2014 18:04:00 +0200
parents 9982ec574beb
children fdea5ea9afd1
line wrap: on
line source

Test for regexp patterns with multi-byte support, using utf-8.
See test64 for the non-multi-byte tests.

A pattern that gives the expected result produces OK, so that we know it was
actually tried.

STARTTEST
:so small.vim
:so mbyte.vim
:set nocp encoding=utf-8 viminfo+=nviminfo nomore
:" tl is a List of Lists with:
:"    2: test auto/old/new  0: test auto/old  1: test auto/new
:"    regexp pattern
:"    text to test the pattern on
:"    expected match (optional)
:"    expected submatch 1 (optional)
:"    expected submatch 2 (optional)
:"    etc.
:"  When there is no match use only the first two items.
:let tl = []

:"""" Multi-byte character tests. These will fail unless vim is compiled
:"""" with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
:call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
:call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])								" equivalence classes
:call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
:call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
:call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

:" this is not a normal "i" but 0xec
:call add(tl, [2, '\p\+', 'ìa', 'ìa'])
:call add(tl, [2, '\p*', 'aあ', 'aあ'])

:"""" Test recognition of some character classes
:call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
:call add(tl, [2, '\f\+', '&*Ÿfname ', 'fname'])

:"""" Test composing character matching
:call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
:call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
:call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
:call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
:call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
:call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
:call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
:call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
:call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, "a", "ca\u0300t"])
:call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])


:"""" Test \Z
:call add(tl, [2, 'ú\Z', 'x'])
:call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
:call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
:call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
:call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
:call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
:call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
:call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
:call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
:call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
:call add(tl, [2, "\u05b9\\Z", "xyz"])
:call add(tl, [2, "\\Z\u05b9", "xyz"])
:call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
:call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
:call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
:call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

:"""" Combining different tests and features
:call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

:"""" Run the tests

:"
:for t in tl
:  let re = t[0]
:  let pat = t[1]
:  let text = t[2]
:  let matchidx = 3
:  for engine in [0, 1, 2]
:    if engine == 2 && re == 0 || engine == 1 && re == 1
:      continue
:    endif
:    let &regexpengine = engine
:    try
:      let l = matchlist(text, pat)
:    catch
:      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
:    endtry
:" check the match itself
:    if len(l) == 0 && len(t) > matchidx
:      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
:    elseif len(l) > 0 && len(t) == matchidx
:      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match'
:    elseif len(t) > matchidx && l[0] != t[matchidx]
:      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"'
:    else
:      $put ='OK ' . engine . ' - ' . pat
:    endif
:    if len(l) > 0
:"   check all the nine submatches
:      for i in range(1, 9)
:        if len(t) <= matchidx + i
:          let e = ''
:        else
:          let e = t[matchidx + i]
:        endif
:        if l[i] != e
:          $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
:        endif
:      endfor
:      unlet i
:    endif
:  endfor
:endfor
:unlet t tl e l

:" check that 'ambiwidth' does not change the meaning of \p
:set regexpengine=1 ambiwidth=single
:$put ='eng 1 ambi single: ' . match(\"\u00EC\", '\p')
:set regexpengine=1 ambiwidth=double
:$put ='eng 1 ambi double: ' . match(\"\u00EC\", '\p')
:set regexpengine=2 ambiwidth=single
:$put ='eng 2 ambi single: ' . match(\"\u00EC\", '\p')
:set regexpengine=2 ambiwidth=double
:$put ='eng 2 ambi double: ' . match(\"\u00EC\", '\p')

:/\%#=1^Results/,$wq! test.out
ENDTEST

Results of test95: