annotate src/testdir/test95.in @ 4527:55bcaa1d2749 v7.3.1011

Updated for version 7.3.1011. Problem: New regexp engine is inefficient with multi-byte characters. Solution: Handle a character at a time instead of a byte at a time. Also make \Z partly work.
author Bram Moolenaar <bram@vim.org>
date Fri, 24 May 2013 21:59:54 +0200
parents 36ddcf4cecbc
children 432a6b8c7d93
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4476
3cc2dca142a0 updated for version 7.3.986
Bram Moolenaar <bram@vim.org>
parents: 4468
diff changeset
1 Test for regexp patterns with multi-byte support, using utf-8.
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
2 See test64 for the non-multi-byte tests.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
3
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
4 A pattern that gives the expected result produces OK, so that we know it was
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
5 actually tried.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
6
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
7 STARTTEST
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
8 :so small.vim
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
9 :so mbyte.vim
4525
36ddcf4cecbc updated for version 7.3.1010
Bram Moolenaar <bram@vim.org>
parents: 4521
diff changeset
10 :set nocp encoding=utf-8 viminfo+=nviminfo nomore
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
11 :" tl is a List of Lists with:
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
12 :" regexp pattern
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
13 :" text to test the pattern on
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
14 :" expected match (optional)
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
15 :" expected submatch 1 (optional)
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
16 :" expected submatch 2 (optional)
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
17 :" etc.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
18 :" When no match is expected, use only the first two items.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
19 :let tl = []
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
20
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
21 :"""" Multi-byte character tests. These will fail unless vim is compiled
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
22 :"""" with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
23 :call add(tl, ['[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
24 :call add(tl, ['[[=a=]]\+', 'ddaãâbcd', 'aãâ']) " equivalence classes
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
25 :call add(tl, ['[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
26 :call add(tl, [' [^ ]\+', 'start มabcdม ', ' มabcdม'])
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
27 :call add(tl, ['[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
28
4468
dde1cf311be6 updated for version 7.3.982
Bram Moolenaar <bram@vim.org>
parents: 4444
diff changeset
29 :" the "ì" in the next test is not a normal "i" but the character 0xec
dde1cf311be6 updated for version 7.3.982
Bram Moolenaar <bram@vim.org>
parents: 4444
diff changeset
30 :call add(tl, ['\p\+', 'ìa', 'ìa'])
dde1cf311be6 updated for version 7.3.982
Bram Moolenaar <bram@vim.org>
parents: 4444
diff changeset
31
4478
032c9f916f25 updated for version 7.3.987
Bram Moolenaar <bram@vim.org>
parents: 4476
diff changeset
32 :"""" Test recognition of some character classes
4521
8cb14f59a327 updated for version 7.3.1008
Bram Moolenaar <bram@vim.org>
parents: 4478
diff changeset
33 :call add(tl, ['\i\+', '&*¨xx ', 'xx'])
8cb14f59a327 updated for version 7.3.1008
Bram Moolenaar <bram@vim.org>
parents: 4478
diff changeset
34 :call add(tl, ['\%#=1\i\+', '&*¨xx ', 'xx'])
4478
032c9f916f25 updated for version 7.3.987
Bram Moolenaar <bram@vim.org>
parents: 4476
diff changeset
35 :call add(tl, ['\f\+', '&*Ÿfname ', 'fname'])
4521
8cb14f59a327 updated for version 7.3.1008
Bram Moolenaar <bram@vim.org>
parents: 4478
diff changeset
36 :call add(tl, ['\%#=1\f\+', '&*Ÿfname ', 'fname'])
4478
032c9f916f25 updated for version 7.3.987
Bram Moolenaar <bram@vim.org>
parents: 4476
diff changeset
37
4527
55bcaa1d2749 updated for version 7.3.1011
Bram Moolenaar <bram@vim.org>
parents: 4525
diff changeset
38 :"""" Test composing character matching
55bcaa1d2749 updated for version 7.3.1011
Bram Moolenaar <bram@vim.org>
parents: 4525
diff changeset
39 :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
55bcaa1d2749 updated for version 7.3.1011
Bram Moolenaar <bram@vim.org>
parents: 4525
diff changeset
40 :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
55bcaa1d2749 updated for version 7.3.1011
Bram Moolenaar <bram@vim.org>
parents: 4525
diff changeset
41
4525
36ddcf4cecbc updated for version 7.3.1010
Bram Moolenaar <bram@vim.org>
parents: 4521
diff changeset
42 :"""" Test \Z
36ddcf4cecbc updated for version 7.3.1010
Bram Moolenaar <bram@vim.org>
parents: 4521
diff changeset
43 :call add(tl, ['ú\Z', 'x'])
36ddcf4cecbc updated for version 7.3.1010
Bram Moolenaar <bram@vim.org>
parents: 4521
diff changeset
44
4478
032c9f916f25 updated for version 7.3.987
Bram Moolenaar <bram@vim.org>
parents: 4476
diff changeset
45 :"""" Combining different tests and features
032c9f916f25 updated for version 7.3.987
Bram Moolenaar <bram@vim.org>
parents: 4476
diff changeset
46 :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])
032c9f916f25 updated for version 7.3.987
Bram Moolenaar <bram@vim.org>
parents: 4476
diff changeset
47
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
48 :"""" Run the tests
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
49 :"
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
50 :for t in tl
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
51 : let l = matchlist(t[1], t[0])
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
52 :" check the match itself
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
53 : if len(l) == 0 && len(t) > 2
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
54 : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", did not match, expected: \"' . t[2] . '\"'
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
55 : elseif len(l) > 0 && len(t) == 2
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
56 : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected no match'
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
57 : elseif len(t) > 2 && l[0] != t[2]
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
58 : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"'
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
59 : else
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
60 : $put ='OK - ' . t[0]
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
61 : endif
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
62 : if len(l) > 0
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
63 :" check all nine submatches
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
64 : for i in range(1, 9)
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
65 : if len(t) <= i + 2
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
66 : let e = ''
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
67 : else
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
68 : let e = t[i + 2]
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
69 : endif
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
70 : if l[i] != e
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
71 : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
72 : endif
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
73 : endfor
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
74 : unlet i
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
75 : endif
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
76 :endfor
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
77 :unlet t tl e l
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
78
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
79 :/\%#=1^Results/,$wq! test.out
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
80 ENDTEST
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
81
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents:
diff changeset
82 Results of test95: