# HG changeset patch # User Bram Moolenaar # Date 1429617755 -7200 # Node ID ec46a5ca1b5169c35c416f8baa3d196cf2daeece # Parent cf5d9c1e4c8ae6ec4d23fbb76bf5e3d1f9fd722c patch 7.4.704 Problem: Searching for a character matches an illegal byte and causes invalid memory access. (Dominique Pelle) Solution: Do not match an invalid byte when search for a character in a string. Fix equivalence classes using negative numbers, which result in illegal bytes. diff --git a/src/misc2.c b/src/misc2.c --- a/src/misc2.c +++ b/src/misc2.c @@ -1885,9 +1885,12 @@ vim_strchr(string, c) { while (*p != NUL) { - if (utf_ptr2char(p) == c) + int l = (*mb_ptr2len)(p); + + /* Avoid matching an illegal byte here. */ + if (utf_ptr2char(p) == c && l > 1) return p; - p += (*mb_ptr2len)(p); + p += l; } return NULL; } diff --git a/src/regexp.c b/src/regexp.c --- a/src/regexp.c +++ b/src/regexp.c @@ -845,13 +845,14 @@ reg_equi_class(c) #else switch (c) { - case 'A': case '\300': case '\301': case '\302': + /* Do not use '\300' style, it results in a negative number. */ + case 'A': case 0xc0: case 0xc1: case 0xc2: + case 0xc3: case 0xc4: case 0xc5: CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) - case '\303': case '\304': case '\305': - regmbc('A'); regmbc('\300'); regmbc('\301'); - regmbc('\302'); regmbc('\303'); regmbc('\304'); - regmbc('\305'); + regmbc('A'); regmbc(0xc0); regmbc(0xc1); + regmbc(0xc2); regmbc(0xc3); regmbc(0xc4); + regmbc(0xc5); REGMBC(0x100) REGMBC(0x102) REGMBC(0x104) REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0) REGMBC(0x1ea2) @@ -859,9 +860,9 @@ reg_equi_class(c) case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06) regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06) return; - case 'C': case '\307': + case 'C': case 0xc7: CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) - regmbc('C'); regmbc('\307'); + regmbc('C'); regmbc(0xc7); REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a) REGMBC(0x10c) return; @@ -870,11 +871,11 @@ reg_equi_class(c) regmbc('D'); REGMBC(0x10e) REGMBC(0x110) REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10) return; - case 'E': case '\310': case '\311': case '\312': case '\313': + case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb: CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) - regmbc('E'); regmbc('\310'); regmbc('\311'); - regmbc('\312'); regmbc('\313'); + regmbc('E'); regmbc(0xc8); regmbc(0xc9); + regmbc(0xca); regmbc(0xcb); REGMBC(0x112) REGMBC(0x114) REGMBC(0x116) REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba) REGMBC(0x1ebc) @@ -894,11 +895,11 @@ reg_equi_class(c) regmbc('H'); REGMBC(0x124) REGMBC(0x126) REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28) return; - case 'I': case '\314': case '\315': case '\316': case '\317': + case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf: CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) - regmbc('I'); regmbc('\314'); regmbc('\315'); - regmbc('\316'); regmbc('\317'); + regmbc('I'); regmbc(0xcc); regmbc(0xcd); + regmbc(0xce); regmbc(0xcf); REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c) REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf) REGMBC(0x1ec8) @@ -920,20 +921,20 @@ reg_equi_class(c) case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40) regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40) return; - case 'N': case '\321': + case 'N': case 0xd1: CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) CASEMBC(0x1e48) - regmbc('N'); regmbc('\321'); + regmbc('N'); regmbc(0xd1); REGMBC(0x143) REGMBC(0x145) REGMBC(0x147) REGMBC(0x1e44) REGMBC(0x1e48) return; - case 'O': case '\322': case '\323': case '\324': case '\325': - case '\326': case '\330': + case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: + case 0xd6: case 0xd8: CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) - regmbc('O'); regmbc('\322'); regmbc('\323'); - regmbc('\324'); regmbc('\325'); regmbc('\326'); - regmbc('\330'); + regmbc('O'); regmbc(0xd2); regmbc(0xd3); + regmbc(0xd4); regmbc(0xd5); regmbc(0xd6); + regmbc(0xd8); REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150) REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea) REGMBC(0x1ec) REGMBC(0x1ece) @@ -956,12 +957,12 @@ reg_equi_class(c) regmbc('T'); REGMBC(0x162) REGMBC(0x164) REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e) return; - case 'U': case '\331': case '\332': case '\333': case '\334': + case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc: CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) CASEMBC(0x1ee6) - regmbc('U'); regmbc('\331'); regmbc('\332'); - regmbc('\333'); regmbc('\334'); + regmbc('U'); regmbc(0xd9); regmbc(0xda); + regmbc(0xdb); regmbc(0xdc); REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c) REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172) REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6) @@ -977,10 +978,10 @@ reg_equi_class(c) case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c) regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c) return; - case 'Y': case '\335': + case 'Y': case 0xdd: CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) CASEMBC(0x1ef6) CASEMBC(0x1ef8) - regmbc('Y'); regmbc('\335'); + regmbc('Y'); regmbc(0xdd); REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e) REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8) return; @@ -990,13 +991,13 @@ reg_equi_class(c) REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90) REGMBC(0x1e94) return; - case 'a': case '\340': case '\341': case '\342': - case '\343': case '\344': case '\345': + case 'a': case 0xe0: case 0xe1: case 0xe2: + case 0xe3: case 0xe4: case 0xe5: CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) - regmbc('a'); regmbc('\340'); regmbc('\341'); - regmbc('\342'); regmbc('\343'); regmbc('\344'); - regmbc('\345'); + regmbc('a'); regmbc(0xe0); regmbc(0xe1); + regmbc(0xe2); regmbc(0xe3); regmbc(0xe4); + regmbc(0xe5); REGMBC(0x101) REGMBC(0x103) REGMBC(0x105) REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1) REGMBC(0x1ea3) @@ -1004,9 +1005,9 @@ reg_equi_class(c) case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07) regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07) return; - case 'c': case '\347': + case 'c': case 0xe7: CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) - regmbc('c'); regmbc('\347'); + regmbc('c'); regmbc(0xe7); REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b) REGMBC(0x10d) return; @@ -1015,11 +1016,11 @@ reg_equi_class(c) regmbc('d'); REGMBC(0x10f) REGMBC(0x111) REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11) return; - case 'e': case '\350': case '\351': case '\352': case '\353': + case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb: CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) - regmbc('e'); regmbc('\350'); regmbc('\351'); - regmbc('\352'); regmbc('\353'); + regmbc('e'); regmbc(0xe8); regmbc(0xe9); + regmbc(0xea); regmbc(0xeb); REGMBC(0x113) REGMBC(0x115) REGMBC(0x117) REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb) REGMBC(0x1ebd) @@ -1040,11 +1041,11 @@ reg_equi_class(c) REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29) REGMBC(0x1e96) return; - case 'i': case '\354': case '\355': case '\356': case '\357': + case 'i': case 0xec: case 0xed: case 0xee: case 0xef: CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) CASEMBC(0x1d0) CASEMBC(0x1ec9) - regmbc('i'); regmbc('\354'); regmbc('\355'); - regmbc('\356'); regmbc('\357'); + regmbc('i'); regmbc(0xec); regmbc(0xed); + regmbc(0xee); regmbc(0xef); REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d) REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9) return; @@ -1065,20 +1066,20 @@ reg_equi_class(c) case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41) regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41) return; - case 'n': case '\361': + case 'n': case 0xf1: CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) CASEMBC(0x1e45) CASEMBC(0x1e49) - regmbc('n'); regmbc('\361'); + regmbc('n'); regmbc(0xf1); REGMBC(0x144) REGMBC(0x146) REGMBC(0x148) REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49) return; - case 'o': case '\362': case '\363': case '\364': case '\365': - case '\366': case '\370': + case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5: + case 0xf6: case 0xf8: CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) - regmbc('o'); regmbc('\362'); regmbc('\363'); - regmbc('\364'); regmbc('\365'); regmbc('\366'); - regmbc('\370'); + regmbc('o'); regmbc(0xf2); regmbc(0xf3); + regmbc(0xf4); regmbc(0xf5); regmbc(0xf6); + regmbc(0xf8); REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151) REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb) REGMBC(0x1ed) REGMBC(0x1ecf) @@ -1101,12 +1102,12 @@ reg_equi_class(c) regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167) REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97) return; - case 'u': case '\371': case '\372': case '\373': case '\374': + case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc: CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) CASEMBC(0x1ee7) - regmbc('u'); regmbc('\371'); regmbc('\372'); - regmbc('\373'); regmbc('\374'); + regmbc('u'); regmbc(0xf9); regmbc(0xfa); + regmbc(0xfb); regmbc(0xfc); REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d) REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173) REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7) @@ -1123,10 +1124,10 @@ reg_equi_class(c) case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d) regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d) return; - case 'y': case '\375': case '\377': + case 'y': case 0xfd: case 0xff: CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) - regmbc('y'); regmbc('\375'); regmbc('\377'); + regmbc('y'); regmbc(0xfd); regmbc(0xff); REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99) REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9) return; diff --git a/src/testdir/test44.in b/src/testdir/test44.in --- a/src/testdir/test44.in +++ b/src/testdir/test44.in @@ -24,17 +24,25 @@ x:" Now search for multi-byte with compo x:" find word by change of word class /ち\<カヨ\>は x:" Test \%u, [\u] and friends +:" c /\%u20ac -x/[\u4f7f\u5929]\+ -x/\%U12345678 -x/[\U1234abcd\u1234\uabcd] -x/\%d21879b -x/ [[=A=]]* [[=B=]]* [[=C=]]* [[=D=]]* [[=E=]]* [[=F=]]* [[=G=]]* [[=H=]]* [[=I=]]* [[=J=]]* [[=K=]]* [[=L=]]* [[=M=]]* [[=N=]]* [[=O=]]* [[=P=]]* [[=Q=]]* [[=R=]]* [[=S=]]* [[=T=]]* [[=U=]]* [[=V=]]* [[=W=]]* [[=X=]]* [[=Y=]]* [[=Z=]]*/e -x/ [[=a=]]* [[=b=]]* [[=c=]]* [[=d=]]* [[=e=]]* [[=f=]]* [[=g=]]* [[=h=]]* [[=i=]]* [[=j=]]* [[=k=]]* [[=l=]]* [[=m=]]* [[=n=]]* [[=o=]]* [[=p=]]* [[=q=]]* [[=r=]]* [[=s=]]* [[=t=]]* [[=u=]]* [[=v=]]* [[=w=]]* [[=x=]]* [[=y=]]* [[=z=]]*/e -x:" Test backwards search from a multi-byte char +x:" d +/[\u4f7f\u5929]\+ +x:" e +/\%U12345678 +x:" f +/[\U1234abcd\u1234\uabcd] +x:" g +/\%d21879b +x:" h +/ [[=A=]]* [[=B=]]* [[=C=]]* [[=D=]]* [[=E=]]* [[=F=]]* [[=G=]]* [[=H=]]* [[=I=]]* [[=J=]]* [[=K=]]* [[=L=]]* [[=M=]]* [[=N=]]* [[=O=]]* [[=P=]]* [[=Q=]]* [[=R=]]* [[=S=]]* [[=T=]]* [[=U=]]* [[=V=]]* [[=W=]]* [[=X=]]* [[=Y=]]* [[=Z=]]*/e +x:" i +/ [[=a=]]* [[=b=]]* [[=c=]]* [[=d=]]* [[=e=]]* [[=f=]]* [[=g=]]* [[=h=]]* [[=i=]]* [[=j=]]* [[=k=]]* [[=l=]]* [[=m=]]* [[=n=]]* [[=o=]]* [[=p=]]* [[=q=]]* [[=r=]]* [[=s=]]* [[=t=]]* [[=u=]]* [[=v=]]* [[=w=]]* [[=x=]]* [[=y=]]* [[=z=]]*/e +x:" j Test backwards search from a multi-byte char /x x?. -x:let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g' +x:" k +:let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g' :@w :?^1?,$w! test.out :e! test.out diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -742,6 +742,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 704, +/**/ 703, /**/ 702,