# HG changeset patch # User Christian Brabandt # Date 1485468005 -3600 # Node ID 056e32b99e933dcec6465c53986c3eed3992a895 # Parent f5cabb0cd638ac896af2f85874c8e0dedbfdc0f5 patch 8.0.0243: tolower() does not work if the byte count changes commit https://github.com/vim/vim/commit/cc5b22b3bfdc0e9e835cf7871166badda31447bd Author: Bram Moolenaar Date: Thu Jan 26 22:51:56 2017 +0100 patch 8.0.0243: tolower() does not work if the byte count changes Problem: When making a character lower case with tolower() changes the byte cound, it is not made lower case. Solution: Add strlow_save(). (Dominique Pelle, closes #1406) diff --git a/src/evalfunc.c b/src/evalfunc.c --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -12503,39 +12503,8 @@ f_timer_stopall(typval_T *argvars UNUSED static void f_tolower(typval_T *argvars, typval_T *rettv) { - char_u *p; - - p = vim_strsave(get_tv_string(&argvars[0])); rettv->v_type = VAR_STRING; - rettv->vval.v_string = p; - - if (p != NULL) - while (*p != NUL) - { -#ifdef FEAT_MBYTE - int l; - - if (enc_utf8) - { - int c, lc; - - c = utf_ptr2char(p); - lc = utf_tolower(c); - l = utf_ptr2len(p); - /* TODO: reallocate string when byte count changes. */ - if (utf_char2len(lc) == l) - utf_char2bytes(lc, p); - p += l; - } - else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - p += l; /* skip multi-byte character */ - else -#endif - { - *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */ - ++p; - } - } + rettv->vval.v_string = strlow_save(get_tv_string(&argvars[0])); } /* diff --git a/src/misc2.c b/src/misc2.c --- a/src/misc2.c +++ b/src/misc2.c @@ -1602,7 +1602,10 @@ strup_save(char_u *orig) { s = alloc((unsigned)STRLEN(res) + 1 + newl - l); if (s == NULL) - break; + { + vim_free(res); + return NULL; + } mch_memmove(s, res, p - res); STRCPY(s + (p - res) + newl, p + l); p = s + (p - res); @@ -1625,6 +1628,69 @@ strup_save(char_u *orig) return res; } + +/* + * Make string "s" all lower-case and return it in allocated memory. + * Handles multi-byte characters as well as possible. + * Returns NULL when out of memory. + */ + char_u * +strlow_save(char_u *orig) +{ + char_u *p; + char_u *res; + + res = p = vim_strsave(orig); + + if (res != NULL) + while (*p != NUL) + { +# ifdef FEAT_MBYTE + int l; + + if (enc_utf8) + { + int c, lc; + int newl; + char_u *s; + + c = utf_ptr2char(p); + lc = utf_tolower(c); + + /* Reallocate string when byte count changes. This is rare, + * thus it's OK to do another malloc()/free(). */ + l = utf_ptr2len(p); + newl = utf_char2len(lc); + if (newl != l) + { + s = alloc((unsigned)STRLEN(res) + 1 + newl - l); + if (s == NULL) + { + vim_free(res); + return NULL; + } + mch_memmove(s, res, p - res); + STRCPY(s + (p - res) + newl, p + l); + p = s + (p - res); + vim_free(res); + res = s; + } + + utf_char2bytes(lc, p); + p += newl; + } + else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) + p += l; /* skip multi-byte character */ + else +# endif + { + *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */ + p++; + } + } + + return res; +} #endif /* diff --git a/src/proto/misc2.pro b/src/proto/misc2.pro --- a/src/proto/misc2.pro +++ b/src/proto/misc2.pro @@ -40,6 +40,7 @@ char_u *vim_strsave_up(char_u *string); char_u *vim_strnsave_up(char_u *string, int len); void vim_strup(char_u *p); char_u *strup_save(char_u *orig); +char_u *strlow_save(char_u *orig); void del_trailing_spaces(char_u *ptr); void vim_strncpy(char_u *to, char_u *from, size_t len); void vim_strcat(char_u *to, char_u *from, size_t tosize); diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim --- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -16,3 +16,148 @@ func Test_str2nr() call assert_equal(123456789, str2nr('123456789')) call assert_equal(-123456789, str2nr('-123456789')) endfunc + +func Test_tolower() + call assert_equal("", tolower("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', + \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few uppercase diacritics. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("bḃḇ", tolower("BḂḆ")) + call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ")) + call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("fḟ ", tolower("FḞ ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ")) + call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ")) + call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("jĵ", tolower("JĴ")) + call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ")) + call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ")) + call assert_equal("mḿṁ", tolower("MḾṀ")) + call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("pṕṗ", tolower("PṔṖ")) + call assert_equal("q", tolower("Q")) + call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ")) + call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ")) + call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("vṽ", tolower("VṼ")) + call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ")) + call assert_equal("xẋẍ", tolower("XẊẌ")) + call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ")) + call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ")) + + " Test with a few lowercase diacritics, which should remain unchanged. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả")) + call assert_equal("bḃḇ", tolower("bḃḇ")) + call assert_equal("cçćĉċč", tolower("cçćĉċč")) + call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ")) + call assert_equal("fḟ", tolower("fḟ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ")) + call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ")) + call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ")) + call assert_equal("jĵǰ", tolower("jĵǰ")) + call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ")) + call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ")) + call assert_equal("mḿṁ ", tolower("mḿṁ ")) + call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("pṕṗ", tolower("pṕṗ")) + call assert_equal("q", tolower("q")) + call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ")) + call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ")) + call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ")) + call assert_equal("vṽ", tolower("vṽ")) + call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ")) + call assert_equal("ẋẍ", tolower("ẋẍ")) + call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ")) + call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ")) + + " According to https://twitter.com/jifa/status/625776454479970304 + " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase + " in length (2 to 3 bytes) when lowercased. So let's test them. + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + +func Test_toupper() + call assert_equal("", toupper("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~', + \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few lowercase diacritics. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả")) + call assert_equal("BḂḆ", toupper("bḃḇ")) + call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč")) + call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ")) + call assert_equal("FḞ", toupper("fḟ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ")) + call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ")) + call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ")) + call assert_equal("JĴǰ", toupper("jĵǰ")) + call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ")) + call assert_equal("MḾṀ ", toupper("mḿṁ ")) + call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("PṔṖ", toupper("pṕṗ")) + call assert_equal("Q", toupper("q")) + call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ")) + call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ")) + call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ")) + call assert_equal("VṼ", toupper("vṽ")) + call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ")) + call assert_equal("ẊẌ", toupper("ẋẍ")) + call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ")) + + " Test that uppercase diacritics, which should remain unchanged. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("BḂḆ", toupper("BḂḆ")) + call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ")) + call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("FḞ ", toupper("FḞ ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ")) + call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ")) + call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("JĴ", toupper("JĴ")) + call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ")) + call assert_equal("MḾṀ", toupper("MḾṀ")) + call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("PṔṖ", toupper("PṔṖ")) + call assert_equal("Q", toupper("Q")) + call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ")) + call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ")) + call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("VṼ", toupper("VṼ")) + call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ")) + call assert_equal("XẊẌ", toupper("XẊẌ")) + call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ")) + + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + + diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -765,6 +765,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 243, +/**/ 242, /**/ 241,