Mercurial > vim
view src/testdir/test_utf8_comparisons.vim @ 34336:d2ad8733db75 v9.1.0101
patch 9.1.0101: upper-case of German sharp s should be U+1E9E
Commit: https://github.com/vim/vim/commit/bd1232a1faf56b614a1e74c4ce51bc6e0650ae00
Author: glepnir <glephunter@gmail.com>
Date: Mon Feb 12 22:14:53 2024 +0100
patch 9.1.0101: upper-case of German sharp s should be U+1E9E
Problem: upper-case of ß should be U+1E9E (CAPITAL LETTER SHARP S)
(fenuks)
Solution: Make gU, ~ and g~ convert the U+00DF LATIN SMALL LETTER SHARP S (ß)
to U+1E9E LATIN CAPITAL LETTER SHARP S (ẞ), update tests
(glepnir)
This is part of Unicode 5.1.0 from April 2008, so should be fairly safe
to use now and since 2017 is part of the German standard orthography,
according to Wikipedia:
https://en.wikipedia.org/wiki/Capital_%E1%BA%9E#cite_note-auto-12
There is however one exception: UnicodeData.txt for U+00DF
LATIN SMALL LETTER SHARP S does NOT define U+1E9E LATIN CAPITAL LETTER
SHARP S as its upper case version. Therefore, toupper() won't be able
to convert from lower sharp s to upper case sharp s (the other way
around however works, since U+00DF is considered the lower case
character of U+1E9E and therefore tolower() works correctly for the
upper case version).
fixes: #5573
closes: #14018
Signed-off-by: glepnir <glephunter@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Mon, 12 Feb 2024 22:45:02 +0100 |
parents | 08940efa6b4e |
children | be4389b04043 |
line wrap: on
line source
" Tests for case-insensitive UTF-8 comparisons (utf_strnicmp() in mbyte.c) " Also test "g~ap". func Ch(a, op, b, expected) call assert_equal(eval(printf('"%s" %s "%s"', a:a, a:op, a:b)), a:expected, \ printf('"%s" %s "%s" should return %d', a:a, a:op, a:b, a:expected)) endfunc func Chk(a, b, result) if a:result == 0 call Ch(a:a, '==?', a:b, 1) call Ch(a:a, '!=?', a:b, 0) call Ch(a:a, '<=?', a:b, 1) call Ch(a:a, '>=?', a:b, 1) call Ch(a:a, '<?', a:b, 0) call Ch(a:a, '>?', a:b, 0) elseif a:result > 0 call Ch(a:a, '==?', a:b, 0) call Ch(a:a, '!=?', a:b, 1) call Ch(a:a, '<=?', a:b, 0) call Ch(a:a, '>=?', a:b, 1) call Ch(a:a, '<?', a:b, 0) call Ch(a:a, '>?', a:b, 1) else call Ch(a:a, '==?', a:b, 0) call Ch(a:a, '!=?', a:b, 1) call Ch(a:a, '<=?', a:b, 1) call Ch(a:a, '>=?', a:b, 0) call Ch(a:a, '<?', a:b, 1) call Ch(a:a, '>?', a:b, 0) endif endfunc func Check(a, b, result) call Chk(a:a, a:b, a:result) call Chk(a:b, a:a, -a:result) endfunc func LT(a, b) call Check(a:a, a:b, -1) endfunc func GT(a, b) call Check(a:a, a:b, 1) endfunc func EQ(a, b) call Check(a:a, a:b, 0) endfunc func Test_comparisons() call EQ('', '') call LT('', 'a') call EQ('abc', 'abc') call EQ('Abc', 'abC') call LT('ab', 'abc') call LT('AB', 'abc') call LT('ab', 'aBc') call EQ('\xd0\xb9\xd1\x86\xd1\x83\xd0\xba\xd0\xb5\xd0\xbd', '\xd0\xb9\xd0\xa6\xd0\xa3\xd0\xba\xd0\x95\xd0\xbd') call LT('\xd0\xb9\xd1\x86\xd1\x83\xd0\xba\xd0\xb5\xd0\xbd', '\xd0\xaf\xd1\x86\xd1\x83\xd0\xba\xd0\xb5\xd0\xbd') call EQ('\xe2\x84\xaa', 'k') call LT('\xe2\x84\xaa', 'kkkkkk') call EQ('\xe2\x84\xaa\xe2\x84\xaa\xe2\x84\xaa', 'kkk') call LT('kk', '\xe2\x84\xaa\xe2\x84\xaa\xe2\x84\xaa') call EQ('\xe2\x84\xaa\xe2\x84\xa6k\xe2\x84\xaak\xcf\x89', 'k\xcf\x89\xe2\x84\xaakk\xe2\x84\xa6') call EQ('Abc\x80', 'AbC\x80') call LT('Abc\x80', 'AbC\x81') call LT('Abc', 'AbC\x80') call LT('abc\x80DEF', 'abc\x80def') " case folding stops at the first bad character call LT('\xc3XYZ', '\xc3xyz') call EQ('\xef\xbc\xba', '\xef\xbd\x9a') " FF3A (upper), 
FF5A (lower) call GT('\xef\xbc\xba', '\xef\xbc\xff') " first string is ok and equals \xef\xbd\x9a after folding, second string is illegal and was left unchanged, then the strings were bytewise compared call LT('\xc3', '\xc3\x83') call EQ('\xc3\xa3xYz', '\xc3\x83XyZ') for n in range(0x60, 0xFF) call LT(printf('xYz\x%.2X', n-1), printf('XyZ\x%.2X', n)) endfor for n in range(0x80, 0xBF) call EQ(printf('xYz\xc2\x%.2XUvW', n), printf('XyZ\xc2\x%.2XuVw', n)) endfor for n in range(0xC0, 0xFF) call LT(printf('xYz\xc2\x%.2XUvW', n), printf('XyZ\xc2\x%.2XuVw', n)) endfor endfunc " test that g~ap changes one paragraph only. func Test_gap() new " setup text call feedkeys("iabcd\<cr>\<cr>defg", "tx") " modify only first line call feedkeys("gg0g~ap", "tx") call assert_equal(["ABCD", "", "defg"], getline(1,3)) endfunc " test that g~, ~ and gU correclty upper-cases ß func Test_uppercase_sharp_ss() new call setline(1, repeat(['ß'], 4)) call cursor(1, 1) norm! ~ call assert_equal('ẞ', getline(line('.'))) norm! ~ call assert_equal('ß', getline(line('.'))) call cursor(2, 1) norm! g~l call assert_equal('ẞ', getline(line('.'))) norm! g~l call assert_equal('ß', getline(line('.'))) call cursor(3, 1) norm! gUl call assert_equal('ẞ', getline(line('.'))) norm! vgU call assert_equal('ẞ', getline(line('.'))) norm! vgu call assert_equal('ß', getline(line('.'))) norm! gul call assert_equal('ß', getline(line('.'))) call cursor(4, 1) norm! vgU call assert_equal('ẞ', getline(line('.'))) norm! vgu call assert_equal('ß', getline(line('.'))) bw! endfunc " vim: shiftwidth=2 sts=2 expandtab