Mercurial > vim
changeset 10724:ae1c6bf22e5f v8.0.0252
patch 8.0.0252: not properly recognizing word characters between 128 and 255
commit https://github.com/vim/vim/commit/4019cf90b8657d4ab1c39744db63550f44f405a2
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Jan 28 16:39:34 2017 +0100
patch 8.0.0252: not properly recognizing word characters between 128 and 255
Problem: Characters with a value below 256 that are encoded as more than one
byte (e.g. 128-255 in UTF-8) are not always recognized as word characters.
Solution: Make vim_iswordc() and vim_iswordp() work the same way. Add a test
for this. (Ozaki Kiichi)
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sat, 28 Jan 2017 16:45:04 +0100 |
parents | 72cb227772e7 |
children | d2aa3ad3d3ef |
files | .gitignore src/Makefile src/charset.c src/kword_test.c src/mbyte.c src/proto/mbyte.pro src/version.c |
diffstat | 7 files changed, 143 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/.gitignore +++ b/.gitignore @@ -81,3 +81,4 @@ src/testdir/viminfo src/memfile_test src/json_test src/message_test +src/kword_test
--- a/src/Makefile +++ b/src/Makefile @@ -1584,14 +1584,16 @@ EXTRA_SRC = hangulin.c if_lua.c if_mzsch # Unittest files JSON_TEST_SRC = json_test.c JSON_TEST_TARGET = json_test$(EXEEXT) +KWORD_TEST_SRC = kword_test.c +KWORD_TEST_TARGET = kword_test$(EXEEXT) MEMFILE_TEST_SRC = memfile_test.c MEMFILE_TEST_TARGET = memfile_test$(EXEEXT) MESSAGE_TEST_SRC = message_test.c MESSAGE_TEST_TARGET = message_test$(EXEEXT) -UNITTEST_SRC = $(JSON_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC) -UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET) -RUN_UNITTESTS = run_json_test run_memfile_test run_message_test +UNITTEST_SRC = $(JSON_TEST_SRC) $(KWORD_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC) +UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(KWORD_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET) +RUN_UNITTESTS = run_json_test run_kword_test run_memfile_test run_message_test # All sources, also the ones that are not configured ALL_SRC = $(BASIC_SRC) $(ALL_GUI_SRC) $(UNITTEST_SRC) $(EXTRA_SRC) @@ -1611,7 +1613,6 @@ OBJ_COMMON = \ objects/arabic.o \ objects/buffer.o \ objects/blowfish.o \ - objects/charset.o \ objects/crypt.o \ objects/crypt_zip.o \ objects/dict.o \ @@ -1679,6 +1680,7 @@ OBJ_COMMON = \ # The files included by tests are not in OBJ_COMMON. 
OBJ_MAIN = \ + objects/charset.o \ objects/json.o \ objects/main.o \ objects/memfile.o \ @@ -1687,13 +1689,23 @@ OBJ_MAIN = \ OBJ = $(OBJ_COMMON) $(OBJ_MAIN) OBJ_JSON_TEST = \ + objects/charset.o \ objects/memfile.o \ objects/message.o \ objects/json_test.o JSON_TEST_OBJ = $(OBJ_COMMON) $(OBJ_JSON_TEST) +OBJ_KWORD_TEST = \ + objects/json.o \ + objects/memfile.o \ + objects/message.o \ + objects/kword_test.o + +KWORD_TEST_OBJ = $(OBJ_COMMON) $(OBJ_KWORD_TEST) + OBJ_MEMFILE_TEST = \ + objects/charset.o \ objects/json.o \ objects/message.o \ objects/memfile_test.o @@ -1701,6 +1713,7 @@ OBJ_MEMFILE_TEST = \ MEMFILE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MEMFILE_TEST) OBJ_MESSAGE_TEST = \ + objects/charset.o \ objects/json.o \ objects/memfile.o \ objects/message_test.o @@ -1710,6 +1723,7 @@ MESSAGE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_M ALL_OBJ = $(OBJ_COMMON) \ $(OBJ_MAIN) \ $(OBJ_JSON_TEST) \ + $(OBJ_KWORD_TEST) \ $(OBJ_MEMFILE_TEST) \ $(OBJ_MESSAGE_TEST) @@ -2036,6 +2050,9 @@ unittest unittests: $(RUN_UNITTESTS) run_json_test: $(JSON_TEST_TARGET) $(VALGRIND) ./$(JSON_TEST_TARGET) || exit 1; echo $* passed; +run_kword_test: $(KWORD_TEST_TARGET) + $(VALGRIND) ./$(KWORD_TEST_TARGET) || exit 1; echo $* passed; + run_memfile_test: $(MEMFILE_TEST_TARGET) $(VALGRIND) ./$(MEMFILE_TEST_TARGET) || exit 1; echo $* passed; @@ -2222,6 +2239,13 @@ testclean: MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \ sh $(srcdir)/link.sh +$(KWORD_TEST_TARGET): auto/config.mk objects $(KWORD_TEST_OBJ) + $(CCC) version.c -o objects/version.o + @LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \ + -o $(KWORD_TEST_TARGET) $(KWORD_TEST_OBJ) $(ALL_LIBS)" \ + MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \ + sh $(srcdir)/link.sh + $(MEMFILE_TEST_TARGET): auto/config.mk objects $(MEMFILE_TEST_OBJ) $(CCC) version.c -o objects/version.o @LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \ @@ -3058,6 +3082,9 @@ objects/json.o: json.c objects/json_test.o: json_test.c $(CCC) -o $@ 
json_test.c +objects/kword_test.o: kword_test.c + $(CCC) -o $@ kword_test.c + objects/list.o: list.c $(CCC) -o $@ list.c @@ -3597,6 +3624,10 @@ objects/json_test.o: json_test.c main.c auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \ proto.h globals.h farsi.h arabic.h json.c +objects/kword_test.o: kword_test.c main.c vim.h auto/config.h feature.h os_unix.h \ + auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ + regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \ + proto.h globals.h farsi.h arabic.h charset.c mbyte.c objects/memfile_test.o: memfile_test.c main.c vim.h auto/config.h feature.h \ os_unix.h auto/osdef.h ascii.h keymap.h term.h macros.h option.h \ structs.h regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h \
--- a/src/charset.c +++ b/src/charset.c @@ -899,16 +899,17 @@ vim_iswordc(int c) int vim_iswordc_buf(int c, buf_T *buf) { -#ifdef FEAT_MBYTE if (c >= 0x100) { +#ifdef FEAT_MBYTE if (enc_dbcs != 0) return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2; if (enc_utf8) - return utf_class(c) >= 2; + return utf_class_buf(c, buf) >= 2; +#endif + return FALSE; } -#endif - return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0); + return (c > 0 && GET_CHARTAB(buf, c) != 0); } /* @@ -917,21 +918,19 @@ vim_iswordc_buf(int c, buf_T *buf) int vim_iswordp(char_u *p) { -#ifdef FEAT_MBYTE - if (has_mbyte && MB_BYTE2LEN(*p) > 1) - return mb_get_class(p) >= 2; -#endif - return GET_CHARTAB(curbuf, *p) != 0; + return vim_iswordp_buf(p, curbuf); } int vim_iswordp_buf(char_u *p, buf_T *buf) { + int c = *p; + #ifdef FEAT_MBYTE - if (has_mbyte && MB_BYTE2LEN(*p) > 1) - return mb_get_class(p) >= 2; + if (has_mbyte && MB_BYTE2LEN(c) > 1) + c = (*mb_ptr2char)(p); #endif - return (GET_CHARTAB(buf, *p) != 0); + return vim_iswordc_buf(c, buf); } /*
new file mode 100644 --- /dev/null +++ b/src/kword_test.c @@ -0,0 +1,85 @@ +/* vi:set ts=8 sts=4 sw=4 noet: + * + * VIM - Vi IMproved by Bram Moolenaar + * + * Do ":help uganda" in Vim to read copying and usage conditions. + * Do ":help credits" in Vim to see a list of people who contributed. + * See README.txt for an overview of the Vim source code. + */ + +/* + * kword_test.c: Unittests for vim_iswordc() and vim_iswordp(). + */ + +#undef NDEBUG +#include <assert.h> + +/* Must include main.c because it contains much more than just main() */ +#define NO_VIM_MAIN +#include "main.c" + +/* This file has to be included because the tested functions are static */ +#include "charset.c" + +#ifdef FEAT_MBYTE +/* + * Test the results of vim_iswordc() and vim_iswordp() are matched. + */ + static void +test_isword_funcs_utf8(void) +{ + buf_T buf; + int c; + + vim_memset(&buf, 0, sizeof(buf)); + p_enc = (char_u *)"utf-8"; + p_isi = (char_u *)""; + p_isp = (char_u *)""; + p_isf = (char_u *)""; + buf.b_p_isk = (char_u *)"@,48-57,_,128-167,224-235"; + + curbuf = &buf; + mb_init(); /* calls init_chartab() */ + + for (c = 0; c < 0x10000; ++c) + { + char_u p[4] = {0}; + int c1; + int retc; + int retp; + + utf_char2bytes(c, p); + c1 = utf_ptr2char(p); + if (c != c1) + { + fprintf(stderr, "Failed: "); + fprintf(stderr, + "[c = %#04x, p = {%#02x, %#02x, %#02x}] ", + c, p[0], p[1], p[2]); + fprintf(stderr, "c != utf_ptr2char(p) (=%#04x)\n", c1); + abort(); + } + retc = vim_iswordc_buf(c, &buf); + retp = vim_iswordp_buf(p, &buf); + if (retc != retp) + { + fprintf(stderr, "Failed: "); + fprintf(stderr, + "[c = %#04x, p = {%#02x, %#02x, %#02x}] ", + c, p[0], p[1], p[2]); + fprintf(stderr, "vim_iswordc(c) (=%d) != vim_iswordp(p) (=%d)\n", + retc, retp); + abort(); + } + } +} +#endif + + int +main(void) +{ +#ifdef FEAT_MBYTE + test_isword_funcs_utf8(); +#endif + return 0; +}
--- a/src/mbyte.c +++ b/src/mbyte.c @@ -895,7 +895,7 @@ mb_get_class_buf(char_u *p, buf_T *buf) if (enc_dbcs != 0 && p[0] != NUL && p[1] != NUL) return dbcs_class(p[0], p[1]); if (enc_utf8) - return utf_class(utf_ptr2char(p)); + return utf_class_buf(utf_ptr2char(p), buf); return 0; } @@ -2694,6 +2694,12 @@ static struct interval emoji_all[] = int utf_class(int c) { + return utf_class_buf(c, curbuf); +} + + int +utf_class_buf(int c, buf_T *buf) +{ /* sorted list of non-overlapping intervals */ static struct clinterval { @@ -2780,7 +2786,7 @@ utf_class(int c) { if (c == ' ' || c == '\t' || c == NUL || c == 0xa0) return 0; /* blank */ - if (vim_iswordc(c)) + if (vim_iswordc_buf(c, buf)) return 2; /* word character */ return 1; /* punctuation */ }
--- a/src/proto/mbyte.pro +++ b/src/proto/mbyte.pro @@ -40,6 +40,7 @@ int utf_char2bytes(int c, char_u *buf); int utf_iscomposing(int c); int utf_printable(int c); int utf_class(int c); +int utf_class_buf(int c, buf_T *buf); int utf_ambiguous_width(int c); int utf_fold(int a); int utf_toupper(int a);