mirror of
https://github.com/vim/vim
synced 2025-07-15 16:51:57 +00:00
patch 8.0.0252: not properly recognizing word characters between 128 and 255
Problem: Characters below 256 that are not one byte are not always recognized as word characters. Solution: Make vim_iswordc() and vim_iswordp() work the same way. Add a test for this. (Ozaki Kiichi)
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -81,3 +81,4 @@ src/testdir/viminfo
|
||||
src/memfile_test
|
||||
src/json_test
|
||||
src/message_test
|
||||
src/kword_test
|
||||
|
39
src/Makefile
39
src/Makefile
@ -1584,14 +1584,16 @@ EXTRA_SRC = hangulin.c if_lua.c if_mzsch.c auto/if_perl.c if_perlsfio.c \
|
||||
# Unittest files
|
||||
JSON_TEST_SRC = json_test.c
|
||||
JSON_TEST_TARGET = json_test$(EXEEXT)
|
||||
KWORD_TEST_SRC = kword_test.c
|
||||
KWORD_TEST_TARGET = kword_test$(EXEEXT)
|
||||
MEMFILE_TEST_SRC = memfile_test.c
|
||||
MEMFILE_TEST_TARGET = memfile_test$(EXEEXT)
|
||||
MESSAGE_TEST_SRC = message_test.c
|
||||
MESSAGE_TEST_TARGET = message_test$(EXEEXT)
|
||||
|
||||
UNITTEST_SRC = $(JSON_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
|
||||
UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
|
||||
RUN_UNITTESTS = run_json_test run_memfile_test run_message_test
|
||||
UNITTEST_SRC = $(JSON_TEST_SRC) $(KWORD_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
|
||||
UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(KWORD_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
|
||||
RUN_UNITTESTS = run_json_test run_kword_test run_memfile_test run_message_test
|
||||
|
||||
# All sources, also the ones that are not configured
|
||||
ALL_SRC = $(BASIC_SRC) $(ALL_GUI_SRC) $(UNITTEST_SRC) $(EXTRA_SRC)
|
||||
@ -1611,7 +1613,6 @@ OBJ_COMMON = \
|
||||
objects/arabic.o \
|
||||
objects/buffer.o \
|
||||
objects/blowfish.o \
|
||||
objects/charset.o \
|
||||
objects/crypt.o \
|
||||
objects/crypt_zip.o \
|
||||
objects/dict.o \
|
||||
@ -1679,6 +1680,7 @@ OBJ_COMMON = \
|
||||
|
||||
# The files included by tests are not in OBJ_COMMON.
|
||||
OBJ_MAIN = \
|
||||
objects/charset.o \
|
||||
objects/json.o \
|
||||
objects/main.o \
|
||||
objects/memfile.o \
|
||||
@ -1687,13 +1689,23 @@ OBJ_MAIN = \
|
||||
OBJ = $(OBJ_COMMON) $(OBJ_MAIN)
|
||||
|
||||
OBJ_JSON_TEST = \
|
||||
objects/charset.o \
|
||||
objects/memfile.o \
|
||||
objects/message.o \
|
||||
objects/json_test.o
|
||||
|
||||
JSON_TEST_OBJ = $(OBJ_COMMON) $(OBJ_JSON_TEST)
|
||||
|
||||
OBJ_KWORD_TEST = \
|
||||
objects/json.o \
|
||||
objects/memfile.o \
|
||||
objects/message.o \
|
||||
objects/kword_test.o
|
||||
|
||||
KWORD_TEST_OBJ = $(OBJ_COMMON) $(OBJ_KWORD_TEST)
|
||||
|
||||
OBJ_MEMFILE_TEST = \
|
||||
objects/charset.o \
|
||||
objects/json.o \
|
||||
objects/message.o \
|
||||
objects/memfile_test.o
|
||||
@ -1701,6 +1713,7 @@ OBJ_MEMFILE_TEST = \
|
||||
MEMFILE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MEMFILE_TEST)
|
||||
|
||||
OBJ_MESSAGE_TEST = \
|
||||
objects/charset.o \
|
||||
objects/json.o \
|
||||
objects/memfile.o \
|
||||
objects/message_test.o
|
||||
@ -1710,6 +1723,7 @@ MESSAGE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MESSAGE_TEST)
|
||||
ALL_OBJ = $(OBJ_COMMON) \
|
||||
$(OBJ_MAIN) \
|
||||
$(OBJ_JSON_TEST) \
|
||||
$(OBJ_KWORD_TEST) \
|
||||
$(OBJ_MEMFILE_TEST) \
|
||||
$(OBJ_MESSAGE_TEST)
|
||||
|
||||
@ -2036,6 +2050,9 @@ unittest unittests: $(RUN_UNITTESTS)
|
||||
run_json_test: $(JSON_TEST_TARGET)
|
||||
$(VALGRIND) ./$(JSON_TEST_TARGET) || exit 1; echo $* passed;
|
||||
|
||||
run_kword_test: $(KWORD_TEST_TARGET)
|
||||
$(VALGRIND) ./$(KWORD_TEST_TARGET) || exit 1; echo $* passed;
|
||||
|
||||
run_memfile_test: $(MEMFILE_TEST_TARGET)
|
||||
$(VALGRIND) ./$(MEMFILE_TEST_TARGET) || exit 1; echo $* passed;
|
||||
|
||||
@ -2222,6 +2239,13 @@ $(JSON_TEST_TARGET): auto/config.mk objects $(JSON_TEST_OBJ)
|
||||
MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
|
||||
sh $(srcdir)/link.sh
|
||||
|
||||
$(KWORD_TEST_TARGET): auto/config.mk objects $(KWORD_TEST_OBJ)
|
||||
$(CCC) version.c -o objects/version.o
|
||||
@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
|
||||
-o $(KWORD_TEST_TARGET) $(KWORD_TEST_OBJ) $(ALL_LIBS)" \
|
||||
MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
|
||||
sh $(srcdir)/link.sh
|
||||
|
||||
$(MEMFILE_TEST_TARGET): auto/config.mk objects $(MEMFILE_TEST_OBJ)
|
||||
$(CCC) version.c -o objects/version.o
|
||||
@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
|
||||
@ -3058,6 +3082,9 @@ objects/json.o: json.c
|
||||
objects/json_test.o: json_test.c
|
||||
$(CCC) -o $@ json_test.c
|
||||
|
||||
objects/kword_test.o: kword_test.c
|
||||
$(CCC) -o $@ kword_test.c
|
||||
|
||||
objects/list.o: list.c
|
||||
$(CCC) -o $@ list.c
|
||||
|
||||
@ -3597,6 +3624,10 @@ objects/json_test.o: json_test.c main.c vim.h auto/config.h feature.h os_unix.h
|
||||
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
|
||||
regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
|
||||
proto.h globals.h farsi.h arabic.h json.c
|
||||
objects/kword_test.o: kword_test.c main.c vim.h auto/config.h feature.h os_unix.h \
|
||||
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
|
||||
regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
|
||||
proto.h globals.h farsi.h arabic.h charset.c mbyte.c
|
||||
objects/memfile_test.o: memfile_test.c main.c vim.h auto/config.h feature.h \
|
||||
os_unix.h auto/osdef.h ascii.h keymap.h term.h macros.h option.h \
|
||||
structs.h regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h \
|
||||
|
@ -899,16 +899,17 @@ vim_iswordc(int c)
|
||||
int
|
||||
vim_iswordc_buf(int c, buf_T *buf)
|
||||
{
|
||||
#ifdef FEAT_MBYTE
|
||||
if (c >= 0x100)
|
||||
{
|
||||
#ifdef FEAT_MBYTE
|
||||
if (enc_dbcs != 0)
|
||||
return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
|
||||
if (enc_utf8)
|
||||
return utf_class(c) >= 2;
|
||||
}
|
||||
return utf_class_buf(c, buf) >= 2;
|
||||
#endif
|
||||
return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
|
||||
return FALSE;
|
||||
}
|
||||
return (c > 0 && GET_CHARTAB(buf, c) != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -917,21 +918,19 @@ vim_iswordc_buf(int c, buf_T *buf)
|
||||
int
|
||||
vim_iswordp(char_u *p)
|
||||
{
|
||||
#ifdef FEAT_MBYTE
|
||||
if (has_mbyte && MB_BYTE2LEN(*p) > 1)
|
||||
return mb_get_class(p) >= 2;
|
||||
#endif
|
||||
return GET_CHARTAB(curbuf, *p) != 0;
|
||||
return vim_iswordp_buf(p, curbuf);
|
||||
}
|
||||
|
||||
int
|
||||
vim_iswordp_buf(char_u *p, buf_T *buf)
|
||||
{
|
||||
int c = *p;
|
||||
|
||||
#ifdef FEAT_MBYTE
|
||||
if (has_mbyte && MB_BYTE2LEN(*p) > 1)
|
||||
return mb_get_class(p) >= 2;
|
||||
if (has_mbyte && MB_BYTE2LEN(c) > 1)
|
||||
c = (*mb_ptr2char)(p);
|
||||
#endif
|
||||
return (GET_CHARTAB(buf, *p) != 0);
|
||||
return vim_iswordc_buf(c, buf);
|
||||
}
|
||||
|
||||
/*
|
||||
|
85
src/kword_test.c
Normal file
85
src/kword_test.c
Normal file
@ -0,0 +1,85 @@
|
||||
/* vi:set ts=8 sts=4 sw=4 noet:
|
||||
*
|
||||
* VIM - Vi IMproved by Bram Moolenaar
|
||||
*
|
||||
* Do ":help uganda" in Vim to read copying and usage conditions.
|
||||
* Do ":help credits" in Vim to see a list of people who contributed.
|
||||
* See README.txt for an overview of the Vim source code.
|
||||
*/
|
||||
|
||||
/*
|
||||
* kword_test.c: Unittests for vim_iswordc() and vim_iswordp().
|
||||
*/
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
/* Must include main.c because it contains much more than just main() */
|
||||
#define NO_VIM_MAIN
|
||||
#include "main.c"
|
||||
|
||||
/* charset.c is included directly so the functions under test are compiled
 * into this program: objects/charset.o is not listed in OBJ_KWORD_TEST. */
|
||||
#include "charset.c"
|
||||
|
||||
#ifdef FEAT_MBYTE
|
||||
/*
|
||||
* Check that vim_iswordc_buf() and vim_iswordp_buf() return the same result
* for every character below 0x10000 when 'encoding' is "utf-8".
|
||||
*/
|
||||
static void
|
||||
test_isword_funcs_utf8(void)
|
||||
{
|
||||
buf_T buf;
|
||||
int c;
|
||||
|
||||
vim_memset(&buf, 0, sizeof(buf));
|
||||
p_enc = (char_u *)"utf-8";
|
||||
p_isi = (char_u *)"";
|
||||
p_isp = (char_u *)"";
|
||||
p_isf = (char_u *)"";
|
||||
buf.b_p_isk = (char_u *)"@,48-57,_,128-167,224-235";
|
||||
|
||||
curbuf = &buf;
|
||||
mb_init(); /* calls init_chartab() */
|
||||
|
||||
for (c = 0; c < 0x10000; ++c)
|
||||
{
|
||||
char_u p[4] = {0};
|
||||
int c1;
|
||||
int retc;
|
||||
int retp;
|
||||
|
||||
utf_char2bytes(c, p);
|
||||
c1 = utf_ptr2char(p);
|
||||
if (c != c1)
|
||||
{
|
||||
fprintf(stderr, "Failed: ");
|
||||
fprintf(stderr,
|
||||
"[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
|
||||
c, p[0], p[1], p[2]);
|
||||
fprintf(stderr, "c != utf_ptr2char(p) (=%#04x)\n", c1);
|
||||
abort();
|
||||
}
|
||||
retc = vim_iswordc_buf(c, &buf);
|
||||
retp = vim_iswordp_buf(p, &buf);
|
||||
if (retc != retp)
|
||||
{
|
||||
fprintf(stderr, "Failed: ");
|
||||
fprintf(stderr,
|
||||
"[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
|
||||
c, p[0], p[1], p[2]);
|
||||
fprintf(stderr, "vim_iswordc(c) (=%d) != vim_iswordp(p) (=%d)\n",
|
||||
retc, retp);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int
main(void)
{
    /* The only test needs multi-byte support; when FEAT_MBYTE is not defined
     * nothing is run and the program simply exits with status 0. */
#ifdef FEAT_MBYTE
    test_isword_funcs_utf8();
#endif

    return 0;
}
|
10
src/mbyte.c
10
src/mbyte.c
@ -895,7 +895,7 @@ mb_get_class_buf(char_u *p, buf_T *buf)
|
||||
if (enc_dbcs != 0 && p[0] != NUL && p[1] != NUL)
|
||||
return dbcs_class(p[0], p[1]);
|
||||
if (enc_utf8)
|
||||
return utf_class(utf_ptr2char(p));
|
||||
return utf_class_buf(utf_ptr2char(p), buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2693,6 +2693,12 @@ static struct interval emoji_all[] =
|
||||
*/
|
||||
int
|
||||
utf_class(int c)
|
||||
{
|
||||
return utf_class_buf(c, curbuf);
|
||||
}
|
||||
|
||||
int
|
||||
utf_class_buf(int c, buf_T *buf)
|
||||
{
|
||||
/* sorted list of non-overlapping intervals */
|
||||
static struct clinterval
|
||||
@ -2780,7 +2786,7 @@ utf_class(int c)
|
||||
{
|
||||
if (c == ' ' || c == '\t' || c == NUL || c == 0xa0)
|
||||
return 0; /* blank */
|
||||
if (vim_iswordc(c))
|
||||
if (vim_iswordc_buf(c, buf))
|
||||
return 2; /* word character */
|
||||
return 1; /* punctuation */
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ int utf_char2bytes(int c, char_u *buf);
|
||||
int utf_iscomposing(int c);
|
||||
int utf_printable(int c);
|
||||
int utf_class(int c);
|
||||
int utf_class_buf(int c, buf_T *buf);
|
||||
int utf_ambiguous_width(int c);
|
||||
int utf_fold(int a);
|
||||
int utf_toupper(int a);
|
||||
|
@ -764,6 +764,8 @@ static char *(features[]) =
|
||||
|
||||
static int included_patches[] =
|
||||
{ /* Add new patch number below this line */
|
||||
/**/
|
||||
252,
|
||||
/**/
|
||||
251,
|
||||
/**/
|
||||
|
Reference in New Issue
Block a user