mirror of
https://github.com/neovim/neovim
synced 2025-07-17 17:51:48 +00:00
feat(editor): handle new multibyte sequences in normal mode replacement
While the implementation is not tied to screen characters, it is a reasonable expectation to support the same size. If nvim is able to display a multibyte character, it will accept the same character as input, including in normal mode commands like r{char}.
This commit is contained in:
@ -7,10 +7,6 @@
|
|||||||
#include "nvim/pos_defs.h"
|
#include "nvim/pos_defs.h"
|
||||||
#include "nvim/types_defs.h"
|
#include "nvim/types_defs.h"
|
||||||
|
|
||||||
// Includes final NUL. MAX_MCO is no longer used, but at least 4*(MAX_MCO+1)+1=29
|
|
||||||
// ensures we can fit all composed chars which did fit before.
|
|
||||||
#define MAX_SCHAR_SIZE 32
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
kZIndexDefaultGrid = 0,
|
kZIndexDefaultGrid = 0,
|
||||||
kZIndexFloatDefault = 50,
|
kZIndexFloatDefault = 50,
|
||||||
|
@ -839,6 +839,13 @@ bool utf_composinglike(const char *p1, const char *p2, GraphemeState *state)
|
|||||||
return arabic_combine(first, second);
|
return arabic_combine(first, second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// same as utf_composinglike but operating on UCS-4 values
|
||||||
|
bool utf_iscomposing(int c1, int c2, GraphemeState *state)
|
||||||
|
{
|
||||||
|
return (!utf8proc_grapheme_break_stateful(c1, c2, state)
|
||||||
|
|| arabic_combine(c1, c2));
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the screen char at the beginning of a string
|
/// Get the screen char at the beginning of a string
|
||||||
///
|
///
|
||||||
/// Caller is expected to check for things like unprintable chars etc
|
/// Caller is expected to check for things like unprintable chars etc
|
||||||
@ -1852,8 +1859,7 @@ StrCharInfo utfc_next_impl(StrCharInfo cur)
|
|||||||
while (true) {
|
while (true) {
|
||||||
uint8_t const next_len = utf8len_tab[*next];
|
uint8_t const next_len = utf8len_tab[*next];
|
||||||
int32_t const next_code = utf_ptr2CharInfo_impl(next, (uintptr_t)next_len);
|
int32_t const next_code = utf_ptr2CharInfo_impl(next, (uintptr_t)next_len);
|
||||||
if (utf8proc_grapheme_break_stateful(prev_code, next_code, &state)
|
if (!utf_iscomposing(prev_code, next_code, &state)) {
|
||||||
&& !arabic_combine(prev_code, next_code)) {
|
|
||||||
return (StrCharInfo){
|
return (StrCharInfo){
|
||||||
.ptr = (char *)next,
|
.ptr = (char *)next,
|
||||||
.chr = (CharInfo){ .value = next_code, .len = (next_code < 0 ? 1 : next_len) },
|
.chr = (CharInfo){ .value = next_code, .len = (next_code < 0 ? 1 : next_len) },
|
||||||
|
@ -835,21 +835,29 @@ static void normal_get_additional_char(NormalState *s)
|
|||||||
// because if it's put back with vungetc() it's too late to apply
|
// because if it's put back with vungetc() it's too late to apply
|
||||||
// mapping.
|
// mapping.
|
||||||
no_mapping--;
|
no_mapping--;
|
||||||
|
GraphemeState state = GRAPHEME_STATE_INIT;
|
||||||
|
int prev_code = s->ca.nchar;
|
||||||
|
|
||||||
while ((s->c = vpeekc()) > 0
|
while ((s->c = vpeekc()) > 0
|
||||||
&& (s->c >= 0x100 || MB_BYTE2LEN(vpeekc()) > 1)) {
|
&& (s->c >= 0x100 || MB_BYTE2LEN(vpeekc()) > 1)) {
|
||||||
s->c = plain_vgetc();
|
s->c = plain_vgetc();
|
||||||
// TODO(bfredl): only allowing up to two composing chars is cringe af.
|
|
||||||
// Could reuse/abuse schar_T to at least allow us to input anything we are able
|
if (!utf_iscomposing(prev_code, s->c, &state)) {
|
||||||
// to display and use the stateful utf8proc algorithm like utf_composinglike
|
|
||||||
if (!utf_iscomposing_legacy(s->c)) {
|
|
||||||
vungetc(s->c); // it wasn't, put it back
|
vungetc(s->c); // it wasn't, put it back
|
||||||
break;
|
break;
|
||||||
} else if (s->ca.ncharC1 == 0) {
|
|
||||||
s->ca.ncharC1 = s->c;
|
|
||||||
} else {
|
|
||||||
s->ca.ncharC2 = s->c;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// first composing char, first put base char into buffer
|
||||||
|
if (s->ca.nchar_len == 0) {
|
||||||
|
s->ca.nchar_len = utf_char2bytes(s->ca.nchar, s->ca.nchar_composing);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s->ca.nchar_len + utf_char2len(s->c) < (int)sizeof(s->ca.nchar_composing)) {
|
||||||
|
s->ca.nchar_len += utf_char2bytes(s->c, s->ca.nchar_composing + s->ca.nchar_len);
|
||||||
|
}
|
||||||
|
prev_code = s->c;
|
||||||
}
|
}
|
||||||
|
s->ca.nchar_composing[s->ca.nchar_len] = NUL;
|
||||||
no_mapping++;
|
no_mapping++;
|
||||||
// Vim may be in a different mode when the user types the next key,
|
// Vim may be in a different mode when the user types the next key,
|
||||||
// but when replaying a recording the next key is already in the
|
// but when replaying a recording the next key is already in the
|
||||||
@ -1735,7 +1743,12 @@ size_t find_ident_at_pos(win_T *wp, linenr_T lnum, colnr_T startcol, char **text
|
|||||||
static void prep_redo_cmd(cmdarg_T *cap)
|
static void prep_redo_cmd(cmdarg_T *cap)
|
||||||
{
|
{
|
||||||
prep_redo(cap->oap->regname, cap->count0,
|
prep_redo(cap->oap->regname, cap->count0,
|
||||||
NUL, cap->cmdchar, NUL, NUL, cap->nchar);
|
NUL, cap->cmdchar, NUL, NUL, NUL);
|
||||||
|
if (cap->nchar_len > 0) {
|
||||||
|
AppendToRedobuff(cap->nchar_composing);
|
||||||
|
} else {
|
||||||
|
AppendCharToRedobuff(cap->nchar);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Prepare for redo of any command.
|
/// Prepare for redo of any command.
|
||||||
@ -4548,17 +4561,15 @@ static void nv_replace(cmdarg_T *cap)
|
|||||||
// Give 'r' to edit(), to get the redo command right.
|
// Give 'r' to edit(), to get the redo command right.
|
||||||
invoke_edit(cap, true, 'r', false);
|
invoke_edit(cap, true, 'r', false);
|
||||||
} else {
|
} else {
|
||||||
prep_redo(cap->oap->regname, cap->count1,
|
prep_redo(cap->oap->regname, cap->count1, NUL, 'r', NUL, had_ctrl_v, 0);
|
||||||
NUL, 'r', NUL, had_ctrl_v, cap->nchar);
|
|
||||||
|
|
||||||
curbuf->b_op_start = curwin->w_cursor;
|
curbuf->b_op_start = curwin->w_cursor;
|
||||||
const int old_State = State;
|
const int old_State = State;
|
||||||
|
|
||||||
if (cap->ncharC1 != 0) {
|
if (cap->nchar_len > 0) {
|
||||||
AppendCharToRedobuff(cap->ncharC1);
|
AppendToRedobuff(cap->nchar_composing);
|
||||||
}
|
} else {
|
||||||
if (cap->ncharC2 != 0) {
|
AppendCharToRedobuff(cap->nchar);
|
||||||
AppendCharToRedobuff(cap->ncharC2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is slow, but it handles replacing a single-byte with a
|
// This is slow, but it handles replacing a single-byte with a
|
||||||
@ -4576,15 +4587,13 @@ static void nv_replace(cmdarg_T *cap)
|
|||||||
curwin->w_cursor.col++;
|
curwin->w_cursor.col++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ins_char(cap->nchar);
|
if (cap->nchar_len) {
|
||||||
|
ins_char_bytes(cap->nchar_composing, (size_t)cap->nchar_len);
|
||||||
|
} else {
|
||||||
|
ins_char(cap->nchar);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
State = old_State;
|
State = old_State;
|
||||||
if (cap->ncharC1 != 0) {
|
|
||||||
ins_char(cap->ncharC1);
|
|
||||||
}
|
|
||||||
if (cap->ncharC2 != 0) {
|
|
||||||
ins_char(cap->ncharC2);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
curwin->w_cursor.col--; // cursor on the last replaced char
|
curwin->w_cursor.col--; // cursor on the last replaced char
|
||||||
// if the character on the left of the current cursor is a multi-byte
|
// if the character on the left of the current cursor is a multi-byte
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
#include "nvim/pos_defs.h"
|
#include "nvim/pos_defs.h"
|
||||||
|
#include "nvim/types_defs.h"
|
||||||
|
|
||||||
/// Motion types, used for operators and for yank/delete registers.
|
/// Motion types, used for operators and for yank/delete registers.
|
||||||
///
|
///
|
||||||
@ -47,8 +48,8 @@ typedef struct {
|
|||||||
int prechar; ///< prefix character (optional, always 'g')
|
int prechar; ///< prefix character (optional, always 'g')
|
||||||
int cmdchar; ///< command character
|
int cmdchar; ///< command character
|
||||||
int nchar; ///< next command character (optional)
|
int nchar; ///< next command character (optional)
|
||||||
int ncharC1; ///< first composing character (optional)
|
char nchar_composing[MAX_SCHAR_SIZE]; ///< next char with composing chars (optional)
|
||||||
int ncharC2; ///< second composing character (optional)
|
int nchar_len; ///< len of nchar_composing (when zero, use nchar instead)
|
||||||
int extra_char; ///< yet another character (optional)
|
int extra_char; ///< yet another character (optional)
|
||||||
int opcount; ///< count before an operator
|
int opcount; ///< count before an operator
|
||||||
int count0; ///< count before command, default 0
|
int count0; ///< count before command, default 0
|
||||||
|
@ -113,7 +113,7 @@ static int last_idx = 0; // index in spats[] for RE_LAST
|
|||||||
static uint8_t lastc[2] = { NUL, NUL }; // last character searched for
|
static uint8_t lastc[2] = { NUL, NUL }; // last character searched for
|
||||||
static Direction lastcdir = FORWARD; // last direction of character search
|
static Direction lastcdir = FORWARD; // last direction of character search
|
||||||
static bool last_t_cmd = true; // last search t_cmd
|
static bool last_t_cmd = true; // last search t_cmd
|
||||||
static char lastc_bytes[MB_MAXBYTES + 1];
|
static char lastc_bytes[MAX_SCHAR_SIZE + 1];
|
||||||
static int lastc_bytelen = 1; // >1 for multi-byte char
|
static int lastc_bytelen = 1; // >1 for multi-byte char
|
||||||
|
|
||||||
// copy of spats[], for keeping the search patterns while executing autocmds
|
// copy of spats[], for keeping the search patterns while executing autocmds
|
||||||
@ -1550,14 +1550,11 @@ int searchc(cmdarg_T *cap, bool t_cmd)
|
|||||||
*lastc = (uint8_t)c;
|
*lastc = (uint8_t)c;
|
||||||
set_csearch_direction(dir);
|
set_csearch_direction(dir);
|
||||||
set_csearch_until(t_cmd);
|
set_csearch_until(t_cmd);
|
||||||
lastc_bytelen = utf_char2bytes(c, lastc_bytes);
|
if (cap->nchar_len) {
|
||||||
if (cap->ncharC1 != 0) {
|
lastc_bytelen = cap->nchar_len;
|
||||||
lastc_bytelen += utf_char2bytes(cap->ncharC1,
|
memcpy(lastc_bytes, cap->nchar_composing, (size_t)cap->nchar_len);
|
||||||
lastc_bytes + lastc_bytelen);
|
} else {
|
||||||
if (cap->ncharC2 != 0) {
|
lastc_bytelen = utf_char2bytes(c, lastc_bytes);
|
||||||
lastc_bytelen += utf_char2bytes(cap->ncharC2,
|
|
||||||
lastc_bytes + lastc_bytelen);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else { // repeat previous search
|
} else { // repeat previous search
|
||||||
|
@ -12,6 +12,10 @@ typedef int32_t sattr_T;
|
|||||||
// must be at least as big as the biggest of schar_T, sattr_T, colnr_T
|
// must be at least as big as the biggest of schar_T, sattr_T, colnr_T
|
||||||
typedef int32_t sscratch_T;
|
typedef int32_t sscratch_T;
|
||||||
|
|
||||||
|
// Includes final NUL. MAX_MCO is no longer used, but at least 4*(MAX_MCO+1)+1=29
|
||||||
|
// ensures we can fit all composed chars which did fit before.
|
||||||
|
#define MAX_SCHAR_SIZE 32
|
||||||
|
|
||||||
// Opaque handle used by API clients to refer to various objects in vim
|
// Opaque handle used by API clients to refer to various objects in vim
|
||||||
typedef int handle_T;
|
typedef int handle_T;
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ local feed = n.feed
|
|||||||
local fn = n.fn
|
local fn = n.fn
|
||||||
local command = n.command
|
local command = n.command
|
||||||
local eq = t.eq
|
local eq = t.eq
|
||||||
|
local api = n.api
|
||||||
|
|
||||||
describe('Normal mode', function()
|
describe('Normal mode', function()
|
||||||
before_each(clear)
|
before_each(clear)
|
||||||
@ -41,4 +42,23 @@ describe('Normal mode', function()
|
|||||||
attr_ids = {},
|
attr_ids = {},
|
||||||
})
|
})
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it('replacing with ZWJ emoji sequences', function()
|
||||||
|
local screen = Screen.new(30, 8)
|
||||||
|
screen:attach()
|
||||||
|
api.nvim_buf_set_lines(0, 0, -1, true, { 'abcdefg' })
|
||||||
|
feed('05r🧑🌾') -- ZWJ
|
||||||
|
screen:expect([[
|
||||||
|
🧑🌾🧑🌾🧑🌾🧑🌾^🧑🌾fg |
|
||||||
|
{1:~ }|*6
|
||||||
|
|
|
||||||
|
]])
|
||||||
|
|
||||||
|
feed('2r🏳️⚧️') -- ZWJ and variant selectors
|
||||||
|
screen:expect([[
|
||||||
|
🧑🌾🧑🌾🧑🌾🧑🌾🏳️⚧️^🏳️⚧️g |
|
||||||
|
{1:~ }|*6
|
||||||
|
|
|
||||||
|
]])
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
Reference in New Issue
Block a user