Merge pull request #10513 from bfredl/bytecount

api/lua: add {byte_count} parameter to line region change event
This commit is contained in:
Björn Linse
2019-08-06 20:25:46 +02:00
committed by GitHub
10 changed files with 309 additions and 38 deletions

View File

@ -200,17 +200,26 @@ User reloads the buffer with ":edit", emits: >
nvim_buf_detach_event[{buf}]
*api-buffer-updates-lua*
In-process lua plugins can also recieve buffer updates, in the form of lua
In-process lua plugins can also receive buffer updates, in the form of lua
callbacks. These callbacks are called frequently in various contexts, buffer
contents or window layout should not be changed inside these |textlock|.
|vim.schedule| can be used to defer these operations to the main loop, where
they are allowed.
|nvim_buf_attach| will take keyword args for the callbacks. "on_lines" will
receive parameters ("lines", {buf}, {changedtick}, {firstline}, {lastline}, {new_lastline}).
Unlike remote channels the text contents are not passed. The new text can be
accessed inside the callback as
`vim.api.nvim_buf_get_lines(buf, firstline, new_lastline, true)`
receive parameters ("lines", {buf}, {changedtick}, {firstline}, {lastline},
{new_lastline}, {old_byte_size}[, {old_utf32_size}, {old_utf16_size}]).
Unlike remote channel events the text contents are not passed. The new text can
be accessed inside the callback as
`vim.api.nvim_buf_get_lines(buf, firstline, new_lastline, true)`
{old_byte_size} is the total size of the replaced region {firstline} to
{lastline} in bytes, including the final newline after {lastline}. if
`utf_sizes` is set to true in |nvim_buf_attach()| keyword args, then the
UTF-32 and UTF-16 sizes of the deleted region is also passed as additional
arguments {old_utf32_size} and {old_utf16_size}.
"on_changedtick" is invoked when |b:changedtick| was incremented but no text
was changed. The parameters recieved are ("changedtick", {buf}, {changedtick}).

View File

@ -109,9 +109,11 @@ String buffer_get_line(Buffer buffer, Integer index, Error *err)
/// `nvim_buf_lines_event`. Otherwise, the first notification will be
/// a `nvim_buf_changedtick_event`. Not used for lua callbacks.
/// @param opts Optional parameters.
/// `on_lines`: lua callback received on change.
/// `on_lines`: lua callback received on change.
/// `on_changedtick`: lua callback received on changedtick
/// increment without text change.
/// `utf_sizes`: include UTF-32 and UTF-16 size of
/// the replaced region.
/// See |api-buffer-updates-lua| for more information
/// @param[out] err Error details, if any
/// @return False when updates couldn't be enabled because the buffer isn't
@ -156,6 +158,12 @@ Boolean nvim_buf_attach(uint64_t channel_id,
}
cb.on_detach = v->data.luaref;
v->data.integer = LUA_NOREF;
} else if (is_lua && strequal("utf_sizes", k.data)) {
if (v->type != kObjectTypeBoolean) {
api_set_error(err, kErrorTypeValidation, "utf_sizes must be boolean");
goto error;
}
cb.utf_sizes = v->data.boolean;
} else {
api_set_error(err, kErrorTypeValidation, "unexpected key: %s", k.data);
goto error;
@ -1176,6 +1184,30 @@ free_exit:
return 0;
}
Dictionary nvim__buf_stats(Buffer buffer, Error *err)
{
Dictionary rv = ARRAY_DICT_INIT;
buf_T *buf = find_buffer_by_handle(buffer, err);
if (!buf) {
return rv;
}
// Number of times the cached line was flushed.
// This should generally not increase while editing the same
// line in the same mode.
PUT(rv, "flush_count", INTEGER_OBJ(buf->flush_count));
// lnum of current line
PUT(rv, "current_lnum", INTEGER_OBJ(buf->b_ml.ml_line_lnum));
// whether the line has unflushed changes.
PUT(rv, "line_dirty", BOOLEAN_OBJ(buf->b_ml.ml_flags & ML_LINE_DIRTY));
// NB: this should be zero at any time API functions are called,
// this exists to debug issues
PUT(rv, "dirty_bytes", INTEGER_OBJ((Integer)buf->deleted_bytes));
return rv;
}
// Check if deleting lines made the cursor position invalid.
// Changed lines from `lo` to `hi`; added `extra` lines (negative if deleted).
static void fix_cursor(linenr_T lo, linenr_T hi, linenr_T extra)

View File

@ -459,8 +459,9 @@ typedef struct {
LuaRef on_lines;
LuaRef on_changedtick;
LuaRef on_detach;
bool utf_sizes;
} BufUpdateCallbacks;
#define BUF_UPDATE_CALLBACKS_INIT { LUA_NOREF, LUA_NOREF, LUA_NOREF }
#define BUF_UPDATE_CALLBACKS_INIT { LUA_NOREF, LUA_NOREF, LUA_NOREF, false }
#define BUF_HAS_QF_ENTRY 1
#define BUF_HAS_LL_ENTRY 2
@ -802,11 +803,26 @@ struct file_buffer {
kvec_t(BufhlLine *) b_bufhl_move_space; // temporary space for highlights
// array of channelids which have asked to receive updates for this
// array of channel_id:s which have asked to receive updates for this
// buffer.
kvec_t(uint64_t) update_channels;
// array of lua callbacks for buffer updates.
kvec_t(BufUpdateCallbacks) update_callbacks;
// whether an update callback has requested codepoint size of deleted regions.
bool update_need_codepoints;
// Measurements of the deleted or replaced region since the last update
// event. Some consumers of buffer changes need to know the byte size (like
// tree-sitter) or the corresponding UTF-32/UTF-16 size (like LSP) of the
// deleted text.
size_t deleted_bytes;
size_t deleted_codepoints;
size_t deleted_codeunits;
// The number for times the current line has been flushed in the memline.
int flush_count;
int b_diff_failed; // internal diff failed for this buffer
};

View File

@ -26,6 +26,9 @@ bool buf_updates_register(buf_T *buf, uint64_t channel_id,
if (channel_id == LUA_INTERNAL_CALL) {
kv_push(buf->update_callbacks, cb);
if (cb.utf_sizes) {
buf->update_need_codepoints = true;
}
return true;
}
@ -169,6 +172,10 @@ void buf_updates_send_changes(buf_T *buf,
int64_t num_removed,
bool send_tick)
{
size_t deleted_codepoints, deleted_codeunits;
size_t deleted_bytes = ml_flush_deleted_bytes(buf, &deleted_codepoints,
&deleted_codeunits);
if (!buf_updates_active(buf)) {
return;
}
@ -231,8 +238,8 @@ void buf_updates_send_changes(buf_T *buf,
bool keep = true;
if (cb.on_lines != LUA_NOREF) {
Array args = ARRAY_DICT_INIT;
Object items[5];
args.size = 5;
Object items[8];
args.size = 6; // may be increased to 8 below
args.items = items;
// the first argument is always the buffer handle
@ -250,6 +257,13 @@ void buf_updates_send_changes(buf_T *buf,
// the last line in the updated range
args.items[4] = INTEGER_OBJ(firstline - 1 + num_added);
// byte count of previous contents
args.items[5] = INTEGER_OBJ((Integer)deleted_bytes);
if (cb.utf_sizes) {
args.size = 8;
args.items[6] = INTEGER_OBJ((Integer)deleted_codepoints);
args.items[7] = INTEGER_OBJ((Integer)deleted_codeunits);
}
textlock++;
Object res = executor_exec_lua_cb(cb.on_lines, "lines", args, true);
textlock--;

View File

@ -1755,6 +1755,9 @@ failed:
ml_delete(curbuf->b_ml.ml_line_count, false);
linecnt--;
}
curbuf->deleted_bytes = 0;
curbuf->deleted_codepoints = 0;
curbuf->deleted_codeunits = 0;
linecnt = curbuf->b_ml.ml_line_count - linecnt;
if (filesize == 0)
linecnt = 0;

View File

@ -627,6 +627,8 @@ EXTERN pos_T Insstart_orig;
EXTERN int orig_line_count INIT(= 0); /* Line count when "gR" started */
EXTERN int vr_lines_changed INIT(= 0); /* #Lines changed by "gR" so far */
// increase around internal delete/replace
EXTERN int inhibit_delete_count INIT(= 0);
/*
* These flags are set based upon 'fileencoding'.

View File

@ -1438,6 +1438,39 @@ int utf16_to_utf8(const wchar_t *strw, char **str)
#endif
/// Measure the length of a string in corresponding UTF-32 and UTF-16 units.
///
/// Invalid UTF-8 bytes, or embedded surrogates, count as one code point/unit
/// each.
///
/// The out parameters are incremented. This is used to measure the size of
/// a buffer region consisting of multiple line segments.
///
/// @param s the string
/// @param len maximum length (an earlier NUL terminates)
/// @param[out] codepoints incremented with UTF-32 code point size
/// @param[out] codeunits incremented with UTF-16 code unit size
void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
size_t *codeunits)
FUNC_ATTR_NONNULL_ALL
{
size_t count = 0, extra = 0;
size_t clen;
for (size_t i = 0; i < len && s[i] != NUL; i += clen) {
clen = utf_ptr2len_len(s+i, len-i);
// NB: gets the byte value of invalid sequence bytes.
// we only care whether the char fits in the BMP or not
int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
count++;
if (c > 0xFFFF) {
extra++;
}
}
*codepoints += count;
*codeunits += count + extra;
}
/*
* Version of strnicmp() that handles multi-byte characters.
* Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can

View File

@ -2383,6 +2383,23 @@ static int ml_append_int(
return OK;
}
void ml_add_deleted_len(char_u *ptr, ssize_t len)
{
if (inhibit_delete_count) {
return;
}
if (len == -1) {
len = STRLEN(ptr);
}
curbuf->deleted_bytes += len+1;
if (curbuf->update_need_codepoints) {
mb_utflen(ptr, len, &curbuf->deleted_codepoints,
&curbuf->deleted_codeunits);
curbuf->deleted_codepoints++; // NL char
curbuf->deleted_codeunits++;
}
}
/*
* Replace line lnum, with buffering, in current buffer.
*
@ -2403,13 +2420,24 @@ int ml_replace(linenr_T lnum, char_u *line, bool copy)
if (curbuf->b_ml.ml_mfp == NULL && open_buffer(FALSE, NULL, 0) == FAIL)
return FAIL;
bool readlen = true;
if (copy) {
line = vim_strsave(line);
}
if (curbuf->b_ml.ml_line_lnum != lnum) /* other line buffered */
ml_flush_line(curbuf); /* flush it */
else if (curbuf->b_ml.ml_flags & ML_LINE_DIRTY) /* same line allocated */
xfree(curbuf->b_ml.ml_line_ptr); /* free it */
if (curbuf->b_ml.ml_line_lnum != lnum) { // other line buffered
ml_flush_line(curbuf); // flush it
} else if (curbuf->b_ml.ml_flags & ML_LINE_DIRTY) { // same line allocated
ml_add_deleted_len(curbuf->b_ml.ml_line_ptr, -1);
readlen = false; // already added the length
xfree(curbuf->b_ml.ml_line_ptr); // free it
}
if (readlen && kv_size(curbuf->update_callbacks)) {
ml_add_deleted_len(ml_get_buf(curbuf, lnum, false), -1);
}
curbuf->b_ml.ml_line_ptr = line;
curbuf->b_ml.ml_line_lnum = lnum;
curbuf->b_ml.ml_flags = (curbuf->b_ml.ml_flags | ML_LINE_DIRTY) & ~ML_EMPTY;
@ -2491,6 +2519,10 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message)
else
line_size = ((dp->db_index[idx - 1]) & DB_INDEX_MASK) - line_start;
// Line should always have an NL char internally (represented as NUL),
// even if 'noeol' is set.
assert(line_size >= 1);
ml_add_deleted_len((char_u *)dp + line_start, line_size-1);
/*
* special case: If there is only one line in the data block it becomes empty.
@ -2676,6 +2708,17 @@ void ml_clearmarked(void)
return;
}
size_t ml_flush_deleted_bytes(buf_T *buf, size_t *codepoints, size_t *codeunits)
{
size_t ret = buf->deleted_bytes;
*codepoints = buf->deleted_codepoints;
*codeunits = buf->deleted_codeunits;
buf->deleted_bytes = 0;
buf->deleted_codepoints = 0;
buf->deleted_codeunits = 0;
return ret;
}
/*
* flush ml_line if necessary
*/
@ -2704,6 +2747,8 @@ static void ml_flush_line(buf_T *buf)
return;
entered = TRUE;
buf->flush_count++;
lnum = buf->b_ml.ml_line_lnum;
new_line = buf->b_ml.ml_line_ptr;

View File

@ -780,6 +780,7 @@ open_line (
did_append = FALSE;
}
inhibit_delete_count++;
if (newindent
|| did_si
) {
@ -821,6 +822,7 @@ open_line (
did_si = false;
}
}
inhibit_delete_count--;
/*
* In REPLACE mode, for each character in the extra leader, there must be
@ -1685,6 +1687,7 @@ int del_bytes(colnr_T count, bool fixpos_arg, bool use_delcombine)
bool was_alloced = ml_line_alloced(); // check if oldp was allocated
char_u *newp;
if (was_alloced) {
ml_add_deleted_len(curbuf->b_ml.ml_line_ptr, oldlen);
newp = oldp; // use same allocated memory
} else { // need to allocate a new line
newp = xmalloc((size_t)(oldlen + 1 - count));

View File

@ -5,28 +5,31 @@ local command = helpers.command
local meths = helpers.meths
local clear = helpers.clear
local eq = helpers.eq
local exec_lua = helpers.exec_lua
local feed = helpers.feed
local origlines = {"original line 1",
"original line 2",
"original line 3",
"original line 4",
"original line 5",
"original line 6"}
"original line 6",
" indented line"}
describe('lua: buffer event callbacks', function()
before_each(function()
clear()
meths.execute_lua([[
exec_lua([[
local events = {}
function test_register(bufnr, id, changedtick)
function test_register(bufnr, id, changedtick, utf_sizes)
local function callback(...)
table.insert(events, {id, ...})
if test_unreg == id then
return true
end
end
local opts = {on_lines=callback, on_detach=callback}
local opts = {on_lines=callback, on_detach=callback, utf_sizes=utf_sizes}
if changedtick then
opts.on_changedtick = callback
end
@ -38,55 +41,166 @@ describe('lua: buffer event callbacks', function()
events = {}
return ret_events
end
]], {})
]])
end)
it('works', function()
-- verifying the sizes with nvim_buf_get_offset is nice (checks we cannot
-- assert the wrong thing), but masks errors with unflushed lines (as
-- nvim_buf_get_offset forces a flush of the memline). To be safe run the
-- test both ways.
local function check(verify,utf_sizes)
local lastsize
meths.buf_set_lines(0, 0, -1, true, origlines)
meths.execute_lua("return test_register(...)", {0, "test1"})
if verify then
lastsize = meths.buf_get_offset(0, meths.buf_line_count(0))
end
exec_lua("return test_register(...)", 0, "test1",false,utf_sizes)
local tick = meths.buf_get_changedtick(0)
local verify_name = "test1"
local function check_events(expected)
local events = exec_lua("return get_events(...)" )
if utf_sizes then
-- this test case uses ASCII only, so sizes sshould be the same.
-- Unicode is tested below.
for _, event in ipairs(expected) do
event[9] = event[8]
event[10] = event[8]
end
end
eq(expected, events)
if verify then
for _, event in ipairs(events) do
if event[1] == verify_name and event[2] == "lines" then
local startline, endline = event[5], event[7]
local newrange = meths.buf_get_offset(0, endline) - meths.buf_get_offset(0, startline)
local newsize = meths.buf_get_offset(0, meths.buf_line_count(0))
local oldrange = newrange + lastsize - newsize
eq(oldrange, event[8])
lastsize = newsize
end
end
end
end
command('set autoindent')
command('normal! GyyggP')
tick = tick + 1
eq({{ "test1", "lines", 1, tick, 0, 0, 1 }},
meths.execute_lua("return get_events(...)", {}))
check_events({{ "test1", "lines", 1, tick, 0, 0, 1, 0}})
meths.buf_set_lines(0, 3, 5, true, {"changed line"})
tick = tick + 1
eq({{ "test1", "lines", 1, tick, 3, 5, 4 }},
meths.execute_lua("return get_events(...)", {}))
check_events({{ "test1", "lines", 1, tick, 3, 5, 4, 32 }})
meths.execute_lua("return test_register(...)", {0, "test2", true})
exec_lua("return test_register(...)", 0, "test2", true, utf_sizes)
tick = tick + 1
command('undo')
-- plugins can opt in to receive changedtick events, or choose
-- to only recieve actual changes.
eq({{ "test1", "lines", 1, tick, 3, 4, 5 },
{ "test2", "lines", 1, tick, 3, 4, 5 },
{ "test2", "changedtick", 1, tick+1 } },
meths.execute_lua("return get_events(...)", {}))
check_events({{ "test1", "lines", 1, tick, 3, 4, 5, 13 },
{ "test2", "lines", 1, tick, 3, 4, 5, 13 },
{ "test2", "changedtick", 1, tick+1 } })
tick = tick + 1
-- simulate next callback returning true
meths.execute_lua("test_unreg = 'test1'", {})
exec_lua("test_unreg = 'test1'")
meths.buf_set_lines(0, 6, 7, true, {"x1","x2","x3"})
tick = tick + 1
-- plugins can opt in to receive changedtick events, or choose
-- to only recieve actual changes.
eq({{ "test1", "lines", 1, tick, 6, 7, 9 },
{ "test2", "lines", 1, tick, 6, 7, 9 }},
meths.execute_lua("return get_events(...)", {}))
check_events({{ "test1", "lines", 1, tick, 6, 7, 9, 16 },
{ "test2", "lines", 1, tick, 6, 7, 9, 16 }})
verify_name = "test2"
meths.buf_set_lines(0, 1, 1, true, {"added"})
tick = tick + 1
eq({{ "test2", "lines", 1, tick, 1, 1, 2 }},
meths.execute_lua("return get_events(...)", {}))
check_events({{ "test2", "lines", 1, tick, 1, 1, 2, 0 }})
feed('wix')
tick = tick + 1
check_events({{ "test2", "lines", 1, tick, 4, 5, 5, 16 }})
-- check hot path for multiple insert
feed('yz')
tick = tick + 1
check_events({{ "test2", "lines", 1, tick, 4, 5, 5, 17 }})
feed('<bs>')
tick = tick + 1
check_events({{ "test2", "lines", 1, tick, 4, 5, 5, 19 }})
feed('<esc>Go')
tick = tick + 1
check_events({{ "test2", "lines", 1, tick, 11, 11, 12, 0 }})
feed('x')
tick = tick + 1
check_events({{ "test2", "lines", 1, tick, 11, 12, 12, 5 }})
command('bwipe!')
eq({{ "test2", "detach", 1 }},
meths.execute_lua("return get_events(...)", {}))
check_events({{ "test2", "detach", 1 }})
end
it('works', function()
check(false)
end)
it('works with verify', function()
check(true)
end)
it('works with utf_sizes and ASCII text', function()
check(false,true)
end)
it('works with utf_sizes and unicode text', function()
local unicode_text = {"ascii text",
"latin text åäö",
"BMP text ɧ αλφά",
"BMP text 汉语 ↥↧",
"SMP 🤦 🦄🦃",
"combining å بِيَّة"}
meths.buf_set_lines(0, 0, -1, true, unicode_text)
feed('gg')
exec_lua("return test_register(...)", 0, "test1", false, true)
local tick = meths.buf_get_changedtick(0)
feed('dd')
tick = tick + 1
eq({{ "test1", "lines", 1, tick, 0, 1, 0, 11, 11, 11 }}, exec_lua("return get_events(...)" ))
feed('A<bs>')
tick = tick + 1
eq({{ "test1", "lines", 1, tick, 0, 1, 1, 18, 15, 15 }}, exec_lua("return get_events(...)" ))
feed('<esc>jylp')
tick = tick + 1
eq({{ "test1", "lines", 1, tick, 1, 2, 2, 21, 16, 16 }}, exec_lua("return get_events(...)" ))
feed('+eea<cr>')
tick = tick + 1
eq({{ "test1", "lines", 1, tick, 2, 3, 4, 23, 15, 15 }}, exec_lua("return get_events(...)" ))
feed('<esc>jdw')
tick = tick + 1
-- non-BMP chars count as 2 UTF-2 codeunits
eq({{ "test1", "lines", 1, tick, 4, 5, 5, 18, 9, 12 }}, exec_lua("return get_events(...)" ))
feed('+rx')
tick = tick + 1
-- count the individual codepoints of a composed character.
eq({{ "test1", "lines", 1, tick, 5, 6, 6, 27, 20, 20 }}, exec_lua("return get_events(...)" ))
feed('kJ')
tick = tick + 1
-- NB: this is inefficient (but not really wrong).
eq({{ "test1", "lines", 1, tick, 4, 5, 5, 14, 5, 8 },
{ "test1", "lines", 1, tick+1, 5, 6, 5, 27, 20, 20 }}, exec_lua("return get_events(...)" ))
end)
end)