mirror of
https://github.com/neovim/neovim
synced 2025-07-16 01:01:49 +00:00
feat(stdlib): overload vim.str_byteindex, vim.str_utfindex #30735
PROBLEM: There are several limitations to vim.str_byteindex, vim.str_utfindex: 1. They throw when given out-of-range indexes. An invalid (often user/lsp-provided) index doesn't feel exceptional and should be handled by the caller. `:help dev-error-patterns` suggests that `retval, errmsg` is the preferred way to handle this kind of failure. 2. They cannot accept an encoding. So LSP needs wrapper functions. #25272 3. The current signatures are not extensible. * Calling: The function currently uses a fairly opaque boolean value to identify the encoding. * Returns: The fact it can throw requires wrapping in pcall. 4. The current name doesn't follow suggestions in `:h dev-naming` and I think `get` would be suitable. SOLUTION: - Because these are performance-sensitive, don't introduce `opts`. - Introduce an "overload" that accepts `encoding:string` and `strict_indexing:bool` params. ```lua local col = vim.str_utfindex(line, encoding, [index, [strict_indexing]]) ``` Support the old versions by dispatching on the type of argument 2, and deprecate that form. ```lua vim.str_utfindex(line) -- (utf-32 length, utf-16 length), deprecated vim.str_utfindex(line, index) -- (utf-32 index, utf-16 index), deprecated vim.str_utfindex(line, 'utf-16') -- utf-16 length vim.str_utfindex(line, 'utf-16', index) -- utf-16 index vim.str_utfindex(line, 'utf-16', math.huge) -- error: index out of range vim.str_utfindex(line, 'utf-16', math.huge, false) -- utf-16 length ```
This commit is contained in:
@ -68,6 +68,12 @@ vim.log = {
|
||||
},
|
||||
}
|
||||
|
||||
-- Set of encoding names accepted by the string index helpers below;
-- membership is tested with `utfs[name]`.
local utfs = { ['utf-8'] = true, ['utf-16'] = true, ['utf-32'] = true }
|
||||
|
||||
-- TODO(lewis6991): document that the signature is system({cmd}, [{opts},] {on_exit})
|
||||
--- Runs a system command or throws an error if {cmd} cannot be run.
|
||||
---
|
||||
@ -714,7 +720,127 @@ function vim._on_key(buf, typed_buf)
|
||||
end
|
||||
end
|
||||
|
||||
--- Generates a list of possible completions for the string.
|
||||
--- Convert UTF-32, UTF-16 or UTF-8 {index} to byte index.
|
||||
--- If {strict_indexing} is false
|
||||
--- then an out of range index will return byte length
|
||||
--- instead of throwing an error.
|
||||
---
|
||||
--- Invalid UTF-8 and NUL is treated like in |vim.str_utfindex()|.
|
||||
--- An {index} in the middle of a UTF-16 sequence is rounded upwards to
|
||||
--- the end of that sequence.
|
||||
---@param s string
|
||||
---@param encoding "utf-8"|"utf-16"|"utf-32"
|
||||
---@param index integer
|
||||
---@param strict_indexing? boolean # default: true
|
||||
---@return integer
|
||||
function vim.str_byteindex(s, encoding, index, strict_indexing)
  -- Legacy call shape: str_byteindex({str}, {index}, {use_utf16}?).
  -- A numeric second argument can only be the old {index}, so the third
  -- argument is reinterpreted as the optional {use_utf16} flag.
  if type(encoding) == 'number' then
    local legacy_byte = vim.__str_byteindex(s, encoding, index or false)
    if not legacy_byte then
      error('index out of range')
    end
    return legacy_byte
  end

  vim.validate('s', s, 'string')
  vim.validate('index', index, 'number')

  local byte_len = #s

  -- Index 0 and the empty string both map to byte 0. Note that this return
  -- happens before {encoding} and {strict_indexing} are validated.
  if byte_len == 0 or index == 0 then
    return 0
  end

  vim.validate('encoding', encoding, function(name)
    return utfs[name], 'invalid encoding'
  end)

  vim.validate('strict_indexing', strict_indexing, 'boolean', true)
  -- Omitted {strict_indexing} means strict; only an explicit `false` relaxes it.
  local strict = strict_indexing ~= false

  if encoding == 'utf-8' then
    -- UTF-8 indices already are byte indices; only the upper bound matters.
    if index > byte_len then
      if strict then
        error('index out of range')
      end
      return byte_len
    end
    return index
  end

  -- UTF-16 / UTF-32: a falsy result from the native helper is treated as
  -- "out of range".
  local byte = vim.__str_byteindex(s, index, encoding == 'utf-16')
  if byte then
    return byte
  end
  if strict then
    error('index out of range')
  end
  return byte_len
end
|
||||
|
||||
--- Convert byte index to UTF-32, UTF-16 or UTF-8 indices. If {index} is not
|
||||
--- supplied, the length of the string is used. All indices are zero-based.
|
||||
---
|
||||
--- If {strict_indexing} is false then an out of range index will return string
|
||||
--- length instead of throwing an error.
|
||||
--- Invalid UTF-8 bytes, and embedded surrogates are counted as one code point
|
||||
--- each. An {index} in the middle of a UTF-8 sequence is rounded upwards to the end of
|
||||
--- that sequence.
|
||||
---@param s string
|
||||
---@param encoding "utf-8"|"utf-16"|"utf-32"
|
||||
---@param index? integer
|
||||
---@param strict_indexing? boolean # default: true
|
||||
---@return integer
|
||||
function vim.str_utfindex(s, encoding, index, strict_indexing)
  -- Legacy call shape: str_utfindex({str}, {index}?).
  -- A missing or numeric second argument is the old optional {index}; this
  -- form returns both the UTF-32 and the UTF-16 index.
  if encoding == nil or type(encoding) == 'number' then
    local legacy32, legacy16 = vim.__str_utfindex(s, encoding) --[[@as integer,integer]]
    if not (legacy32 and legacy16) then
      error('index out of range')
    end
    return legacy32, legacy16
  end

  vim.validate('s', s, 'string')
  vim.validate('index', index, 'number', true)

  -- No {index}: ask for the whole string by using an index past any possible
  -- end and turning strict mode off, so the result clamps to the length.
  if not index then
    index = math.huge
    strict_indexing = false
  end

  -- Note that this return happens before {encoding} and {strict_indexing}
  -- are validated.
  if index == 0 then
    return 0
  end

  vim.validate('encoding', encoding, function(name)
    return utfs[name], 'invalid encoding'
  end)

  vim.validate('strict_indexing', strict_indexing, 'boolean', true)
  -- Omitted {strict_indexing} means strict; only `false` relaxes it.
  local strict = strict_indexing ~= false

  if encoding == 'utf-8' then
    -- UTF-8 indices already are byte indices; only the upper bound matters.
    local byte_len = #s
    if index <= byte_len then
      return index
    end
    if strict then
      error('index out of range')
    end
    return byte_len
  end

  local want_utf16 = encoding == 'utf-16'

  -- A falsy column from the native helper is treated as "out of range".
  local idx32, idx16 = vim.__str_utfindex(s, index) --[[@as integer?,integer?]]
  local col = want_utf16 and idx16 or idx32
  if col then
    return col
  end

  if strict then
    error('index out of range')
  end

  -- Non-strict fallback: call the helper without an index and return the
  -- value for the requested encoding.
  local len32, len16 = vim.__str_utfindex(s) --[[@as integer,integer]]
  return want_utf16 and len16 or len32
end
|
||||
|
||||
--- Generates a list of possible completions for the str
|
||||
--- String has the pattern.
|
||||
---
|
||||
--- 1. Can we get it to just return things in the global namespace with that name prefix
|
||||
|
Reference in New Issue
Block a user