feat(treesitter): add more metadata to language.inspect() (#32657)

Problem: No way to check the version of a treesitter parser.

Solution: Add version metadata (ABI 15 parsers only), parser state count, and supertype information (ABI 15 parsers only) to the result of `vim.treesitter.language.inspect()`. Also graduate the private `_abi_version` field to `abi_version`, as this is now the official upstream name.

---------

Co-authored-by: Christian Clason <c.clason@uni-graz.at>
This commit is contained in:
Lewis Russell
2025-03-01 15:51:09 +00:00
committed by GitHub
parent 48e6147e64
commit ec8922978e
7 changed files with 104 additions and 36 deletions

View File

@ -409,6 +409,8 @@ TREESITTER
• |:InspectTree| now shows which nodes are missing. • |:InspectTree| now shows which nodes are missing.
• Bundled markdown highlight queries use `conceal_lines` metadata to conceal • Bundled markdown highlight queries use `conceal_lines` metadata to conceal
code block fence lines vertically. code block fence lines vertically.
• |vim.treesitter.language.inspect()| shows additional information, including
parser version for ABI 15 parsers.
TUI TUI

View File

@ -1192,14 +1192,17 @@ get_lang({filetype}) *vim.treesitter.language.get_lang()*
inspect({lang}) *vim.treesitter.language.inspect()* inspect({lang}) *vim.treesitter.language.inspect()*
Inspects the provided language. Inspects the provided language.
Inspecting provides some useful information on the language like node and Inspecting provides some useful information on the language like ABI
field names, ABI version, and whether the language came from a WASM version, parser state count (a measure of parser complexity), node and
module. field names, and whether the language came from a WASM module.
Node names are returned in a table mapping each node name to a `boolean` Node names are returned in a table mapping each node name to a `boolean`
indicating whether or not the node is named (i.e., not anonymous). indicating whether or not the node is named (i.e., not anonymous).
Anonymous nodes are surrounded with double quotes (`"`). Anonymous nodes are surrounded with double quotes (`"`).
For ABI 15 parsers, also show parser metadata (major, minor, patch
version) and a table of supertypes with their respective subtypes.
Parameters: ~ Parameters: ~
• {lang} (`string`) Language • {lang} (`string`) Language

View File

@ -22,10 +22,15 @@ error('Cannot require a meta file')
---@field patterns table<integer, (integer|string)[][]> ---@field patterns table<integer, (integer|string)[][]>
--- ---
---@class TSLangInfo ---@class TSLangInfo
---@field abi_version integer
---@field metadata? { major_version: integer, minor_version: integer, patch_version: integer }
---@field state_count integer
---@field fields string[] ---@field fields string[]
---@field symbols table<string,boolean> ---@field symbols table<string,boolean>
---@field supertypes table<string,string[]>
---@field _wasm boolean ---@field _wasm boolean
---@field _abi_version integer
--- @param lang string --- @param lang string
--- @return TSLangInfo --- @return TSLangInfo

View File

@ -35,7 +35,7 @@ function M.check()
else else
local lang = ts.language.inspect(parsername) local lang = ts.language.inspect(parsername)
health.ok( health.ok(
string.format('Parser: %-20s ABI: %d, path: %s', parsername, lang._abi_version, parser) string.format('Parser: %-20s ABI: %d, path: %s', parsername, lang.abi_version, parser)
) )
end end
end end

View File

@ -168,13 +168,17 @@ end
--- Inspects the provided language. --- Inspects the provided language.
--- ---
--- Inspecting provides some useful information on the language like node and field names, ABI --- Inspecting provides some useful information on the language like ABI version, parser state count
--- version, and whether the language came from a WASM module. --- (a measure of parser complexity), node and field names, and whether the language came from a
--- WASM module.
--- ---
--- Node names are returned in a table mapping each node name to a `boolean` indicating whether or --- Node names are returned in a table mapping each node name to a `boolean` indicating whether or
--- not the node is named (i.e., not anonymous). Anonymous nodes are surrounded with double quotes --- not the node is named (i.e., not anonymous). Anonymous nodes are surrounded with double quotes
--- (`"`). --- (`"`).
--- ---
--- For ABI 15 parsers, also show parser metadata (major, minor, patch version) and a table of
--- supertypes with their respective subtypes.
---
---@param lang string Language ---@param lang string Language
---@return TSLangInfo ---@return TSLangInfo
function M.inspect(lang) function M.inspect(lang)

View File

@ -263,45 +263,92 @@ int tslua_inspect_lang(lua_State *L)
lua_createtable(L, 0, 2); // [retval] lua_createtable(L, 0, 2); // [retval]
uint32_t nsymbols = ts_language_symbol_count(lang); { // Symbols
assert(nsymbols < INT_MAX); uint32_t nsymbols = ts_language_symbol_count(lang);
assert(nsymbols < INT_MAX);
lua_createtable(L, (int)(nsymbols - 1), 1); // [retval, symbols] lua_createtable(L, (int)(nsymbols - 1), 1); // [retval, symbols]
for (uint32_t i = 0; i < nsymbols; i++) { for (uint32_t i = 0; i < nsymbols; i++) {
TSSymbolType t = ts_language_symbol_type(lang, (TSSymbol)i); TSSymbolType t = ts_language_symbol_type(lang, (TSSymbol)i);
if (t == TSSymbolTypeAuxiliary) { if (t == TSSymbolTypeAuxiliary) {
// not used by the API // not used by the API
continue; continue;
} }
const char *name = ts_language_symbol_name(lang, (TSSymbol)i); const char *name = ts_language_symbol_name(lang, (TSSymbol)i);
bool named = t != TSSymbolTypeAnonymous; bool named = t != TSSymbolTypeAnonymous;
lua_pushboolean(L, named); // [retval, symbols, is_named] lua_pushboolean(L, named); // [retval, symbols, is_named]
if (!named) { if (!named) {
char buf[256]; char buf[256];
snprintf(buf, sizeof(buf), "\"%s\"", name); snprintf(buf, sizeof(buf), "\"%s\"", name);
lua_setfield(L, -2, buf); // [retval, symbols] lua_setfield(L, -2, buf); // [retval, symbols]
} else { } else {
lua_setfield(L, -2, name); // [retval, symbols] lua_setfield(L, -2, name); // [retval, symbols]
}
} }
lua_setfield(L, -2, "symbols"); // [retval]
} }
lua_setfield(L, -2, "symbols"); // [retval] { // Fields
uint32_t nfields = ts_language_field_count(lang);
lua_createtable(L, (int)nfields, 1); // [retval, fields]
// Field IDs go from 1 to nfields inclusive (extra index 0 maps to NULL)
for (uint32_t i = 1; i <= nfields; i++) {
lua_pushstring(L, ts_language_field_name_for_id(lang, (TSFieldId)i));
lua_rawseti(L, -2, (int)i); // [retval, fields]
}
uint32_t nfields = ts_language_field_count(lang); lua_setfield(L, -2, "fields"); // [retval]
lua_createtable(L, (int)nfields, 1); // [retval, fields]
// Field IDs go from 1 to nfields inclusive (extra index 0 maps to NULL)
for (uint32_t i = 1; i <= nfields; i++) {
lua_pushstring(L, ts_language_field_name_for_id(lang, (TSFieldId)i));
lua_rawseti(L, -2, (int)i); // [retval, fields]
} }
lua_setfield(L, -2, "fields"); // [retval]
lua_pushboolean(L, ts_language_is_wasm(lang)); lua_pushboolean(L, ts_language_is_wasm(lang));
lua_setfield(L, -2, "_wasm"); lua_setfield(L, -2, "_wasm");
lua_pushinteger(L, ts_language_abi_version(lang)); // [retval, version] lua_pushinteger(L, ts_language_abi_version(lang)); // [retval, version]
lua_setfield(L, -2, "_abi_version"); lua_setfield(L, -2, "abi_version");
{ // Metadata
// Attach a nested "metadata" table holding the parser's version numbers.
// When ts_language_metadata() returns NULL the key is simply left unset.
const TSLanguageMetadata *meta = ts_language_metadata(lang);
if (meta != NULL) {
lua_createtable(L, 0, 3); // [retval, metadata]
lua_pushinteger(L, meta->major_version); // [retval, metadata, major]
lua_setfield(L, -2, "major_version"); // [retval, metadata]
lua_pushinteger(L, meta->minor_version); // [retval, metadata, minor]
lua_setfield(L, -2, "minor_version"); // [retval, metadata]
lua_pushinteger(L, meta->patch_version); // [retval, metadata, patch]
lua_setfield(L, -2, "patch_version"); // [retval, metadata]
lua_setfield(L, -2, "metadata"); // [retval]
}
}
lua_pushinteger(L, ts_language_state_count(lang));
lua_setfield(L, -2, "state_count");
{  // Supertypes
  // Builds `retval.supertypes`: a table mapping each supertype's symbol name to a
  // 1-based Lua list of its subtype symbol names. If no supertypes are reported,
  // the result is an empty table.
  uint32_t nsupertypes = 0;
  const TSSymbol *supertypes = ts_language_supertypes(lang, &nsupertypes);

  lua_createtable(L, 0, (int)nsupertypes);  // [retval, supertypes]
  for (uint32_t i = 0; i < nsupertypes; i++) {
    const TSSymbol supertype = supertypes[i];

    uint32_t nsubtypes = 0;
    const TSSymbol *subtypes = ts_language_subtypes(lang, supertype, &nsubtypes);

    lua_createtable(L, (int)nsubtypes, 0);  // [retval, supertypes, subtypes]
    // `subtypes` is a 0-based C array of `nsubtypes` entries, whereas the Lua list
    // is 1-based: read element j and store it at index j + 1. (Iterating 1..nsubtypes
    // over the C array would skip the first subtype and read past the end.)
    for (uint32_t j = 0; j < nsubtypes; j++) {
      lua_pushstring(L, ts_language_symbol_name(lang, subtypes[j]));
      lua_rawseti(L, -2, (int)j + 1);  // [retval, supertypes, subtypes]
    }

    lua_setfield(L, -2, ts_language_symbol_name(lang, supertype));  // [retval, supertypes]
  }

  lua_setfield(L, -2, "supertypes");  // [retval]
}
return 1; return 1;
} }

View File

@ -63,7 +63,14 @@ describe('treesitter language API', function()
return { keys, lang.fields, lang.symbols } return { keys, lang.fields, lang.symbols }
end)) end))
eq({ fields = true, symbols = true, _abi_version = true, _wasm = false }, keys) eq({
abi_version = true,
fields = true,
symbols = true,
state_count = true,
supertypes = true,
_wasm = false,
}, keys)
local fset = {} local fset = {}
for _, f in pairs(fields) do for _, f in pairs(fields) do