feat(treesitter): add more metadata to language.inspect() (#32657)

Problem: No way to check the version of a treesitter parser.

Solution: Add version metadata (ABI 15 parsers only) as well as parser state count and supertype information (ABI 15) in `vim.treesitter.language.inspect()`. Also graduate the `abi_version` field, as this is now the official upstream name.

---------

Co-authored-by: Christian Clason <c.clason@uni-graz.at>
This commit is contained in:
Lewis Russell
2025-03-01 15:51:09 +00:00
committed by GitHub
parent 48e6147e64
commit ec8922978e
7 changed files with 104 additions and 36 deletions

View File

@ -409,6 +409,8 @@ TREESITTER
• |:InspectTree| now shows which nodes are missing.
• Bundled markdown highlight queries use `conceal_lines` metadata to conceal
code block fence lines vertically.
• |vim.treesitter.language.inspect()| shows additional information, including
parser version for ABI 15 parsers.
TUI

View File

@ -1192,14 +1192,17 @@ get_lang({filetype}) *vim.treesitter.language.get_lang()*
inspect({lang}) *vim.treesitter.language.inspect()*
Inspects the provided language.
Inspecting provides some useful information on the language like node and
field names, ABI version, and whether the language came from a WASM
module.
Inspecting provides some useful information on the language like ABI
version, parser state count (a measure of parser complexity), node and
field names, and whether the language came from a WASM module.
Node names are returned in a table mapping each node name to a `boolean`
indicating whether or not the node is named (i.e., not anonymous).
Anonymous nodes are surrounded with double quotes (`"`).
For ABI 15 parsers, also show parser metadata (major, minor, patch
version) and a table of supertypes with their respective subtypes.
Parameters: ~
• {lang} (`string`) Language

View File

@ -22,10 +22,15 @@ error('Cannot require a meta file')
---@field patterns table<integer, (integer|string)[][]>
---
---@class TSLangInfo
---@field abi_version integer
---@field metadata? { major_version: integer, minor_version: integer, patch_version: integer }
---@field state_count integer
---@field fields string[]
---@field symbols table<string,boolean>
---@field supertypes table<string,string[]>
---@field _wasm boolean
---@field _abi_version integer
--- @param lang string
--- @return TSLangInfo

View File

@ -35,7 +35,7 @@ function M.check()
else
local lang = ts.language.inspect(parsername)
health.ok(
string.format('Parser: %-20s ABI: %d, path: %s', parsername, lang._abi_version, parser)
string.format('Parser: %-20s ABI: %d, path: %s', parsername, lang.abi_version, parser)
)
end
end

View File

@ -168,13 +168,17 @@ end
--- Inspects the provided language.
---
--- Inspecting provides some useful information on the language like node and field names, ABI
--- version, and whether the language came from a WASM module.
--- Inspecting provides some useful information on the language like ABI version, parser state count
--- (a measure of parser complexity), node and field names, and whether the language came from a
--- WASM module.
---
--- Node names are returned in a table mapping each node name to a `boolean` indicating whether or
--- not the node is named (i.e., not anonymous). Anonymous nodes are surrounded with double quotes
--- (`"`).
---
--- For ABI 15 parsers, also show parser metadata (major, minor, patch version) and a table of
--- supertypes with their respective subtypes.
---
---@param lang string Language
---@return TSLangInfo
function M.inspect(lang)

View File

@ -263,45 +263,92 @@ int tslua_inspect_lang(lua_State *L)
lua_createtable(L, 0, 2); // [retval]
uint32_t nsymbols = ts_language_symbol_count(lang);
assert(nsymbols < INT_MAX);
{ // Symbols
uint32_t nsymbols = ts_language_symbol_count(lang);
assert(nsymbols < INT_MAX);
lua_createtable(L, (int)(nsymbols - 1), 1); // [retval, symbols]
for (uint32_t i = 0; i < nsymbols; i++) {
TSSymbolType t = ts_language_symbol_type(lang, (TSSymbol)i);
if (t == TSSymbolTypeAuxiliary) {
// not used by the API
continue;
}
const char *name = ts_language_symbol_name(lang, (TSSymbol)i);
bool named = t != TSSymbolTypeAnonymous;
lua_pushboolean(L, named); // [retval, symbols, is_named]
if (!named) {
char buf[256];
snprintf(buf, sizeof(buf), "\"%s\"", name);
lua_setfield(L, -2, buf); // [retval, symbols]
} else {
lua_setfield(L, -2, name); // [retval, symbols]
lua_createtable(L, (int)(nsymbols - 1), 1); // [retval, symbols]
for (uint32_t i = 0; i < nsymbols; i++) {
TSSymbolType t = ts_language_symbol_type(lang, (TSSymbol)i);
if (t == TSSymbolTypeAuxiliary) {
// not used by the API
continue;
}
const char *name = ts_language_symbol_name(lang, (TSSymbol)i);
bool named = t != TSSymbolTypeAnonymous;
lua_pushboolean(L, named); // [retval, symbols, is_named]
if (!named) {
char buf[256];
snprintf(buf, sizeof(buf), "\"%s\"", name);
lua_setfield(L, -2, buf); // [retval, symbols]
} else {
lua_setfield(L, -2, name); // [retval, symbols]
}
}
lua_setfield(L, -2, "symbols"); // [retval]
}
lua_setfield(L, -2, "symbols"); // [retval]
{ // Fields
uint32_t nfields = ts_language_field_count(lang);
lua_createtable(L, (int)nfields, 1); // [retval, fields]
// Field IDs go from 1 to nfields inclusive (extra index 0 maps to NULL)
for (uint32_t i = 1; i <= nfields; i++) {
lua_pushstring(L, ts_language_field_name_for_id(lang, (TSFieldId)i));
lua_rawseti(L, -2, (int)i); // [retval, fields]
}
uint32_t nfields = ts_language_field_count(lang);
lua_createtable(L, (int)nfields, 1); // [retval, fields]
// Field IDs go from 1 to nfields inclusive (extra index 0 maps to NULL)
for (uint32_t i = 1; i <= nfields; i++) {
lua_pushstring(L, ts_language_field_name_for_id(lang, (TSFieldId)i));
lua_rawseti(L, -2, (int)i); // [retval, fields]
lua_setfield(L, -2, "fields"); // [retval]
}
lua_setfield(L, -2, "fields"); // [retval]
lua_pushboolean(L, ts_language_is_wasm(lang));
lua_setfield(L, -2, "_wasm");
lua_pushinteger(L, ts_language_abi_version(lang)); // [retval, version]
lua_setfield(L, -2, "_abi_version");
lua_setfield(L, -2, "abi_version");
{ // Metadata
// Export the parser's version triple as a nested `metadata` table on the
// result. The table is only added when ts_language_metadata() returns a
// non-NULL pointer; otherwise the `metadata` field is simply left unset.
const TSLanguageMetadata *meta = ts_language_metadata(lang);
if (meta != NULL) {
lua_createtable(L, 0, 3); // [retval, metadata]
lua_pushinteger(L, meta->major_version); // [retval, metadata, major]
lua_setfield(L, -2, "major_version"); // [retval, metadata]
lua_pushinteger(L, meta->minor_version); // [retval, metadata, minor]
lua_setfield(L, -2, "minor_version"); // [retval, metadata]
lua_pushinteger(L, meta->patch_version); // [retval, metadata, patch]
lua_setfield(L, -2, "patch_version"); // [retval, metadata]
lua_setfield(L, -2, "metadata"); // [retval]
}
}
lua_pushinteger(L, ts_language_state_count(lang));
lua_setfield(L, -2, "state_count");
{ // Supertypes
  // Build `supertypes`: a table mapping each supertype's symbol name to a
  // list (1-based Lua array) of the symbol names of its subtypes.
  uint32_t nsupertypes;
  const TSSymbol *supertypes = ts_language_supertypes(lang, &nsupertypes);

  lua_createtable(L, 0, (int)nsupertypes);  // [retval, supertypes]
  for (uint32_t i = 0; i < nsupertypes; i++) {
    const TSSymbol supertype = supertypes[i];

    uint32_t nsubtypes;
    const TSSymbol *subtypes = ts_language_subtypes(lang, supertype, &nsubtypes);

    lua_createtable(L, (int)nsubtypes, 0);  // [retval, supertypes, subtypes]
    // The C array is 0-based while the Lua list is 1-based: read element j
    // and store it at index j + 1. (Iterating j = 1..nsubtypes over the C
    // array would skip the first subtype and read one past the end.)
    for (uint32_t j = 0; j < nsubtypes; j++) {
      lua_pushstring(L, ts_language_symbol_name(lang, subtypes[j]));  // [..., subtypes, name]
      lua_rawseti(L, -2, (int)j + 1);  // [retval, supertypes, subtypes]
    }
    lua_setfield(L, -2, ts_language_symbol_name(lang, supertype));  // [retval, supertypes]
  }
  lua_setfield(L, -2, "supertypes");  // [retval]
}
return 1;
}

View File

@ -63,7 +63,14 @@ describe('treesitter language API', function()
return { keys, lang.fields, lang.symbols }
end))
eq({ fields = true, symbols = true, _abi_version = true, _wasm = false }, keys)
eq({
abi_version = true,
fields = true,
symbols = true,
state_count = true,
supertypes = true,
_wasm = false,
}, keys)
local fset = {}
for _, f in pairs(fields) do