feat(terminal): support grapheme clusters, including emoji

This commit is contained in:
bfredl
2024-12-18 14:49:38 +01:00
parent 9d9ee3476e
commit e3bfcf2fd4
11 changed files with 184 additions and 631 deletions

View File

@ -370,6 +370,9 @@ These existing features changed their behavior.
more emoji characters than before, including those encoded with multiple more emoji characters than before, including those encoded with multiple
emoji codepoints combined with ZWJ (zero width joiner) codepoints. emoji codepoints combined with ZWJ (zero width joiner) codepoints.
This also applies to :terminal output, where the width of cells is now
calculated using the upgraded implementation.
• Custom highlights in 'rulerformat', 'statuscolumn', 'statusline', 'tabline', • Custom highlights in 'rulerformat', 'statuscolumn', 'statusline', 'tabline',
'winbar' and the number column (through |:sign-define| `numhl`) now combine 'winbar' and the number column (through |:sign-define| `numhl`) now combine
with their respective highlight groups, as opposed to |hl-Normal|. with their respective highlight groups, as opposed to |hl-Normal|.

View File

@ -65,6 +65,7 @@
#include "nvim/ex_docmd.h" #include "nvim/ex_docmd.h"
#include "nvim/getchar.h" #include "nvim/getchar.h"
#include "nvim/globals.h" #include "nvim/globals.h"
#include "nvim/grid.h"
#include "nvim/highlight.h" #include "nvim/highlight.h"
#include "nvim/highlight_defs.h" #include "nvim/highlight_defs.h"
#include "nvim/highlight_group.h" #include "nvim/highlight_group.h"
@ -1347,7 +1348,7 @@ static int term_sb_pop(int cols, VTermScreenCell *cells, void *data)
// copy to vterm state // copy to vterm state
memcpy(cells, sbrow->cells, sizeof(cells[0]) * cols_to_copy); memcpy(cells, sbrow->cells, sizeof(cells[0]) * cols_to_copy);
for (size_t col = cols_to_copy; col < (size_t)cols; col++) { for (size_t col = cols_to_copy; col < (size_t)cols; col++) {
cells[col].chars[0] = 0; cells[col].schar = 0;
cells[col].width = 1; cells[col].width = 1;
} }
@ -1857,12 +1858,8 @@ static void fetch_row(Terminal *term, int row, int end_col)
while (col < end_col) { while (col < end_col) {
VTermScreenCell cell; VTermScreenCell cell;
fetch_cell(term, row, col, &cell); fetch_cell(term, row, col, &cell);
if (cell.chars[0]) { if (cell.schar) {
int cell_len = 0; schar_get_adv(&ptr, cell.schar);
for (int i = 0; i < VTERM_MAX_CHARS_PER_CELL && cell.chars[i]; i++) {
cell_len += utf_char2bytes((int)cell.chars[i], ptr + cell_len);
}
ptr += cell_len;
line_len = (size_t)(ptr - term->textbuf); line_len = (size_t)(ptr - term->textbuf);
} else { } else {
*ptr++ = ' '; *ptr++ = ' ';
@ -1883,7 +1880,7 @@ static bool fetch_cell(Terminal *term, int row, int col, VTermScreenCell *cell)
} else { } else {
// fill the pointer with an empty cell // fill the pointer with an empty cell
*cell = (VTermScreenCell) { *cell = (VTermScreenCell) {
.chars = { 0 }, .schar = 0,
.width = 1, .width = 1,
}; };
return false; return false;

View File

@ -1,111 +0,0 @@
{ 0x1100, 0x115f },
{ 0x231a, 0x231b },
{ 0x2329, 0x232a },
{ 0x23e9, 0x23ec },
{ 0x23f0, 0x23f0 },
{ 0x23f3, 0x23f3 },
{ 0x25fd, 0x25fe },
{ 0x2614, 0x2615 },
{ 0x2648, 0x2653 },
{ 0x267f, 0x267f },
{ 0x2693, 0x2693 },
{ 0x26a1, 0x26a1 },
{ 0x26aa, 0x26ab },
{ 0x26bd, 0x26be },
{ 0x26c4, 0x26c5 },
{ 0x26ce, 0x26ce },
{ 0x26d4, 0x26d4 },
{ 0x26ea, 0x26ea },
{ 0x26f2, 0x26f3 },
{ 0x26f5, 0x26f5 },
{ 0x26fa, 0x26fa },
{ 0x26fd, 0x26fd },
{ 0x2705, 0x2705 },
{ 0x270a, 0x270b },
{ 0x2728, 0x2728 },
{ 0x274c, 0x274c },
{ 0x274e, 0x274e },
{ 0x2753, 0x2755 },
{ 0x2757, 0x2757 },
{ 0x2795, 0x2797 },
{ 0x27b0, 0x27b0 },
{ 0x27bf, 0x27bf },
{ 0x2b1b, 0x2b1c },
{ 0x2b50, 0x2b50 },
{ 0x2b55, 0x2b55 },
{ 0x2e80, 0x2e99 },
{ 0x2e9b, 0x2ef3 },
{ 0x2f00, 0x2fd5 },
{ 0x2ff0, 0x2ffb },
{ 0x3000, 0x303e },
{ 0x3041, 0x3096 },
{ 0x3099, 0x30ff },
{ 0x3105, 0x312f },
{ 0x3131, 0x318e },
{ 0x3190, 0x31ba },
{ 0x31c0, 0x31e3 },
{ 0x31f0, 0x321e },
{ 0x3220, 0x3247 },
{ 0x3250, 0x4dbf },
{ 0x4e00, 0xa48c },
{ 0xa490, 0xa4c6 },
{ 0xa960, 0xa97c },
{ 0xac00, 0xd7a3 },
{ 0xf900, 0xfaff },
{ 0xfe10, 0xfe19 },
{ 0xfe30, 0xfe52 },
{ 0xfe54, 0xfe66 },
{ 0xfe68, 0xfe6b },
{ 0xff01, 0xff60 },
{ 0xffe0, 0xffe6 },
{ 0x16fe0, 0x16fe3 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18af2 },
{ 0x1b000, 0x1b11e },
{ 0x1b150, 0x1b152 },
{ 0x1b164, 0x1b167 },
{ 0x1b170, 0x1b2fb },
{ 0x1f004, 0x1f004 },
{ 0x1f0cf, 0x1f0cf },
{ 0x1f18e, 0x1f18e },
{ 0x1f191, 0x1f19a },
{ 0x1f200, 0x1f202 },
{ 0x1f210, 0x1f23b },
{ 0x1f240, 0x1f248 },
{ 0x1f250, 0x1f251 },
{ 0x1f260, 0x1f265 },
{ 0x1f300, 0x1f320 },
{ 0x1f32d, 0x1f335 },
{ 0x1f337, 0x1f37c },
{ 0x1f37e, 0x1f393 },
{ 0x1f3a0, 0x1f3ca },
{ 0x1f3cf, 0x1f3d3 },
{ 0x1f3e0, 0x1f3f0 },
{ 0x1f3f4, 0x1f3f4 },
{ 0x1f3f8, 0x1f43e },
{ 0x1f440, 0x1f440 },
{ 0x1f442, 0x1f4fc },
{ 0x1f4ff, 0x1f53d },
{ 0x1f54b, 0x1f54e },
{ 0x1f550, 0x1f567 },
{ 0x1f57a, 0x1f57a },
{ 0x1f595, 0x1f596 },
{ 0x1f5a4, 0x1f5a4 },
{ 0x1f5fb, 0x1f64f },
{ 0x1f680, 0x1f6c5 },
{ 0x1f6cc, 0x1f6cc },
{ 0x1f6d0, 0x1f6d2 },
{ 0x1f6d5, 0x1f6d5 },
{ 0x1f6eb, 0x1f6ec },
{ 0x1f6f4, 0x1f6fa },
{ 0x1f7e0, 0x1f7eb },
{ 0x1f90d, 0x1f971 },
{ 0x1f973, 0x1f976 },
{ 0x1f97a, 0x1f9a2 },
{ 0x1f9a5, 0x1f9aa },
{ 0x1f9ae, 0x1f9ca },
{ 0x1f9cd, 0x1f9ff },
{ 0x1fa70, 0x1fa73 },
{ 0x1fa78, 0x1fa7a },
{ 0x1fa80, 0x1fa82 },
{ 0x1fa90, 0x1fa95 },

View File

@ -2,6 +2,7 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "nvim/grid.h"
#include "nvim/mbyte.h" #include "nvim/mbyte.h"
#include "nvim/tui/termkey/termkey.h" #include "nvim/tui/termkey/termkey.h"
@ -41,7 +42,7 @@ typedef struct
/* Internal representation of a screen cell */ /* Internal representation of a screen cell */
typedef struct typedef struct
{ {
uint32_t chars[VTERM_MAX_CHARS_PER_CELL]; schar_T schar;
ScreenPen pen; ScreenPen pen;
} ScreenCell; } ScreenCell;
@ -79,7 +80,7 @@ struct VTermScreen
static inline void clearcell(const VTermScreen *screen, ScreenCell *cell) static inline void clearcell(const VTermScreen *screen, ScreenCell *cell)
{ {
cell->chars[0] = 0; cell->schar = 0;
cell->pen = screen->pen; cell->pen = screen->pen;
} }
@ -182,16 +183,13 @@ static int putglyph(VTermGlyphInfo *info, VTermPos pos, void *user)
if(!cell) if(!cell)
return 0; return 0;
int i; cell->schar = info->schar;
for(i = 0; i < VTERM_MAX_CHARS_PER_CELL && info->chars[i]; i++) { if (info->schar != 0) {
cell->chars[i] = info->chars[i];
cell->pen = screen->pen; cell->pen = screen->pen;
} }
if(i < VTERM_MAX_CHARS_PER_CELL)
cell->chars[i] = 0;
for(int col = 1; col < info->width; col++) for(int col = 1; col < info->width; col++)
getcell(screen, pos.row, pos.col + col)->chars[0] = (uint32_t)-1; getcell(screen, pos.row, pos.col + col)->schar = (uint32_t)-1;
VTermRect rect = { VTermRect rect = {
.start_row = pos.row, .start_row = pos.row,
@ -284,7 +282,7 @@ static int erase_internal(VTermRect rect, int selective, void *user)
if(selective && cell->pen.protected_cell) if(selective && cell->pen.protected_cell)
continue; continue;
cell->chars[0] = 0; cell->schar = 0;
cell->pen = (ScreenPen){ cell->pen = (ScreenPen){
/* Only copy .fg and .bg; leave things like rv in reset state */ /* Only copy .fg and .bg; leave things like rv in reset state */
.fg = screen->pen.fg, .fg = screen->pen.fg,
@ -504,7 +502,7 @@ static int bell(void *user)
static int line_popcount(ScreenCell *buffer, int row, int rows, int cols) static int line_popcount(ScreenCell *buffer, int row, int rows, int cols)
{ {
int col = cols - 1; int col = cols - 1;
while(col >= 0 && buffer[row * cols + col].chars[0] == 0) while(col >= 0 && buffer[row * cols + col].schar == 0)
col--; col--;
return col + 1; return col + 1;
} }
@ -690,11 +688,7 @@ static void resize_buffer(VTermScreen *screen, int bufidx, int new_rows, int new
VTermScreenCell *src = &screen->sb_buffer[pos.col]; VTermScreenCell *src = &screen->sb_buffer[pos.col];
ScreenCell *dst = &new_buffer[pos.row * new_cols + pos.col]; ScreenCell *dst = &new_buffer[pos.row * new_cols + pos.col];
for(int i = 0; i < VTERM_MAX_CHARS_PER_CELL; i++) { dst->schar = src->schar;
dst->chars[i] = src->chars[i];
if(!src->chars[i])
break;
}
dst->pen.bold = src->attrs.bold; dst->pen.bold = src->attrs.bold;
dst->pen.underline = src->attrs.underline; dst->pen.underline = src->attrs.underline;
@ -713,7 +707,7 @@ static void resize_buffer(VTermScreen *screen, int bufidx, int new_rows, int new
dst->pen.uri = src->uri; dst->pen.uri = src->uri;
if(src->width == 2 && pos.col < (new_cols-1)) if(src->width == 2 && pos.col < (new_cols-1))
(dst + 1)->chars[0] = (uint32_t) -1; (dst + 1)->schar = (uint32_t) -1;
} }
for( ; pos.col < new_cols; pos.col++) for( ; pos.col < new_cols; pos.col++)
clearcell(screen, &new_buffer[pos.row * new_cols + pos.col]); clearcell(screen, &new_buffer[pos.row * new_cols + pos.col]);
@ -914,49 +908,41 @@ void vterm_screen_reset(VTermScreen *screen, int hard)
vterm_screen_flush_damage(screen); vterm_screen_flush_damage(screen);
} }
static size_t _get_chars(const VTermScreen *screen, const int utf8, void *buffer, size_t len, const VTermRect rect) size_t vterm_screen_get_text(const VTermScreen *screen, char *buffer, size_t len, const VTermRect rect)
{ {
size_t outpos = 0; size_t outpos = 0;
int padding = 0; int padding = 0;
#define PUT(c) \ #define PUT(bytes, thislen) \
if(utf8) { \ if(true) { \
size_t thislen = utf_char2len(c); \
if(buffer && outpos + thislen <= len) \ if(buffer && outpos + thislen <= len) \
outpos += fill_utf8((c), (char *)buffer + outpos); \ memcpy((char *)buffer + outpos, bytes, thislen); \
else \ outpos += thislen; \
outpos += thislen; \
} \ } \
else { \
if(buffer && outpos + 1 <= len) \
((uint32_t*)buffer)[outpos++] = (c); \
else \
outpos++; \
}
for(int row = rect.start_row; row < rect.end_row; row++) { for(int row = rect.start_row; row < rect.end_row; row++) {
for(int col = rect.start_col; col < rect.end_col; col++) { for(int col = rect.start_col; col < rect.end_col; col++) {
ScreenCell *cell = getcell(screen, row, col); ScreenCell *cell = getcell(screen, row, col);
if(cell->chars[0] == 0) if(cell->schar == 0)
// Erased cell, might need a space // Erased cell, might need a space
padding++; padding++;
else if(cell->chars[0] == (uint32_t)-1) else if(cell->schar == (uint32_t)-1)
// Gap behind a double-width char, do nothing // Gap behind a double-width char, do nothing
; ;
else { else {
while(padding) { while(padding) {
PUT(UNICODE_SPACE); PUT(" ", 1);
padding--; padding--;
} }
for(int i = 0; i < VTERM_MAX_CHARS_PER_CELL && cell->chars[i]; i++) { char buf[MAX_SCHAR_SIZE + 1];
PUT(cell->chars[i]); size_t thislen = schar_get(buf, cell->schar);
} PUT(buf, thislen);
} }
} }
if(row < rect.end_row - 1) { if(row < rect.end_row - 1) {
PUT(UNICODE_LINEFEED); PUT("\n", 1);
padding = 0; padding = 0;
} }
} }
@ -964,16 +950,6 @@ static size_t _get_chars(const VTermScreen *screen, const int utf8, void *buffer
return outpos; return outpos;
} }
/* Codepoint flavour of the screen-text getter: forwards to _get_chars()
 * with its utf8 flag cleared, so `chars` is filled as an array of uint32_t
 * values. `len` and `rect` are passed through untouched and the result of
 * _get_chars() is returned as-is. */
size_t vterm_screen_get_chars(const VTermScreen *screen, uint32_t *chars, size_t len, const VTermRect rect)
{
  const int as_utf8 = 0;
  size_t produced = _get_chars(screen, as_utf8, chars, len, rect);
  return produced;
}
/* UTF-8 flavour of the screen-text getter: forwards to _get_chars() with
 * its utf8 flag set, so `str` is filled with encoded bytes. `len` and
 * `rect` are passed through untouched and the result of _get_chars() is
 * returned as-is. */
size_t vterm_screen_get_text(const VTermScreen *screen, char *str, size_t len, const VTermRect rect)
{
  const int as_utf8 = 1;
  size_t produced = _get_chars(screen, as_utf8, str, len, rect);
  return produced;
}
/* Copy internal to external representation of a screen cell */ /* Copy internal to external representation of a screen cell */
int vterm_screen_get_cell(const VTermScreen *screen, VTermPos pos, VTermScreenCell *cell) int vterm_screen_get_cell(const VTermScreen *screen, VTermPos pos, VTermScreenCell *cell)
{ {
@ -981,11 +957,7 @@ int vterm_screen_get_cell(const VTermScreen *screen, VTermPos pos, VTermScreenCe
if(!intcell) if(!intcell)
return 0; return 0;
for(int i = 0; i < VTERM_MAX_CHARS_PER_CELL; i++) { cell->schar = intcell->schar;
cell->chars[i] = intcell->chars[i];
if(!intcell->chars[i])
break;
}
cell->attrs.bold = intcell->pen.bold; cell->attrs.bold = intcell->pen.bold;
cell->attrs.underline = intcell->pen.underline; cell->attrs.underline = intcell->pen.underline;
@ -1007,7 +979,7 @@ int vterm_screen_get_cell(const VTermScreen *screen, VTermPos pos, VTermScreenCe
cell->uri = intcell->pen.uri; cell->uri = intcell->pen.uri;
if(pos.col < (screen->cols - 1) && if(pos.col < (screen->cols - 1) &&
getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) getcell(screen, pos.row, pos.col + 1)->schar == (uint32_t)-1)
cell->width = 2; cell->width = 2;
else else
cell->width = 1; cell->width = 1;
@ -1020,7 +992,7 @@ int vterm_screen_is_eol(const VTermScreen *screen, VTermPos pos)
/* This cell is EOL if this and every cell to the right is black */ /* This cell is EOL if this and every cell to the right is black */
for(; pos.col < screen->cols; pos.col++) { for(; pos.col < screen->cols; pos.col++) {
ScreenCell *cell = getcell(screen, pos.row, pos.col); ScreenCell *cell = getcell(screen, pos.row, pos.col);
if(cell->chars[0] != 0) if(cell->schar != 0)
return 0; return 0;
} }

View File

@ -3,6 +3,9 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "nvim/grid.h"
#include "nvim/mbyte.h"
#define strneq(a,b,n) (strncmp(a,b,n)==0) #define strneq(a,b,n) (strncmp(a,b,n)==0)
#if defined(DEBUG) && DEBUG > 1 #if defined(DEBUG) && DEBUG > 1
@ -11,10 +14,10 @@
/* Some convenient wrappers to make callback functions easier */ /* Some convenient wrappers to make callback functions easier */
static void putglyph(VTermState *state, const uint32_t chars[], int width, VTermPos pos) static void putglyph(VTermState *state, const schar_T schar, int width, VTermPos pos)
{ {
VTermGlyphInfo info = { VTermGlyphInfo info = {
.chars = chars, .schar = schar,
.width = width, .width = width,
.protected_cell = state->protected_cell, .protected_cell = state->protected_cell,
.dwl = state->lineinfo[pos.row].doublewidth, .dwl = state->lineinfo[pos.row].doublewidth,
@ -82,8 +85,7 @@ static VTermState *vterm_state_new(VTerm *vt)
state->bold_is_highbright = 0; state->bold_is_highbright = 0;
state->combine_chars_size = 16; state->combine_pos.row = -1;
state->combine_chars = vterm_allocator_malloc(state->vt, state->combine_chars_size * sizeof(state->combine_chars[0]));
state->tabstops = vterm_allocator_malloc(state->vt, (state->cols + 7) / 8); state->tabstops = vterm_allocator_malloc(state->vt, (state->cols + 7) / 8);
@ -105,7 +107,6 @@ INTERNAL void vterm_state_free(VTermState *state)
vterm_allocator_free(state->vt, state->lineinfos[BUFIDX_PRIMARY]); vterm_allocator_free(state->vt, state->lineinfos[BUFIDX_PRIMARY]);
if(state->lineinfos[BUFIDX_ALTSCREEN]) if(state->lineinfos[BUFIDX_ALTSCREEN])
vterm_allocator_free(state->vt, state->lineinfos[BUFIDX_ALTSCREEN]); vterm_allocator_free(state->vt, state->lineinfos[BUFIDX_ALTSCREEN]);
vterm_allocator_free(state->vt, state->combine_chars);
vterm_allocator_free(state->vt, state); vterm_allocator_free(state->vt, state);
} }
@ -171,19 +172,6 @@ static void linefeed(VTermState *state)
state->pos.row++; state->pos.row++;
} }
/*
 * Double the capacity of the state's combining-character buffer.
 *
 * A replacement buffer with twice as many elements is requested from the
 * VTerm allocator, the current elements are copied into it, the old buffer
 * is handed back to the allocator, and the state is updated to point at the
 * replacement and to record its new element count.
 */
static void grow_combine_buffer(VTermState *state)
{
  size_t doubled = state->combine_chars_size * 2;
  uint32_t *bigger = vterm_allocator_malloc(state->vt, doubled * sizeof(*bigger));

  /* Carry over everything stored so far (old element count, not the new one). */
  memcpy(bigger, state->combine_chars, state->combine_chars_size * sizeof(*bigger));
  vterm_allocator_free(state->vt, state->combine_chars);

  state->combine_chars_size = doubled;
  state->combine_chars = bigger;
}
static void set_col_tabstop(VTermState *state, int col) static void set_col_tabstop(VTermState *state, int col)
{ {
unsigned char mask = 1 << (col & 7); unsigned char mask = 1 << (col & 7);
@ -301,88 +289,35 @@ static int on_text(const char bytes[], size_t len, void *user)
state->gsingle_set = 0; state->gsingle_set = 0;
int i = 0; int i = 0;
GraphemeState grapheme_state = GRAPHEME_STATE_INIT;
size_t grapheme_len = 0;
bool recombine = false;
/* See if the cursor has moved since */
if(state->pos.row == state->combine_pos.row && state->pos.col == state->combine_pos.col + state->combine_width) {
/* This is a combining char. that needs to be merged with the previous /* This is a combining char. that needs to be merged with the previous
* glyph output */ * glyph output */
if(vterm_unicode_is_combining(codepoints[i])) { if(utf_iscomposing(state->grapheme_last, codepoints[i], &state->grapheme_state)) {
/* See if the cursor has moved since */
if(state->pos.row == state->combine_pos.row && state->pos.col == state->combine_pos.col + state->combine_width) {
#ifdef DEBUG_GLYPH_COMBINE
int printpos;
printf("DEBUG: COMBINING SPLIT GLYPH of chars {");
for(printpos = 0; state->combine_chars[printpos]; printpos++)
printf("U+%04x ", state->combine_chars[printpos]);
printf("} + {");
#endif
/* Find where we need to append these combining chars */ /* Find where we need to append these combining chars */
int saved_i = 0; grapheme_len = state->grapheme_len;
while(state->combine_chars[saved_i]) grapheme_state = state->grapheme_state;
saved_i++; state->pos.col = state->combine_pos.col;
recombine = true;
/* Add extra ones */ } else {
while(i < npoints && vterm_unicode_is_combining(codepoints[i])) {
if(saved_i >= state->combine_chars_size)
grow_combine_buffer(state);
state->combine_chars[saved_i++] = codepoints[i++];
}
if(saved_i >= state->combine_chars_size)
grow_combine_buffer(state);
state->combine_chars[saved_i] = 0;
#ifdef DEBUG_GLYPH_COMBINE
for(; state->combine_chars[printpos]; printpos++)
printf("U+%04x ", state->combine_chars[printpos]);
printf("}\n");
#endif
/* Now render it */
putglyph(state, state->combine_chars, state->combine_width, state->combine_pos);
}
else {
DEBUG_LOG("libvterm: TODO: Skip over split char+combining\n"); DEBUG_LOG("libvterm: TODO: Skip over split char+combining\n");
} }
} }
for(; i < npoints; i++) { while(i < npoints) {
// Try to find combining characters following this // Try to find combining characters following this
int glyph_starts = i; do {
int glyph_ends; if (grapheme_len < sizeof(state->grapheme_buf) - 4) {
for(glyph_ends = i + 1; grapheme_len += utf_char2bytes(codepoints[i], state->grapheme_buf + grapheme_len);
(glyph_ends < npoints) && (glyph_ends < glyph_starts + VTERM_MAX_CHARS_PER_CELL);
glyph_ends++)
if(!vterm_unicode_is_combining(codepoints[glyph_ends]))
break;
int width = 0;
uint32_t chars[VTERM_MAX_CHARS_PER_CELL + 1];
for( ; i < glyph_ends; i++) {
chars[i - glyph_starts] = codepoints[i];
int this_width = vterm_unicode_width(codepoints[i]);
#ifdef DEBUG
if(this_width < 0) {
fprintf(stderr, "Text with negative-width codepoint U+%04x\n", codepoints[i]);
abort();
} }
#endif
width += this_width;
}
while(i < npoints && vterm_unicode_is_combining(codepoints[i]))
i++; i++;
} while(i < npoints && utf_iscomposing(codepoints[i-1], codepoints[i], &grapheme_state));
chars[glyph_ends - glyph_starts] = 0; int width = utf_ptr2cells_len(state->grapheme_buf, grapheme_len);
i--;
#ifdef DEBUG_GLYPH_COMBINE
int printpos;
printf("DEBUG: COMBINED GLYPH of %d chars {", glyph_ends - glyph_starts);
for(printpos = 0; printpos < glyph_ends - glyph_starts; printpos++)
printf("U+%04x ", chars[printpos]);
printf("}, onscreen width %d\n", width);
#endif
if(state->at_phantom || state->pos.col + width > THISROWWIDTH(state)) { if(state->at_phantom || state->pos.col + width > THISROWWIDTH(state)) {
linefeed(state); linefeed(state);
@ -391,7 +326,7 @@ static int on_text(const char bytes[], size_t len, void *user)
state->lineinfo[state->pos.row].continuation = 1; state->lineinfo[state->pos.row].continuation = 1;
} }
if(state->mode.insert) { if(state->mode.insert && !recombine) {
/* TODO: This will be a little inefficient for large bodies of text, as /* TODO: This will be a little inefficient for large bodies of text, as
* it'll have to 'ICH' effectively before every glyph. We should scan * it'll have to 'ICH' effectively before every glyph. We should scan
* ahead and ICH as many times as required * ahead and ICH as many times as required
@ -405,22 +340,20 @@ static int on_text(const char bytes[], size_t len, void *user)
scroll(state, rect, 0, -1); scroll(state, rect, 0, -1);
} }
putglyph(state, chars, width, state->pos); schar_T sc = schar_from_buf(state->grapheme_buf, grapheme_len);
putglyph(state, sc, width, state->pos);
if(i == npoints - 1) { if(i == npoints) {
/* End of the buffer. Save the chars in case we have to combine with /* End of the buffer. Save the chars in case we have to combine with
* more on the next call */ * more on the next call */
int save_i; state->grapheme_len = grapheme_len;
for(save_i = 0; chars[save_i]; save_i++) { state->grapheme_last = codepoints[i-1];
if(save_i >= state->combine_chars_size) state->grapheme_state = grapheme_state;
grow_combine_buffer(state);
state->combine_chars[save_i] = chars[save_i];
}
if(save_i >= state->combine_chars_size)
grow_combine_buffer(state);
state->combine_chars[save_i] = 0;
state->combine_width = width; state->combine_width = width;
state->combine_pos = state->pos; state->combine_pos = state->pos;
} else {
grapheme_len = 0;
recombine = false;
} }
if(state->pos.col + width >= THISROWWIDTH(state)) { if(state->pos.col + width >= THISROWWIDTH(state)) {
@ -646,7 +579,7 @@ static int on_escape(const char *bytes, size_t len, void *user)
case '8': // DECALN case '8': // DECALN
{ {
VTermPos pos; VTermPos pos;
uint32_t E[] = { 'E', 0 }; schar_T E = schar_from_ascii('E'); // E
for(pos.row = 0; pos.row < state->rows; pos.row++) for(pos.row = 0; pos.row < state->rows; pos.row++)
for(pos.col = 0; pos.col < ROWWIDTH(state, pos.row); pos.col++) for(pos.col = 0; pos.col < ROWWIDTH(state, pos.row); pos.col++)
putglyph(state, E, 1, pos); putglyph(state, E, 1, pos);
@ -1234,8 +1167,9 @@ static int on_csi(const char *leader, const long args[], int argcount, const cha
count = CSI_ARG_COUNT(args[0]); count = CSI_ARG_COUNT(args[0]);
col = state->pos.col + count; col = state->pos.col + count;
UBOUND(col, row_width); UBOUND(col, row_width);
schar_T sc = schar_from_buf(state->grapheme_buf, state->grapheme_len);
while (state->pos.col < col) { while (state->pos.col < col) {
putglyph(state, state->combine_chars, state->combine_width, state->pos); putglyph(state, sc, state->combine_width, state->pos);
state->pos.col += state->combine_width; state->pos.col += state->combine_width;
} }
if (state->pos.col + state->combine_width >= row_width) { if (state->pos.col + state->combine_width >= row_width) {

View File

@ -1,313 +0,0 @@
#include "vterm_internal.h"
// ### The following from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
// With modifications:
// made functions static
// moved 'combining' table to file scope, so other functions can see it
// ###################################################################
/*
* This is an implementation of wcwidth() and wcswidth() (defined in
* IEEE Std 1003.1-2001) for Unicode.
*
* http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
* http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
*
* In fixed-width output devices, Latin characters all occupy a single
* "cell" position of equal width, whereas ideographic CJK characters
* occupy two such cells. Interoperability between terminal-line
* applications and (teletype-style) character terminals using the
* UTF-8 encoding requires agreement on which character should advance
* the cursor by how many cell positions. No established formal
* standards exist at present on which Unicode character shall occupy
* how many cell positions on character terminals. These routines are
* a first attempt of defining such behavior based on simple rules
* applied to data provided by the Unicode Consortium.
*
* For some graphical characters, the Unicode standard explicitly
* defines a character-cell width via the definition of the East Asian
* FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
* In all these cases, there is no ambiguity about which width a
* terminal shall use. For characters in the East Asian Ambiguous (A)
* class, the width choice depends purely on a preference of backward
* compatibility with either historic CJK or Western practice.
* Choosing single-width for these characters is easy to justify as
* the appropriate long-term solution, as the CJK practice of
* displaying these characters as double-width comes from historic
* implementation simplicity (8-bit encoded characters were displayed
* single-width and 16-bit ones double-width, even for Greek,
* Cyrillic, etc.) and not any typographic considerations.
*
* Much less clear is the choice of width for the Not East Asian
* (Neutral) class. Existing practice does not dictate a width for any
* of these characters. It would nevertheless make sense
* typographically to allocate two character cells to characters such
* as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
* represented adequately with a single-width glyph. The following
* routines at present merely assign a single-cell width to all
* neutral characters, in the interest of simplicity. This is not
* entirely satisfactory and should be reconsidered before
* establishing a formal standard in this area. At the moment, the
* decision which Not East Asian (Neutral) characters should be
* represented by double-width glyphs cannot yet be answered by
* applying a simple rule from the Unicode database content. Setting
* up a proper standard for the behavior of UTF-8 character terminals
* will require a careful analysis not only of each Unicode character,
* but also of each presentation form, something the author of these
* routines has avoided to do so far.
*
* http://www.unicode.org/unicode/reports/tr11/
*
* Markus Kuhn -- 2007-05-26 (Unicode 5.0)
*
* Permission to use, copy, modify, and distribute this software
* for any purpose and without fee is hereby granted. The author
* disclaims all warranties with regard to this software.
*
* Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
*/
/* One closed range of codepoints. bisearch() treats both ends as
 * inclusive: a value matches when first <= value <= last. */
struct interval {
int first; /* lowest codepoint in the range */
int last; /* highest codepoint in the range */
};
/* sorted list of non-overlapping intervals of non-spacing characters */
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
/* Each entry is a { first, last } pair with inclusive bounds. The table is
 * looked up with bisearch() from mk_wcwidth(), which reports width 0 for any
 * codepoint found here, so the ascending order must be preserved when
 * editing. NOTE(review): the file header dates this data to Unicode 5.0 --
 * confirm before relying on it for newer codepoints. */
static const struct interval combining[] = {
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
{ 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
{ 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
{ 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
{ 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
{ 0xE0100, 0xE01EF }
};
/*
 * Binary-search helper for interval tables.
 *
 * ucs:   codepoint to look for
 * table: intervals in ascending, non-overlapping order
 * max:   index of the LAST entry of `table` (not the number of entries)
 *
 * Returns 1 if `ucs` falls inside one of the intervals (both bounds
 * inclusive), otherwise 0.
 */
static int bisearch(uint32_t ucs, const struct interval *table, int max) {
  /* Quick rejection: below the first interval or above the last one. */
  if (ucs < (uint32_t) table[0].first) {
    return 0;
  }
  if (ucs > (uint32_t) table[max].last) {
    return 0;
  }

  for (int lo = 0, hi = max; lo <= hi; ) {
    const int probe = (lo + hi) / 2;
    const struct interval *entry = &table[probe];

    if (ucs > (uint32_t) entry->last) {
      lo = probe + 1;   /* target lies to the right of this interval */
    } else if (ucs < (uint32_t) entry->first) {
      hi = probe - 1;   /* target lies to the left of this interval */
    } else {
      return 1;         /* first <= ucs <= last */
    }
  }

  return 0;
}
/* The following two functions define the column width of an ISO 10646
* character as follows:
*
* - The null character (U+0000) has a column width of 0.
*
* - Other C0/C1 control characters and DEL will lead to a return
* value of -1.
*
* - Non-spacing and enclosing combining characters (general
* category code Mn or Me in the Unicode database) have a
* column width of 0.
*
* - SOFT HYPHEN (U+00AD) has a column width of 1.
*
* - Other format characters (general category code Cf in the Unicode
* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
*
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
* have a column width of 0.
*
* - Spacing characters in the East Asian Wide (W) or East Asian
* Full-width (F) category as defined in Unicode Technical
* Report #11 have a column width of 2.
*
* - All remaining characters (including all printable
* ISO 8859-1 and WGL4 characters, Unicode control characters,
* etc.) have a column width of 1.
*
* This implementation assumes that uint32_t characters are encoded
* in ISO 10646.
*/
/* Column width of a single codepoint, following the rules listed in the
 * comment above: 0 for NUL and for entries of the `combining` table, -1 for
 * C0/C1 controls and DEL, 2 for the hard-coded wide ranges, 1 otherwise. */
static int mk_wcwidth(uint32_t ucs)
{
  /* NUL takes no cells. */
  if (ucs == 0) {
    return 0;
  }

  /* 8-bit control characters: C0, then DEL through the end of C1. */
  const int is_c0 = ucs < 32;
  const int is_del_or_c1 = (ucs >= 0x7f) && (ucs < 0xa0);
  if (is_c0 || is_del_or_c1) {
    return -1;
  }

  /* Non-spacing characters are found by binary search in `combining`. */
  if (bisearch(ucs, combining,
               sizeof(combining) / sizeof(struct interval) - 1)) {
    return 0;
  }

  /* From here on ucs is neither combining nor a control character;
   * nothing below U+1100 is treated as wide. */
  if (ucs < 0x1100) {
    return 1;
  }

  if (ucs <= 0x115f) {                                /* Hangul Jamo init. consonants */
    return 2;
  }
  if (ucs == 0x2329 || ucs == 0x232a) {
    return 2;
  }
  if (ucs >= 0x2e80 && ucs <= 0xa4cf) {               /* CJK ... Yi */
    /* U+303F is the single narrow exception inside this range. */
    return (ucs == 0x303f) ? 1 : 2;
  }
  if (ucs >= 0xac00 && ucs <= 0xd7a3) {               /* Hangul Syllables */
    return 2;
  }
  if (ucs >= 0xf900 && ucs <= 0xfaff) {               /* CJK Compatibility Ideographs */
    return 2;
  }
  if (ucs >= 0xfe10 && ucs <= 0xfe19) {               /* Vertical forms */
    return 2;
  }
  if (ucs >= 0xfe30 && ucs <= 0xfe6f) {               /* CJK Compatibility Forms */
    return 2;
  }
  if (ucs >= 0xff00 && ucs <= 0xff60) {               /* Fullwidth Forms */
    return 2;
  }
  if (ucs >= 0xffe0 && ucs <= 0xffe6) {
    return 2;
  }
  if (ucs >= 0x20000 && ucs <= 0x2fffd) {
    return 2;
  }
  if (ucs >= 0x30000 && ucs <= 0x3fffd) {
    return 2;
  }

  return 1;
}
#ifdef USE_MK_WCWIDTH_CJK
/*
 * The following function is the same as mk_wcwidth(), except that
 * spacing characters in the East Asian Ambiguous (A) category as
 * defined in Unicode Technical Report #11 have a column width of 2.
 * This variant might be useful for users of CJK legacy encodings who
 * want to migrate to UCS without changing the traditional terminal
 * character-width behaviour. It is not otherwise recommended for
 * general use.
 *
 * It is only compiled when USE_MK_WCWIDTH_CJK is defined.
 *
 * Returns 2 when ucs is found in the ambiguous table below, and
 * otherwise whatever mk_wcwidth(ucs) returns.
 */
static int mk_wcwidth_cjk(uint32_t ucs)
{
/* sorted list of non-overlapping intervals of East Asian Ambiguous
 * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
static const struct interval ambiguous[] = {
{ 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
{ 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
{ 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
{ 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
{ 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
{ 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
{ 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
{ 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
{ 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
{ 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
{ 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
{ 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
{ 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
{ 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
{ 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
{ 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
{ 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
{ 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
{ 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
{ 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
{ 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
{ 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
{ 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
{ 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
{ 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
{ 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
{ 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
{ 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
{ 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
{ 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
{ 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
{ 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
{ 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
{ 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
{ 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
{ 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
{ 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
{ 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
{ 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
{ 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
{ 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
{ 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
{ 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
{ 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
{ 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
{ 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
{ 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
{ 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
{ 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
{ 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
{ 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
{ 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
};
/* binary search in table of East Asian Ambiguous characters;
 * the last argument is the index of the final table entry */
if (bisearch(ucs, ambiguous,
sizeof(ambiguous) / sizeof(struct interval) - 1))
return 2;
/* not ambiguous: use the regular width rules */
return mk_wcwidth(ucs);
}
#endif
// ################################
// ### The rest added by Paul Evans
/* Codepoint intervals that vterm_unicode_width() reports as two columns
 * wide; the entries themselves come from fullwidth.inc. The table is
 * looked up with bisearch(), so it presumably has to be sorted and
 * non-overlapping -- confirm against how the .inc file is produced. */
static const struct interval fullwidth[] = {
#include "fullwidth.inc"
};
INTERNAL int vterm_unicode_width(uint32_t codepoint)
{
if(bisearch(codepoint, fullwidth, sizeof(fullwidth) / sizeof(fullwidth[0]) - 1))
return 2;
return mk_wcwidth(codepoint);
}
/* Looks codepoint up in the table of non-spacing (combining) characters
 * and hands back bisearch()'s result as-is -- presumably nonzero on a
 * hit, as the other callers in this file treat it as a truth value.
 */
INTERNAL int vterm_unicode_is_combining(uint32_t codepoint)
{
  const int found =
    bisearch(codepoint, combining, sizeof(combining) / sizeof(struct interval) - 1);

  return found;
}

View File

@ -10,6 +10,7 @@ extern "C" {
#include <stdbool.h> #include <stdbool.h>
#include "nvim/macros_defs.h" #include "nvim/macros_defs.h"
#include "nvim/types_defs.h"
#include "vterm_keycodes.h" #include "vterm_keycodes.h"
#define VTERM_VERSION_MAJOR 0 #define VTERM_VERSION_MAJOR 0
@ -19,11 +20,6 @@ extern "C" {
#define VTERM_CHECK_VERSION \ #define VTERM_CHECK_VERSION \
vterm_check_version(VTERM_VERSION_MAJOR, VTERM_VERSION_MINOR) vterm_check_version(VTERM_VERSION_MAJOR, VTERM_VERSION_MINOR)
/* Any cell can contain at most one basic printing character and 5 combining
* characters. This number could be changed but will be ABI-incompatible if
* you do */
enum{ VTERM_MAX_CHARS_PER_CELL=6};
typedef struct VTerm VTerm; typedef struct VTerm VTerm;
typedef struct VTermState VTermState; typedef struct VTermState VTermState;
typedef struct VTermScreen VTermScreen; typedef struct VTermScreen VTermScreen;
@ -292,7 +288,7 @@ typedef enum {
} VTermSelectionMask; } VTermSelectionMask;
typedef struct { typedef struct {
const uint32_t *chars; schar_T schar;
int width; int width;
unsigned int protected_cell:1; /* DECSCA-protected against DECSEL/DECSED */ unsigned int protected_cell:1; /* DECSCA-protected against DECSEL/DECSED */
unsigned int dwl:1; /* DECDWL or DECDHL double-width line */ unsigned int dwl:1; /* DECDWL or DECDHL double-width line */
@ -528,7 +524,7 @@ enum {
}; };
typedef struct { typedef struct {
uint32_t chars[VTERM_MAX_CHARS_PER_CELL]; schar_T schar;
char width; char width;
VTermScreenCellAttrs attrs; VTermScreenCellAttrs attrs;
VTermColor fg, bg; VTermColor fg, bg;

View File

@ -4,6 +4,7 @@
#include "vterm.h" #include "vterm.h"
#include <stdarg.h> #include <stdarg.h>
#include "nvim/mbyte.h"
#if defined(__GNUC__) #if defined(__GNUC__)
# define INTERNAL __attribute__((visibility("internal"))) # define INTERNAL __attribute__((visibility("internal")))
@ -101,8 +102,10 @@ struct VTermState
enum { MOUSE_X10, MOUSE_UTF8, MOUSE_SGR, MOUSE_RXVT } mouse_protocol; enum { MOUSE_X10, MOUSE_UTF8, MOUSE_SGR, MOUSE_RXVT } mouse_protocol;
/* Last glyph output, for Unicode recombining purposes */ /* Last glyph output, for Unicode recombining purposes */
uint32_t *combine_chars; char grapheme_buf[MAX_SCHAR_SIZE];
size_t combine_chars_size; // Number of ELEMENTS in the above size_t grapheme_len;
uint32_t grapheme_last; // last added UTF-32 char
GraphemeState grapheme_state;
int combine_width; // The width of the glyph above int combine_width; // The width of the glyph above
VTermPos combine_pos; // Position before movement VTermPos combine_pos; // Position before movement
@ -292,7 +295,4 @@ void vterm_screen_free(VTermScreen *screen);
VTermEncoding *vterm_lookup_encoding(VTermEncodingType type, char designation); VTermEncoding *vterm_lookup_encoding(VTermEncodingType type, char designation);
int vterm_unicode_width(uint32_t codepoint);
int vterm_unicode_is_combining(uint32_t codepoint);
#endif #endif

View File

@ -400,15 +400,28 @@ describe(':terminal buffer', function()
assert_alive() assert_alive()
end) end)
it('truncates number of composing characters to 5', function() it('truncates the size of grapheme clusters', function()
local chan = api.nvim_open_term(0, {}) local chan = api.nvim_open_term(0, {})
local composing = (''):sub(2) local composing = (''):sub(2)
api.nvim_chan_send(chan, 'a' .. composing:rep(8)) api.nvim_chan_send(chan, 'a' .. composing:rep(20))
retry(nil, nil, function() retry(nil, nil, function()
eq('a' .. composing:rep(5), api.nvim_get_current_line()) eq('a' .. composing:rep(14), api.nvim_get_current_line())
end) end)
end) end)
it('handles extended grapheme clusters', function()
local screen = Screen.new(50, 7)
feed 'i'
local chan = api.nvim_open_term(0, {})
api.nvim_chan_send(chan, '🏴‍☠️ yarrr')
screen:expect([[
🏴‍☠️ yarrr^ |
|*5
{5:-- TERMINAL --} |
]])
eq('🏴‍☠️ yarrr', api.nvim_get_current_line())
end)
it('handles split UTF-8 sequences #16245', function() it('handles split UTF-8 sequences #16245', function()
local screen = Screen.new(50, 7) local screen = Screen.new(50, 7)
fn.jobstart({ testprg('shell-test'), 'UTF-8' }, { term = true }) fn.jobstart({ testprg('shell-test'), 'UTF-8' }, { term = true })

View File

@ -1,4 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include "nvim/grid.h"
#include "nvim/mbyte.h"
#include "vterm_test.h" #include "vterm_test.h"
@ -202,6 +204,26 @@ int selection_query(VTermSelectionMask mask, void *user)
return 1; return 1;
} }
/// Writes the contents of a cell's schar to `f` as comma-separated
/// hexadecimal codepoints (no trailing newline, nothing for an empty cell).
///
/// A one-byte sequence whose decoded value is >= 0x80 is written with a
/// leading "??" so it stands out from properly decoded characters.
///
/// @param f      stream to write to
/// @param schar  cell contents, expanded to bytes via schar_get()
static void print_schar(FILE *f, schar_T schar)
{
  char bytes[MAX_SCHAR_SIZE];
  schar_get(bytes, schar);

  bool first = true;
  for (StrCharInfo it = utf_ptr2StrCharInfo(bytes); *it.ptr != 0;
       it = utf_ptr2StrCharInfo(it.ptr + it.chr.len)) {
    // separator goes between entries, never before the first one
    if (!first) {
      fprintf(f, ",");
    }
    first = false;

    const bool lone_high_byte = it.chr.len == 1 && it.chr.value >= 0x80;
    fprintf(f, lone_high_byte ? "??%x" : "%x", it.chr.value);
  }
}
bool want_state_putglyph; bool want_state_putglyph;
int state_putglyph(VTermGlyphInfo *info, VTermPos pos, void *user) int state_putglyph(VTermGlyphInfo *info, VTermPos pos, void *user)
{ {
@ -211,9 +233,7 @@ int state_putglyph(VTermGlyphInfo *info, VTermPos pos, void *user)
FILE *f = fopen(VTERM_TEST_FILE, "a"); FILE *f = fopen(VTERM_TEST_FILE, "a");
fprintf(f, "putglyph "); fprintf(f, "putglyph ");
for (int i = 0; i < VTERM_MAX_CHARS_PER_CELL && info->chars[i]; i++) { print_schar(f, info->schar);
fprintf(f, i ? ",%x" : "%x", info->chars[i]);
}
fprintf(f, " %d %d,%d", info->width, pos.row, pos.col); fprintf(f, " %d %d,%d", info->width, pos.row, pos.col);
if (info->protected_cell) { if (info->protected_cell) {
fprintf(f, " prot"); fprintf(f, " prot");
@ -443,14 +463,15 @@ int screen_sb_pushline(int cols, const VTermScreenCell *cells, void *user)
} }
int eol = cols; int eol = cols;
while (eol && !cells[eol - 1].chars[0]) { while (eol && !cells[eol-1].schar) {
eol--; eol--;
} }
FILE *f = fopen(VTERM_TEST_FILE, "a"); FILE *f = fopen(VTERM_TEST_FILE, "a");
fprintf(f, "sb_pushline %d =", cols); fprintf(f, "sb_pushline %d =", cols);
for (int c = 0; c < eol; c++) { for (int c = 0; c < eol; c++) {
fprintf(f, " %02X", cells[c].chars[0]); fprintf(f, " ");
print_schar(f, cells[c].schar);
} }
fprintf(f, "\n"); fprintf(f, "\n");
@ -467,10 +488,10 @@ int screen_sb_popline(int cols, VTermScreenCell *cells, void *user)
// All lines of scrollback contain "ABCDE" // All lines of scrollback contain "ABCDE"
for (int col = 0; col < cols; col++) { for (int col = 0; col < cols; col++) {
if (col < 5) { if(col < 5) {
cells[col].chars[0] = (uint32_t)('A' + col); cells[col].schar = schar_from_ascii((uint32_t)('A' + col));
} else { } else {
cells[col].chars[0] = 0; cells[col].schar = 0;
} }
cells[col].width = 1; cells[col].width = 1;

View File

@ -17,7 +17,6 @@ local bit = require('bit')
--- @field VTERM_KEY_NONE integer --- @field VTERM_KEY_NONE integer
--- @field VTERM_KEY_TAB integer --- @field VTERM_KEY_TAB integer
--- @field VTERM_KEY_UP integer --- @field VTERM_KEY_UP integer
--- @field VTERM_MAX_CHARS_PER_CELL integer
--- @field VTERM_MOD_ALT integer --- @field VTERM_MOD_ALT integer
--- @field VTERM_MOD_CTRL integer --- @field VTERM_MOD_CTRL integer
--- @field VTERM_MOD_SHIFT integer --- @field VTERM_MOD_SHIFT integer
@ -80,6 +79,8 @@ local bit = require('bit')
--- @field vterm_state_set_selection_callbacks function --- @field vterm_state_set_selection_callbacks function
--- @field vterm_state_set_unrecognised_fallbacks function --- @field vterm_state_set_unrecognised_fallbacks function
local vterm = t.cimport( local vterm = t.cimport(
'./src/nvim/mbyte.h',
'./src/nvim/grid.h',
'./src/vterm/vterm.h', './src/vterm/vterm.h',
'./src/vterm/vterm_internal.h', './src/vterm/vterm_internal.h',
'./test/unit/fixtures/vterm_test.h' './test/unit/fixtures/vterm_test.h'
@ -302,16 +303,12 @@ local function screen_chars(start_row, start_col, end_row, end_col, expected, sc
rect['end_row'] = end_row rect['end_row'] = end_row
rect['end_col'] = end_col rect['end_col'] = end_col
local len = vterm.vterm_screen_get_chars(screen, nil, 0, rect) local len = vterm.vterm_screen_get_text(screen, nil, 0, rect)
local chars = t.ffi.new('uint32_t[?]', len) local text = t.ffi.new('unsigned char[?]', len)
vterm.vterm_screen_get_chars(screen, chars, len, rect) vterm.vterm_screen_get_text(screen, text, len, rect)
local actual = ''
for i = 0, tonumber(len) - 1 do
actual = actual .. string.char(chars[i])
end
local actual = t.ffi.string(text, len)
t.eq(expected, actual) t.eq(expected, actual)
end end
@ -349,7 +346,7 @@ local function screen_row(row, expected, screen, end_col)
local text = t.ffi.new('unsigned char[?]', len) local text = t.ffi.new('unsigned char[?]', len)
vterm.vterm_screen_get_text(screen, text, len, rect) vterm.vterm_screen_get_text(screen, text, len, rect)
t.eq(expected, t.ffi.string(text)) t.eq(expected, t.ffi.string(text, len))
end end
local function screen_cell(row, col, expected, screen) local function screen_cell(row, col, expected, screen)
@ -360,14 +357,20 @@ local function screen_cell(row, col, expected, screen)
local cell = t.ffi.new('VTermScreenCell') local cell = t.ffi.new('VTermScreenCell')
vterm.vterm_screen_get_cell(screen, pos, cell) vterm.vterm_screen_get_cell(screen, pos, cell)
local buf = t.ffi.new('unsigned char[32]')
vterm.schar_get(buf, cell.schar)
local actual = '{' local actual = '{'
for i = 0, vterm.VTERM_MAX_CHARS_PER_CELL - 1 do local i = 0
if cell['chars'][i] ~= 0 then while buf[i] > 0 do
if i > 0 then local char = vterm.utf_ptr2char(buf + i)
actual = actual .. ',' local charlen = vterm.utf_ptr2len(buf + i)
end if i > 0 then
actual = string.format('%s%02x', actual, cell['chars'][i]) actual = actual .. ','
end end
local invalid = char >= 128 and charlen == 1
actual = string.format('%s%s%02x', actual, invalid and '?' or '', char)
i = i + charlen
end end
actual = string.format('%s} width=%d attrs={', actual, cell['width']) actual = string.format('%s} width=%d attrs={', actual, cell['width'])
actual = actual .. (cell['attrs'].bold ~= 0 and 'B' or '') actual = actual .. (cell['attrs'].bold ~= 0 and 'B' or '')
@ -962,8 +965,8 @@ describe('vterm', function()
-- Spare combining chars get truncated -- Spare combining chars get truncated
reset(state, nil) reset(state, nil)
push('e' .. string.rep('\xCC\x81', 10), vt) push('e' .. string.rep('\xCC\x81', 20), vt)
expect('putglyph 65,301,301,301,301,301 1 0,0') -- and nothing more expect('putglyph 65,301,301,301,301,301,301,301,301,301,301,301,301,301,301 1 0,0') -- and nothing more
reset(state, nil) reset(state, nil)
push('e', vt) push('e', vt)
@ -973,6 +976,34 @@ describe('vterm', function()
push('\xCC\x82', vt) push('\xCC\x82', vt)
expect('putglyph 65,301,302 1 0,0') expect('putglyph 65,301,302 1 0,0')
-- emoji with ZWJ and variant selectors, as one chunk
reset(state, nil)
push('🏳️‍🌈🏳️‍⚧️🏴‍☠️', vt)
expect([[putglyph 1f3f3,fe0f,200d,1f308 2 0,0
putglyph 1f3f3,fe0f,200d,26a7,fe0f 2 0,2
putglyph 1f3f4,200d,2620,fe0f 2 0,4]])
-- emoji, one code point at a time
reset(state, nil)
push('🏳', vt)
expect('putglyph 1f3f3 2 0,0')
push('\xef\xb8\x8f', vt)
expect('putglyph 1f3f3,fe0f 2 0,0')
push('\xe2\x80\x8d', vt)
expect('putglyph 1f3f3,fe0f,200d 2 0,0')
push('🌈', vt)
expect('putglyph 1f3f3,fe0f,200d,1f308 2 0,0')
-- modifier can change width
push('❤', vt)
expect('putglyph 2764 1 0,2')
push('\xef\xb8\x8f', vt)
expect('putglyph 2764,fe0f 2 0,2')
-- also works batched
push('❤️', vt)
expect('putglyph 2764,fe0f 2 0,4')
-- DECSCA protected -- DECSCA protected
reset(state, nil) reset(state, nil)
push('A\x1b[1"qB\x1b[2"qC', vt) push('A\x1b[1"qB\x1b[2"qC', vt)
@ -3046,7 +3077,7 @@ describe('vterm', function()
screen_cell( screen_cell(
0, 0,
0, 0,
'{65,301,302,303,304,305} width=1 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', '{65,301,302,303,304,305,306,307,308,309,30a} width=1 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)',
screen screen
) )
@ -3063,7 +3094,7 @@ describe('vterm', function()
screen_cell( screen_cell(
0, 0,
0, 0,
'{65,301,301,301,301,301} width=1 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', '{65,301,301,301,301,301,301,301,301,301,301,301,301,301,301} width=1 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)',
screen screen
) )
@ -3072,6 +3103,16 @@ describe('vterm', function()
push('\x1b[80G\xEF\xBC\x90', vt) push('\x1b[80G\xEF\xBC\x90', vt)
screen_cell(0, 79, '{} width=1 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen) screen_cell(0, 79, '{} width=1 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen)
screen_cell(1, 0, '{ff10} width=2 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen) screen_cell(1, 0, '{ff10} width=2 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen)
-- Outputting emoji with ZWJ and variant selectors
reset(nil, screen)
push('🏳️‍🌈🏳️‍⚧️🏴‍☠️', vt)
-- stylua: ignore start
screen_cell(0, 0, '{1f3f3,fe0f,200d,1f308} width=2 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen)
screen_cell(0, 2, '{1f3f3,fe0f,200d,26a7,fe0f} width=2 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen)
screen_cell(0, 4, '{1f3f4,200d,2620,fe0f} width=2 attrs={} fg=rgb(240,240,240) bg=rgb(0,0,0)', screen)
-- stylua: ignore end
end) end)
pending('62screen_damage', function() end) pending('62screen_damage', function() end)
@ -3125,7 +3166,7 @@ describe('vterm', function()
screen = wantscreen(vt, { b = true }) screen = wantscreen(vt, { b = true })
resize(20, 80, vt) resize(20, 80, vt)
expect( expect(
'sb_pushline 80 = 54 6F 70\nsb_pushline 80 =\nsb_pushline 80 =\nsb_pushline 80 =\nsb_pushline 80 =' 'sb_pushline 80 = 54 6f 70\nsb_pushline 80 =\nsb_pushline 80 =\nsb_pushline 80 =\nsb_pushline 80 ='
) )
-- TODO(dundargoc): fix or remove -- TODO(dundargoc): fix or remove
-- screen_row( 0 , "",screen) -- screen_row( 0 , "",screen)