From 7138cdaef84b1a49024fc08144a8c4f4bcb36265 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Tue, 24 Jun 2025 08:05:32 +0800 Subject: [PATCH 1/6] vim-patch:9.1.1476: no easy way to deduplicate text Problem: no easy way to deduplicate text Solution: add the :uniq ex command (Hirohito Higashi) closes: vim/vim#17538 https://github.com/vim/vim/commit/74f0a77bb9a170a25b03cacf640aa4e1b1d222b2 Co-authored-by: Hirohito Higashi --- runtime/doc/change.txt | 58 ++- runtime/doc/index.txt | 1 + runtime/doc/news.txt | 1 + runtime/doc/vimfn.txt | 1 + runtime/lua/vim/_meta/vimfn.lua | 1 + src/nvim/eval.lua | 1 + src/nvim/ex_cmds.c | 207 ++++++++++- src/nvim/ex_cmds.lua | 6 + src/nvim/ex_getln.c | 3 +- test/old/testdir/test_uniq.vim | 612 ++++++++++++++++++++++++++++++++ 10 files changed, 885 insertions(+), 6 deletions(-) create mode 100644 test/old/testdir/test_uniq.vim diff --git a/runtime/doc/change.txt b/runtime/doc/change.txt index 3a9ee166a7..b715a7f089 100644 --- a/runtime/doc/change.txt +++ b/runtime/doc/change.txt @@ -1835,6 +1835,7 @@ And a few warnings: Vim has a sorting function and a sorting command. The sorting function can be found here: |sort()|, |uniq()|. +Also see |:uniq|. *:sor* *:sort* :[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/] @@ -1844,7 +1845,7 @@ found here: |sort()|, |uniq()|. With [!] the order is reversed. With [i] case is ignored. - + *:sort-l* With [l] sort uses the current collation locale. Implementation details: strcoll() is used to compare strings. See |:language| to check or set the collation @@ -1876,13 +1877,14 @@ found here: |sort()|, |uniq()|. With [b] sorting is done on the first binary number in the line (after or inside a {pattern} match). - + *:sort-u* *:sort-uniq* With [u] (u stands for unique) only keep the first of a sequence of identical lines (ignoring case when [i] is used). Without this flag, a sequence of identical lines will be kept in their original order.
Note that leading and trailing white space may cause lines to be different. + When you just want to make things unique, use |:uniq|. When /{pattern}/ is specified and there is no [r] flag the text matched with {pattern} is skipped, so that @@ -1929,4 +1931,56 @@ The sorting can be interrupted, but if you interrupt it too late in the process you may end up with duplicated lines. This also depends on the system library function used. +============================================================================== +8. Deduplicating text *deduplicating* *unique* + +Vim has a deduplicating function and a deduplicating command. The +deduplicating function can be found here: |uniq()|. +Also see |:sort-uniq|. + + *:uni* *:uniq* +:[range]uni[q][!] [i][l][r][u] [/{pattern}/] + Remove duplicate lines that are adjacent to each other + in [range]. When no range is given, all lines are + processed. + + With [i] case is ignored when comparing lines. + + With [l] comparison uses the current collation locale. + See |:sort-l| for more details. + + With [r] comparison is done on the text that matches + /{pattern}/ instead of the full line. + + When /{pattern}/ is specified and [r] is not used, the + text matched with {pattern} is skipped and comparison + is done on what comes after the match. + 'ignorecase' applies to the pattern, but 'smartcase' + is not used. + + Instead of the slash any non-letter can be used. + + For example, to remove adjacent duplicate lines based + on the second comma-separated field: > + :uniq r /[^,]*,/ +< Or to keep only unique lines ignoring the first 5 + characters: > + :uniq u /.\{5}/ +< If {pattern} is empty (e.g. // is used), the last + search pattern is used. + + With [u] only keep lines that do not repeat (i.e., are + not immediately followed by the same line). + + With [!] only keep lines that are immediately followed + by a duplicate. + + If both [!] and [u] are given, [u] is ignored and [!] + takes effect. 
+ + Note that leading and trailing white space, and lines + that are not adjacent, are not considered duplicates. + To remove all duplicates regardless of position, use + |:sort-u| or external tools. + vim:tw=78:ts=8:noet:ft=help:norl: diff --git a/runtime/doc/index.txt b/runtime/doc/index.txt index 1a9724195b..c30b393582 100644 --- a/runtime/doc/index.txt +++ b/runtime/doc/index.txt @@ -1666,6 +1666,7 @@ tag command action ~ |:unabbreviate| :una[bbreviate] remove abbreviation |:unhide| :unh[ide] open a window for each loaded file in the buffer list +|:uniq| :uni[q] uniq lines |:unlet| :unl[et] delete variable |:unlockvar| :unlo[ckvar] unlock variables |:unmap| :unm[ap] remove mapping diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index b0471cb2de..d606e616e9 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -155,6 +155,7 @@ DIAGNOSTICS EDITOR • |:iput| works like |:put| but adjusts indent. +• |:uniq| deduplicates text in the current buffer. • |omnicompletion| in `help` buffer. |ft-help-omni| • Setting "'0" in 'shada' prevents storing the jumplist in the shada file. • 'shada' now correctly respects "/0" and "f0". diff --git a/runtime/doc/vimfn.txt b/runtime/doc/vimfn.txt index 87fb95063e..172eb72675 100644 --- a/runtime/doc/vimfn.txt +++ b/runtime/doc/vimfn.txt @@ -11653,6 +11653,7 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E88 let newlist = uniq(copy(mylist)) < The default compare function uses the string representation of each item. For the use of {func} and {dict} see |sort()|. + For deduplicating text in the current buffer see |:uniq|. Returns zero if {list} is not a |List|. 
diff --git a/runtime/lua/vim/_meta/vimfn.lua b/runtime/lua/vim/_meta/vimfn.lua index 5150ddedce..4443e8c729 100644 --- a/runtime/lua/vim/_meta/vimfn.lua +++ b/runtime/lua/vim/_meta/vimfn.lua @@ -10612,6 +10612,7 @@ function vim.fn.undotree(buf) end --- let newlist = uniq(copy(mylist)) --- nextcmd = check_nextcmd(p); @@ -678,7 +677,6 @@ void ex_sort(exarg_T *eap) extmark_splice(curbuf, eap->line1 - 1, 0, (int)count, 0, old_count, lnum - eap->line2, 0, new_count, kExtmarkUndo); - changed_lines(curbuf, eap->line1, 0, eap->line2 + 1, -deleted, true); } @@ -695,6 +693,209 @@ sortend: } } +/// ":uniq". +void ex_uniq(exarg_T *eap) +{ + regmatch_T regmatch; + int maxlen = 0; + linenr_T count = eap->line2 - eap->line1 + 1; + bool keep_only_unique = false; + bool keep_only_not_unique = eap->forceit; + linenr_T deleted = 0; + + // Uniq one line is really quick! + if (count <= 1) { + return; + } + + if (u_save((linenr_T)(eap->line1 - 1), (linenr_T)(eap->line2 + 1)) == FAIL) { + return; + } + sortbuf1 = NULL; + regmatch.regprog = NULL; + + sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = sort_flt = false; + bool change_occurred = false; // Buffer contents changed. + + for (char *p = eap->arg; *p != NUL; p++) { + if (ascii_iswhite(*p)) { + // Skip + } else if (*p == 'i') { + sort_ic = true; + } else if (*p == 'l') { + sort_lc = true; + } else if (*p == 'r') { + sort_rx = true; + } else if (*p == 'u') { + // 'u' is only valid when '!' is not given. + if (!keep_only_not_unique) { + keep_only_unique = true; + } + } else if (*p == '"') { // comment start + break; + } else if (eap->nextcmd == NULL && check_nextcmd(p) != NULL) { + eap->nextcmd = check_nextcmd(p); + break; + } else if (!ASCII_ISALPHA(*p) && regmatch.regprog == NULL) { + char *s = skip_regexp_err(p + 1, *p, true); + if (s == NULL) { + goto uniqend; + } + *s = NUL; + // Use last search pattern if uniq pattern is empty. 
+ if (s == p + 1) { + if (last_search_pat() == NULL) { + emsg(_(e_noprevre)); + goto uniqend; + } + regmatch.regprog = vim_regcomp(last_search_pat(), RE_MAGIC); + } else { + regmatch.regprog = vim_regcomp(p + 1, RE_MAGIC); + } + if (regmatch.regprog == NULL) { + goto uniqend; + } + p = s; // continue after the regexp + regmatch.rm_ic = p_ic; + } else { + semsg(_(e_invarg2), p); + goto uniqend; + } + } + + // Make an array with all line numbers. This avoids having to copy all + // the lines into allocated memory. + // When remove deplicating on strings "start_col_nr" is the offset in the + // line, for numbers remove deplicating it's the number to uniq on. This + // means the pattern matching only has to be done once per line. + // Also get the longest line length for allocating "sortbuf". + for (linenr_T lnum = eap->line1; lnum <= eap->line2; lnum++) { + // char *s = ml_get(lnum); + int len = ml_get_len(lnum); + if (maxlen < len) { + maxlen = len; + } + + if (got_int) { + goto uniqend; + } + } + + // Allocate a buffer that can hold the longest line. + sortbuf1 = xmalloc((size_t)maxlen + 1); + + // Delete lines according to options. + bool match_continue = false; + bool next_is_unmatch = false; + linenr_T done_lnum = eap->line1 - 1; + linenr_T delete_lnum = 0; + for (linenr_T i = 0; i < count; i++) { + linenr_T get_lnum = eap->line1 + i; + + char *s = ml_get(get_lnum); + int len = ml_get_len(get_lnum); + + colnr_T start_col = 0; + colnr_T end_col = len; + if (regmatch.regprog != NULL && vim_regexec(&regmatch, s, 0)) { + if (sort_rx) { + start_col = (colnr_T)(regmatch.startp[0] - s); + end_col = (colnr_T)(regmatch.endp[0] - s); + } else { + start_col = (colnr_T)(regmatch.endp[0] - s); + } + } else if (regmatch.regprog != NULL) { + end_col = 0; + } + char save_c; // temporary character storage + if (end_col > 0) { + save_c = s[end_col]; + s[end_col] = NUL; + } + + bool is_match = i > 0 ? 
!string_compare(&s[start_col], sortbuf1) : false; + delete_lnum = 0; + if (next_is_unmatch) { + is_match = false; + next_is_unmatch = false; + } + + if (!keep_only_unique && !keep_only_not_unique) { + if (is_match) { + delete_lnum = get_lnum; + } else { + STRCPY(sortbuf1, &s[start_col]); + } + } else if (keep_only_not_unique) { + if (is_match) { + done_lnum = get_lnum - 1; + delete_lnum = get_lnum; + match_continue = true; + } else { + if (i > 0 && !match_continue && get_lnum - 1 > done_lnum) { + delete_lnum = get_lnum - 1; + next_is_unmatch = true; + } else if (i >= count - 1) { + delete_lnum = get_lnum; + } + match_continue = false; + STRCPY(sortbuf1, &s[start_col]); + } + } else { // keep_only_unique + if (is_match) { + if (!match_continue) { + delete_lnum = get_lnum - 1; + } else { + delete_lnum = get_lnum; + } + match_continue = true; + } else { + if (i == 0 && match_continue) { + delete_lnum = get_lnum; + } + match_continue = false; + STRCPY(sortbuf1, &s[start_col]); + } + } + + if (end_col > 0) { + s[end_col] = save_c; + } + + if (delete_lnum > 0) { + ml_delete(delete_lnum, false); + i -= get_lnum - delete_lnum + 1; + count--; + deleted++; + change_occurred = true; + } + + fast_breakcheck(); + if (got_int) { + goto uniqend; + } + } + + // Adjust marks for deleted lines and prepare for displaying. + mark_adjust(eap->line2 - deleted, eap->line2, MAXLNUM, -deleted, + change_occurred ? 
kExtmarkUndo : kExtmarkNOOP); + msgmore(-deleted); + + if (change_occurred) { + changed_lines(curbuf, eap->line1, 0, eap->line2 + 1, -deleted, true); + } + + curwin->w_cursor.lnum = eap->line1; + beginline(BL_WHITE | BL_FIX); + +uniqend: + xfree(sortbuf1); + vim_regfree(regmatch.regprog); + if (got_int) { + emsg(_(e_interr)); + } +} + /// :move command - move lines line1-line2 to line dest /// /// @return FAIL for failure, OK otherwise diff --git a/src/nvim/ex_cmds.lua b/src/nvim/ex_cmds.lua index 246a0a91ec..7a50d3d72e 100644 --- a/src/nvim/ex_cmds.lua +++ b/src/nvim/ex_cmds.lua @@ -3017,6 +3017,12 @@ M.cmds = { addr_type = 'ADDR_OTHER', func = 'ex_buffer_all', }, + { + command = 'uniq', + flags = bit.bor(RANGE, DFLALL, WHOLEFOLD, BANG, EXTRA, NOTRLCOM, MODIFY), + addr_type = 'ADDR_LINES', + func = 'ex_uniq', + }, { command = 'unlet', flags = bit.bor(BANG, EXTRA, NEEDARG, SBOXOK, CMDWIN, LOCK_OK), diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c index 5c85e5fe88..bec58f12b6 100644 --- a/src/nvim/ex_getln.c +++ b/src/nvim/ex_getln.c @@ -344,7 +344,8 @@ static bool do_incsearch_highlighting(int firstc, int *search_delim, incsearch_s } else if (*cmd == 's' && cmd[1] == 'n') { magic_overruled = OPTION_MAGIC_OFF; } - } else if (strncmp(cmd, "sort", (size_t)MAX(p - cmd, 3)) == 0) { + } else if (strncmp(cmd, "sort", (size_t)MAX(p - cmd, 3)) == 0 + || strncmp(cmd, "uniq", (size_t)MAX(p - cmd, 3)) == 0) { // skip over ! and flags if (*p == '!') { p = skipwhite(p + 1); diff --git a/test/old/testdir/test_uniq.vim b/test/old/testdir/test_uniq.vim new file mode 100644 index 0000000000..a60fd49b16 --- /dev/null +++ b/test/old/testdir/test_uniq.vim @@ -0,0 +1,612 @@ +" Tests for the ":uniq" command. + +source check.vim + +" Tests for the ":uniq" command. 
+func Test_uniq_cmd() + let tests = [ + \ { + \ 'name' : 'Alphabetical uniq #1', + \ 'cmd' : '%uniq', + \ 'input' : [ + \ 'abc', + \ 'ab', + \ 'a', + \ 'a321', + \ 'a123', + \ 'a123', + \ 'a123', + \ 'a123', + \ 'a122', + \ 'a123', + \ 'b321', + \ 'c123d', + \ ' 123b', + \ 'c321d', + \ 'b322b', + \ 'b321', + \ 'b321b' + \ ], + \ 'expected' : [ + \ 'abc', + \ 'ab', + \ 'a', + \ 'a321', + \ 'a123', + \ 'a122', + \ 'a123', + \ 'b321', + \ 'c123d', + \ ' 123b', + \ 'c321d', + \ 'b322b', + \ 'b321', + \ 'b321b' + \ ] + \ }, + \ { + \ 'name' : 'Alphabetical uniq #2', + \ 'cmd' : '%uniq', + \ 'input' : [ + \ 'abc', + \ 'abc', + \ 'abc', + \ 'ab', + \ 'a', + \ 'a321', + \ 'a122', + \ 'b321', + \ 'a123', + \ 'a123', + \ 'c123d', + \ ' 123b', + \ 'c321d', + \ 'b322b', + \ 'b321', + \ 'b321b' + \ ], + \ 'expected' : [ + \ 'abc', + \ 'ab', + \ 'a', + \ 'a321', + \ 'a122', + \ 'b321', + \ 'a123', + \ 'c123d', + \ ' 123b', + \ 'c321d', + \ 'b322b', + \ 'b321', + \ 'b321b' + \ ] + \ }, + \ { + \ 'name' : 'alphabetical, uniqed input', + \ 'cmd' : 'uniq', + \ 'input' : [ + \ 'a', + \ 'b', + \ 'c', + \ ], + \ 'expected' : [ + \ 'a', + \ 'b', + \ 'c', + \ ] + \ }, + \ { + \ 'name' : 'alphabetical, uniqed input, unique at end', + \ 'cmd' : 'uniq', + \ 'input' : [ + \ 'aa', + \ 'bb', + \ 'cc', + \ 'cc', + \ ], + \ 'expected' : [ + \ 'aa', + \ 'bb', + \ 'cc', + \ ] + \ }, + \ { + \ 'name' : 'uniq one line buffer', + \ 'cmd' : 'uniq', + \ 'input' : [ + \ 'single line' + \ ], + \ 'expected' : [ + \ 'single line' + \ ] + \ }, + \ { + \ 'name' : 'uniq ignoring case', + \ 'cmd' : '%uniq i', + \ 'input' : [ + \ 'BB', + \ 'Cc', + \ 'cc', + \ 'Cc', + \ 'aa' + \ ], + \ 'expected' : [ + \ 'BB', + \ 'Cc', + \ 'aa' + \ ] + \ }, + \ { + \ 'name' : 'uniq not uniqued #1', + \ 'cmd' : '%uniq!', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc', + \ 'cc', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'cc', + \ 'yyy', + \ ] + \ }, + \ { + \ 'name' : 'uniq not uniqued #2', + 
\ 'cmd' : '%uniq!', + \ 'input' : [ + \ 'aa', + \ 'aa', + \ 'bb', + \ 'cc', + \ 'cc', + \ 'cc', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'aa', + \ 'cc', + \ 'yyy', + \ ] + \ }, + \ { + \ 'name' : 'uniq not uniqued ("u" is ignored)', + \ 'cmd' : '%uniq! u', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc', + \ 'cc', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'cc', + \ 'yyy', + \ ] + \ }, + \ { + \ 'name' : 'uniq not uniqued, ignoring case', + \ 'cmd' : '%uniq! i', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc', + \ 'Cc', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'cc', + \ 'yyy', + \ ] + \ }, + \ { + \ 'name' : 'uniq only unique #1', + \ 'cmd' : '%uniq u', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc', + \ 'cc', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'aa', + \ 'bb', + \ 'aa', + \ 'zz' + \ ] + \ }, + \ { + \ 'name' : 'uniq only unique #2', + \ 'cmd' : '%uniq u', + \ 'input' : [ + \ 'aa', + \ 'aa', + \ 'bb', + \ 'cc', + \ 'cc', + \ 'cc', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'bb', + \ 'zz' + \ ] + \ }, + \ { + \ 'name' : 'uniq only unique, ignoring case', + \ 'cmd' : '%uniq ui', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'Cc', + \ 'cc', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'aa', + \ 'bb', + \ 'aa', + \ 'zz' + \ ] + \ }, + \ { + \ 'name' : 'uniq on first 2 charscters', + \ 'cmd' : '%uniq r /^../', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc1', + \ 'cc2', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy2', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'aa', + \ 'cc', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'zz' + \ ] + \ }, + \ { + \ 'name' : 'uniq on after 2 charscters', + \ 'cmd' : '%uniq /^../', + \ 'input' : [ + \ '11aa', + \ '11cc', + \ '13cc', + \ '13cc', + \ '13bb', + \ '13aa', + \ '12yyy', + \ '11yyy', + \ '11zz' + \ ], + \ 'expected' : [ + \ '11aa', + \ '11cc', + \ '13bb', + \ '13aa', + \ 
'12yyy', + \ '11zz' + \ ] + \ }, + \ { + \ 'name' : 'uniq on first 2 charscters, not uniqued', + \ 'cmd' : '%uniq! r /^../', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc1', + \ 'cc2', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy2', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'cc', + \ 'yyy' + \ ] + \ }, + \ { + \ 'name' : 'uniq on after 2 charscters, not uniqued', + \ 'cmd' : '%uniq! /^../', + \ 'input' : [ + \ '11aa', + \ '11cc', + \ '13cc', + \ '13cc', + \ '13bb', + \ '13aa', + \ '12yyy', + \ '11yyy', + \ '11zz' + \ ], + \ 'expected' : [ + \ '11cc', + \ '12yyy' + \ ] + \ }, + \ { + \ 'name' : 'uniq on first 2 charscters, only unique', + \ 'cmd' : '%uniq ru /^../', + \ 'input' : [ + \ 'aa', + \ 'cc', + \ 'cc1', + \ 'cc2', + \ 'bb', + \ 'aa', + \ 'yyy', + \ 'yyy2', + \ 'zz' + \ ], + \ 'expected' : [ + \ 'aa', + \ 'bb', + \ 'aa', + \ 'zz' + \ ] + \ }, + \ { + \ 'name' : 'uniq on after 2 charscters, only unique', + \ 'cmd' : '%uniq u /^../', + \ 'input' : [ + \ '11aa', + \ '11cc', + \ '13cc', + \ '13cc', + \ '13bb', + \ '13aa', + \ '12yyy', + \ '11yyy', + \ '11zz' + \ ], + \ 'expected' : [ + \ '11aa', + \ '13bb', + \ '13aa', + \ '11zz' + \ ] + \ } + \ ] + + " This does not appear to work correctly on Mac. + if !has('mac') + if v:collate =~? '^\(en\|fr\)_ca.utf-\?8$' + " en_CA.utf-8 uniqs capitals before lower case + " 'Œ' is omitted because it can uniq before or after 'œ' + let tests += [ + \ { + \ 'name' : 'uniq with locale ' .. v:collate, + \ 'cmd' : '%uniq l', + \ 'input' : [ + \ 'A', + \ 'a', + \ 'À', + \ 'à', + \ 'E', + \ 'e', + \ 'É', + \ 'é', + \ 'È', + \ 'è', + \ 'O', + \ 'o', + \ 'Ô', + \ 'ô', + \ 'œ', + \ 'Z', + \ 'z' + \ ], + \ 'expected' : [ + \ 'A', + \ 'a', + \ 'À', + \ 'à', + \ 'E', + \ 'e', + \ 'É', + \ 'é', + \ 'È', + \ 'è', + \ 'O', + \ 'o', + \ 'Ô', + \ 'ô', + \ 'œ', + \ 'Z', + \ 'z' + \ ] + \ }, + \ ] + elseif v:collate =~? 
'^\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8$' + " With these locales, the accentuated letters are ordered + " similarly to the non-accentuated letters. + let tests += [ + \ { + \ 'name' : 'uniq with locale ' .. v:collate, + \ 'cmd' : '%uniq li', + \ 'input' : [ + \ 'A', + \ 'À', + \ 'a', + \ 'à', + \ 'à', + \ 'E', + \ 'È', + \ 'É', + \ 'o', + \ 'O', + \ 'Ô', + \ 'e', + \ 'è', + \ 'é', + \ 'ô', + \ 'Œ', + \ 'œ', + \ 'z', + \ 'Z' + \ ], + \ 'expected' : [ + \ 'A', + \ 'À', + \ 'a', + \ 'à', + \ 'E', + \ 'È', + \ 'É', + \ 'o', + \ 'O', + \ 'Ô', + \ 'e', + \ 'è', + \ 'é', + \ 'ô', + \ 'Œ', + \ 'œ', + \ 'z', + \ 'Z' + \ ] + \ }, + \ ] + endif + endif + + for t in tests + enew! + call append(0, t.input) + $delete _ + setlocal nomodified + execute t.cmd + + call assert_equal(t.expected, getline(1, '$'), t.name) + + " Previously, the ":uniq" command would set 'modified' even if the buffer + " contents did not change. Here, we check that this problem is fixed. + if t.input == t.expected + call assert_false(&modified, t.name . ': &mod is not correct') + else + call assert_true(&modified, t.name . ': &mod is not correct') + endif + endfor + + " Needs at least two lines for this test + call setline(1, ['line1', 'line2']) + call assert_fails('uniq no', 'E475:') + call assert_fails('uniq c', 'E475:') + call assert_fails('uniq #pat%', 'E654:') + call assert_fails('uniq /\%(/', 'E53:') + call assert_fails('333uniq', 'E16:') + call assert_fails('1,999uniq', 'E16:') + + enew! +endfunc + +func Test_uniq_cmd_report() + enew! + call append(0, repeat([1], 3) + repeat([2], 3) + repeat([3], 3)) + $delete _ + setlocal nomodified + let res = execute('%uniq') + + call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0')) + call assert_match("6 fewer lines", res) + enew! 
+ call append(0, repeat([1], 3) + repeat([2], 3) + repeat([3], 3)) + $delete _ + setlocal nomodified report=10 + let res = execute('%uniq') + + call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0')) + call assert_equal("", res) + enew! + call append(0, repeat([1], 3) + repeat([2], 3) + repeat([3], 3)) + $delete _ + setl report&vim + setlocal nomodified + let res = execute('1g/^/%uniq') + + call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0')) + " the output comes from the :g command, not from the :uniq + call assert_match("6 fewer lines", res) + enew! +endfunc + +" Test for a :uniq command followed by another command +func Test_uniq_followed_by_cmd() + new + let var = '' + call setline(1, ['cc', 'aa', 'bb']) + %uniq | let var = "uniqcmdtest" + call assert_equal(var, "uniqcmdtest") + call assert_equal(['cc', 'aa', 'bb'], getline(1, '$')) + " Test for :uniq followed by a comment + call setline(1, ['3b', '3b', '3b', '1c', '2a']) + %uniq " uniq alphabetically + call assert_equal(['3b', '1c', '2a'], getline(1, '$')) + bw! +endfunc + +" Test for retaining marks across a :uniq +func Test_uniq_with_marks() + new + call setline(1, ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb']) + call setpos("'c", [0, 1, 0, 0]) + call setpos("'a", [0, 4, 0, 0]) + call setpos("'b", [0, 7, 0, 0]) + %uniq + call assert_equal(['cc', 'aa', 'bb'], getline(1, '$')) + call assert_equal(1, line("'c")) + call assert_equal(0, line("'a")) + call assert_equal(0, line("'b")) + bw! +endfunc + +" Test for undo after a :uniq +func Test_uniq_undo() + new + let li = ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb', 'aa'] + call writefile(li, 'XfileUniq', 'D') + edit XfileUniq + uniq + call assert_equal(['cc', 'aa', 'bb', 'aa'], getline(1, '$')) + call assert_true(&modified) + undo + call assert_equal(li, getline(1, '$')) + call assert_false(&modified) + bw! 
+endfunc + +" vim: shiftwidth=2 sts=2 expandtab From 11f8e8eb6316ca2c57a4a001bd20b0a3cb1af813 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Wed, 25 Jun 2025 07:31:22 +0800 Subject: [PATCH 2/6] vim-patch:9.1.1478: Unused assignment in ex_uniq() Problem: Unused assignment in ex_uniq() (after v9.1.1476) Solution: Remove the assignment and the wrong comments above (zeertzjq). closes: vim/vim#17596 https://github.com/vim/vim/commit/fc378a88d808ec1d44dd8582498c1b1b6af8e8d6 --- src/nvim/ex_cmds.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 307fcc0259..68292d6ac2 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -763,14 +763,8 @@ void ex_uniq(exarg_T *eap) } } - // Make an array with all line numbers. This avoids having to copy all - // the lines into allocated memory. - // When remove deplicating on strings "start_col_nr" is the offset in the - // line, for numbers remove deplicating it's the number to uniq on. This - // means the pattern matching only has to be done once per line. - // Also get the longest line length for allocating "sortbuf". + // Find the length of the longest line. 
for (linenr_T lnum = eap->line1; lnum <= eap->line2; lnum++) { - // char *s = ml_get(lnum); int len = ml_get_len(lnum); if (maxlen < len) { maxlen = len; From d574f9479d9feab34507dc740311198f4ab35e49 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Wed, 25 Jun 2025 07:32:24 +0800 Subject: [PATCH 3/6] vim-patch:ca793e6: runtime(vim): Update base-syntax, match :uniq command closes: vim/vim#17601 https://github.com/vim/vim/commit/ca793e60dbcf00a8d5658b0120965edb4b0f3fab Co-authored-by: Doug Kearns --- runtime/syntax/vim.vim | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/runtime/syntax/vim.vim b/runtime/syntax/vim.vim index 264a36f4a8..a92d233922 100644 --- a/runtime/syntax/vim.vim +++ b/runtime/syntax/vim.vim @@ -212,7 +212,7 @@ syn match vimNumber '\<0z\%(\x\x\)\+\%(\.\%(\x\x\)\+\)*' skipwhite nextgroup=@vi syn case match " All vimCommands are contained by vimIsCommand. {{{2 -syn cluster vimCmdList contains=vimAbb,vimAddress,vimAutocmd,vimAugroup,vimBehave,vimCall,vimCatch,vimConst,vimDoautocmd,vimDebuggreedy,vimDef,vimDefFold,vimDelcommand,vimDelFunction,@vimEcho,vimElse,vimEnddef,vimEndfunction,vimEndif,vimExecute,vimIsCommand,vimExtCmd,vimExFilter,vimExMark,vimFor,vimFunction,vimFunctionFold,vimGrep,vimGrepAdd,vimGlobal,vimHelpgrep,vimHighlight,vimImport,vimLet,vimLoadkeymap,vimLockvar,vimMake,vimMap,vimMark,vimMatch,vimNotFunc,vimNormal,vimProfdel,vimProfile,vimRedir,vimSet,vimSleep,vimSort,vimSyntax,vimThrow,vimUnlet,vimUnlockvar,vimUnmap,vimUserCmd,vimVimgrep,vimVimgrepadd,vimMenu,vimMenutranslate,@vim9CmdList,@vimExUserCmdList,vimLua,vimMzScheme,vimPerl,vimPython,vimPython3,vimPythonX,vimRuby,vimTcl +syn cluster vimCmdList 
contains=vimAbb,vimAddress,vimAutocmd,vimAugroup,vimBehave,vimCall,vimCatch,vimConst,vimDoautocmd,vimDebuggreedy,vimDef,vimDefFold,vimDelcommand,vimDelFunction,@vimEcho,vimElse,vimEnddef,vimEndfunction,vimEndif,vimExecute,vimIsCommand,vimExtCmd,vimExFilter,vimExMark,vimFor,vimFunction,vimFunctionFold,vimGrep,vimGrepAdd,vimGlobal,vimHelpgrep,vimHighlight,vimImport,vimLet,vimLoadkeymap,vimLockvar,vimMake,vimMap,vimMark,vimMatch,vimNotFunc,vimNormal,vimProfdel,vimProfile,vimRedir,vimSet,vimSleep,vimSort,vimSyntax,vimThrow,vimUniq,vimUnlet,vimUnlockvar,vimUnmap,vimUserCmd,vimVimgrep,vimVimgrepadd,vimMenu,vimMenutranslate,@vim9CmdList,@vimExUserCmdList,vimLua,vimMzScheme,vimPerl,vimPython,vimPython3,vimPythonX,vimRuby,vimTcl syn cluster vim9CmdList contains=vim9Abstract,vim9Class,vim9Const,vim9Enum,vim9Export,vim9Final,vim9For,vim9Interface,vim9Type,vim9Var syn match vimCmdSep "\\\@1" skipwhite nextgroup=vimUniqBang,@vimUniqOptions,vimUniqPattern,vimCmdSep +syn match vimUniqBang contained "\a\@1<=!" 
skipwhite nextgroup=@vimUniqOptions,vimUniqPattern,vimCmdSep +syn match vimUniqOptionsError contained "\a\+" +syn match vimUniqOptions contained "\<[ilur]*\>" skipwhite nextgroup=vimUniqPattern,vimCmdSep +syn region vimUniqPattern contained + \ matchgroup=Delimiter + \ start="\z([^[:space:][:alpha:]|]\)" + \ skip="\\\\\|\\\z1" + \ end="\z1" + \ skipwhite nextgroup=@vimUniqOptions,vimCmdSep + \ contains=@vimSubstList + \ oneline + +syn cluster vimUniqOptions contains=vimUniqOptions,vimUniqOptionsError + " Syntax: {{{2 "======= syn match vimGroupList contained "[^[:space:],]\+\%(\s*,\s*[^[:space:],]\+\)*" contains=vimGroupSpecial @@ -2379,6 +2396,9 @@ if !exists("skip_vim_syntax_inits") hi def link vimThrow vimCommand hi def link vimTodo Todo hi def link vimType Type + hi def link vimUniq vimCommand + hi def link vimUniqBang vimBang + hi def link vimUniqOptions Special hi def link vimUnlet vimCommand hi def link vimUnletBang vimBang hi def link vimUnmap vimMap From 3bf27b2c74aaa800c17790d03a0dcf640f8e266a Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Thu, 26 Jun 2025 07:30:26 +0800 Subject: [PATCH 4/6] vim-patch:9.1.1481: gcc complains about uninitialized variable Problem: gcc complains about uninitialized variable (Tony Mechelynck, after v9.1.1476) Solution: initialize variable https://github.com/vim/vim/commit/42d2c5e803cec6ec661217fa1ae630a2f7c463bc Co-authored-by: Christian Brabandt --- src/nvim/ex_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 68292d6ac2..36cca6cf22 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -801,7 +801,7 @@ void ex_uniq(exarg_T *eap) } else if (regmatch.regprog != NULL) { end_col = 0; } - char save_c; // temporary character storage + char save_c = NUL; // temporary character storage if (end_col > 0) { save_c = s[end_col]; s[end_col] = NUL; From 3564e624261b6a5f12efac34876307095a177573 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Thu, 26 Jun 2025 07:24:02 
+0800 Subject: [PATCH 5/6] vim-patch:1c471ac: runtime(doc): update description of :uniq command The examples mention the [u] flag, so at least the [u] flag should be introduced before the examples. Slightly reword the sentence about trailing/leading white space. closes: vim/vim#17604 https://github.com/vim/vim/commit/1c471ac548cc883fdd5446c62a4b90cdcb6769ee Co-authored-by: Hirohito Higashi --- runtime/doc/change.txt | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/runtime/doc/change.txt b/runtime/doc/change.txt index b715a7f089..591c7a7c9d 100644 --- a/runtime/doc/change.txt +++ b/runtime/doc/change.txt @@ -1952,12 +1952,20 @@ Also see |:sort-uniq|. With [r] comparison is done on the text that matches /{pattern}/ instead of the full line. + With [u] only keep lines that do not repeat (i.e., are + not immediately followed by the same line). + + With [!] only keep lines that are immediately followed + by a duplicate. + + If both [!] and [u] are given, [u] is ignored and [!] + takes effect. + When /{pattern}/ is specified and [r] is not used, the text matched with {pattern} is skipped and comparison is done on what comes after the match. 'ignorecase' applies to the pattern, but 'smartcase' is not used. - Instead of the slash any non-letter can be used. For example, to remove adjacent duplicate lines based @@ -1969,17 +1977,8 @@ Also see |:sort-uniq|. < If {pattern} is empty (e.g. // is used), the last search pattern is used. - With [u] only keep lines that do not repeat (i.e., are - not immediately followed by the same line). - - With [!] only keep lines that are immediately followed - by a duplicate. - - If both [!] and [u] are given, [u] is ignored and [!] - takes effect. - - Note that leading and trailing white space, and lines - that are not adjacent, are not considered duplicates. + Note that leading and trailing white space may cause + lines to be considered different. 
To remove all duplicates regardless of position, use |:sort-u| or external tools. From 023f157a609a1eb0ccd59442249d86a8bfa6a95c Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Fri, 27 Jun 2025 07:08:19 +0800 Subject: [PATCH 6/6] vim-patch:26ebe21: runtime(doc): mismatch between the :uniq document's description and examples closes: vim/vim#17612 https://github.com/vim/vim/commit/26ebe21caa93ae4c3d7ceb8e83e799b0dabc6271 Co-authored-by: Hirohito Higashi --- runtime/doc/change.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/doc/change.txt b/runtime/doc/change.txt index 591c7a7c9d..742c1b4a08 100644 --- a/runtime/doc/change.txt +++ b/runtime/doc/change.txt @@ -1970,7 +1970,7 @@ Also see |:sort-uniq|. For example, to remove adjacent duplicate lines based on the second comma-separated field: > - :uniq r /[^,]*,/ + :uniq /[^,]*,/ < Or to keep only unique lines ignoring the first 5 characters: > :uniq u /.\{5}/