patch 9.1.1476: no easy way to deduplicate text

Problem:  no easy way to deduplicate text
Solution: add the :uniq ex command
          (Hirohito Higashi)

closes: #17538

Signed-off-by: Hirohito Higashi <h.east.727@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
Hirohito Higashi
2025-06-23 21:42:36 +02:00
committed by Christian Brabandt
parent 159d392427
commit 74f0a77bb9
12 changed files with 929 additions and 14 deletions

View File

@ -1,4 +1,4 @@
*builtin.txt* For Vim version 9.1. Last change: 2025 Jun 07
*builtin.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@ -12143,6 +12143,7 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
:let newlist = uniq(copy(mylist))
< The default compare function uses the string representation of
each item. For the use of {func} and {dict} see |sort()|.
For deduplicating text in the current buffer see |:uniq|.
Returns zero if {list} is not a |List|.

View File

@ -1,4 +1,4 @@
*change.txt* For Vim version 9.1. Last change: 2025 May 28
*change.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@ -20,6 +20,7 @@ commands with the "." command.
5. Copying and moving text |copy-move|
6. Formatting text |formatting|
7. Sorting text |sorting|
8. Deduplicating text |deduplicating|
For inserting text see |insert.txt|.
@ -1895,6 +1896,7 @@ And a few warnings:
Vim has a sorting function and a sorting command. The sorting function can be
found here: |sort()|, |uniq()|.
Also see |:uniq|.
*:sor* *:sort*
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
@ -1904,7 +1906,7 @@ found here: |sort()|, |uniq()|.
With [!] the order is reversed.
With [i] case is ignored.
*:sort-l*
With [l] sort uses the current collation locale.
Implementation details: strcoll() is used to compare
strings. See |:language| to check or set the collation
@ -1937,13 +1939,14 @@ found here: |sort()|, |uniq()|.
With [b] sorting is done on the first binary number in
the line (after or inside a {pattern} match).
*:sort-u* *:sort-uniq*
With [u] (u stands for unique) only keep the first of
a sequence of identical lines (ignoring case when [i]
is used). Without this flag, a sequence of identical
lines will be kept in their original order.
Note that leading and trailing white space may cause
lines to be different.
When you just want to make things unique, use |:uniq|.
When /{pattern}/ is specified and there is no [r] flag
the text matched with {pattern} is skipped, so that
@ -1990,4 +1993,56 @@ The sorting can be interrupted, but if you interrupt it too late in the
process you may end up with duplicated lines. This also depends on the system
library function used.
==============================================================================
8. Deduplicating text *deduplicating* *unique*
Vim has a deduplicating function and a deduplicating command. The
deduplicating function can be found here: |uniq()|.
Also see |:sort-uniq|.
*:uni* *:uniq*
:[range]uni[q][!] [i][l][r][u] [/{pattern}/]
Remove duplicate lines that are adjacent to each other
in [range]. When no range is given, all lines are
processed.
With [i] case is ignored when comparing lines.
With [l] comparison uses the current collation locale.
See |:sort-l| for more details.
With [r] comparison is done on the text that matches
/{pattern}/ instead of the full line.
When /{pattern}/ is specified and [r] is not used, the
text matched with {pattern} is skipped and comparison
is done on what comes after the match.
'ignorecase' applies to the pattern, but 'smartcase'
is not used.
Instead of the slash any non-letter can be used.
For example, to remove adjacent duplicate lines based
on the second comma-separated field: >
:uniq /[^,]*,/
< Or to keep only unique lines ignoring the first 5
characters: >
:uniq u /.\{5}/
< If {pattern} is empty (e.g. // is used), the last
search pattern is used.
With [u] only keep lines that do not repeat (i.e., are
neither immediately preceded nor immediately followed
by the same line).
With [!] only keep one copy of each line that is
immediately followed by a duplicate.
If both [!] and [u] are given, [u] is ignored and [!]
takes effect.
Note that leading and trailing white space is
significant: lines that differ only in leading or
trailing white space, and identical lines that are not
adjacent, are not considered duplicates.
To remove all duplicates regardless of position, use
|:sort-u| or external tools.
vim:tw=78:ts=8:noet:ft=help:norl:

View File

@ -1,4 +1,4 @@
*index.txt* For Vim version 9.1. Last change: 2025 Jun 02
*index.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@ -1740,6 +1740,7 @@ tag command action ~
|:unabbreviate| :una[bbreviate] remove abbreviation
|:unhide| :unh[ide] open a window for each loaded file in the
buffer list
|:uniq| :uni[q] remove adjacent duplicate lines
|:unlet| :unl[et] delete variable
|:unlockvar| :unlo[ckvar] unlock variables
|:unmap| :unm[ap] remove mapping

View File

@ -3331,6 +3331,9 @@ $quote eval.txt /*$quote*
:so repeat.txt /*:so*
:sor change.txt /*:sor*
:sort change.txt /*:sort*
:sort-l change.txt /*:sort-l*
:sort-u change.txt /*:sort-u*
:sort-uniq change.txt /*:sort-uniq*
:source repeat.txt /*:source*
:source! repeat.txt /*:source!*
:source-range repeat.txt /*:source-range*
@ -3565,6 +3568,8 @@ $quote eval.txt /*$quote*
:undolist undo.txt /*:undolist*
:unh windows.txt /*:unh*
:unhide windows.txt /*:unhide*
:uni change.txt /*:uni*
:uniq change.txt /*:uniq*
:unl eval.txt /*:unl*
:unlet eval.txt /*:unlet*
:unlet-$ eval.txt /*:unlet-$*
@ -6864,6 +6869,7 @@ debugger-support debugger.txt /*debugger-support*
debugger.txt debugger.txt /*debugger.txt*
dec-mouse options.txt /*dec-mouse*
decada_members ft_ada.txt /*decada_members*
deduplicating change.txt /*deduplicating*
deepcopy() builtin.txt /*deepcopy()*
default-constructor vim9class.txt /*default-constructor*
defaults.vim starting.txt /*defaults.vim*
@ -11015,6 +11021,7 @@ undofile() builtin.txt /*undofile()*
undotree() builtin.txt /*undotree()*
unicode mbyte.txt /*unicode*
uniq() builtin.txt /*uniq()*
unique change.txt /*unique*
unix os_unix.txt /*unix*
unlisted-buffer windows.txt /*unlisted-buffer*
up-down-motions motion.txt /*up-down-motions*

View File

@ -1,4 +1,4 @@
*version9.txt* For Vim version 9.1. Last change: 2025 Jun 16
*version9.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@ -41756,6 +41756,7 @@ Ex-Commands: ~
|:pbuffer| Edit buffer [N] from the buffer list in the preview
window
|:redrawtabpanel| Force updating the 'tabpanel'.
|:uniq| Deduplicate text in the current buffer.
Options: ~

View File

@ -26,11 +26,11 @@ static const unsigned short cmdidxs1[26] =
/* s */ 406,
/* t */ 476,
/* u */ 523,
/* v */ 534,
/* w */ 555,
/* x */ 569,
/* y */ 579,
/* z */ 580
/* v */ 535,
/* w */ 556,
/* x */ 570,
/* y */ 580,
/* z */ 581
};
/*
@ -61,7 +61,7 @@ static const unsigned char cmdidxs2[26][26] =
/* r */ { 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 20, 0, 0, 0, 0 },
/* s */ { 2, 6, 15, 0, 19, 23, 0, 25, 26, 0, 0, 29, 31, 35, 39, 41, 0, 50, 0, 51, 0, 64, 65, 0, 66, 0 },
/* t */ { 2, 0, 19, 0, 24, 26, 0, 27, 0, 29, 0, 30, 34, 37, 39, 40, 0, 41, 43, 0, 44, 0, 0, 0, 46, 0 },
/* u */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
/* u */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
/* v */ { 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 12, 15, 0, 0, 0, 0, 18, 0, 19, 0, 0, 0, 0, 0 },
/* w */ { 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, 8, 0, 9, 10, 0, 0, 0, 12, 13, 0, 0, 0, 0 },
/* x */ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 7, 0, 0, 8, 0, 0, 0, 0, 0 },
@ -69,4 +69,4 @@ static const unsigned char cmdidxs2[26][26] =
/* z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
static const int command_count = 597;
static const int command_count = 598;

View File

@ -643,6 +643,237 @@ sortend:
emsg(_(e_interrupted));
}
/*
 * ":uniq".
 *
 * Remove adjacent duplicate lines in the range eap->line1 .. eap->line2.
 *
 * Flags parsed from eap->arg:
 *   i	    sets sort_ic (ignore case)
 *   l	    sets sort_lc (locale collation)
 *   r	    sets sort_rx (compare the text matched by the pattern)
 *   u	    keep only lines that are not part of a run of duplicates;
 *	    ignored when "!" (eap->forceit) is given
 *   /pat/  without "r": compare the text after the match
 * With "!" only one line of every run of duplicates is kept.
 *
 * NOTE(review): sort_ic / sort_lc and string_compare() are defined outside
 * this block; presumably string_compare() honours those two flags - confirm.
 */
    void
ex_uniq(exarg_T *eap)
{
    regmatch_T	regmatch;
    int		len;
    linenr_T	lnum;
    long	maxlen = 0;
    linenr_T	count = eap->line2 - eap->line1 + 1;
    char_u	*p;
    char_u	*s;
    char_u	save_c = NUL;	// character temporarily replaced by NUL
    int		keep_only_unique = FALSE;
    int		keep_only_not_unique = eap->forceit ? TRUE : FALSE;
    long	deleted = 0;
    colnr_T	start_col;
    colnr_T	end_col;

    // Uniq one line is really quick!
    if (count <= 1)
	return;

    if (u_save((linenr_T)(eap->line1 - 1), (linenr_T)(eap->line2 + 1)) == FAIL)
	return;
    sortbuf1 = NULL;
    regmatch.regprog = NULL;

    sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
    sort_flt = 0;

    // Parse the flags and the optional pattern.
    for (p = eap->arg; *p != NUL; ++p)
    {
	if (VIM_ISWHITE(*p))
	    ;
	else if (*p == 'i')
	    sort_ic = TRUE;
	else if (*p == 'l')
	    sort_lc = TRUE;
	else if (*p == 'r')
	    sort_rx = TRUE;
	else if (*p == 'u')
	{
	    // 'u' is only valid when '!' is not given.
	    if (!keep_only_not_unique)
		keep_only_unique = TRUE;
	}
	else if (*p == '"')	// comment start
	    break;
	else if (eap->nextcmd == NULL && check_nextcmd(p) != NULL)
	{
	    eap->nextcmd = check_nextcmd(p);
	    break;
	}
	else if (!ASCII_ISALPHA(*p) && regmatch.regprog == NULL)
	{
	    // Any non-letter starts the pattern and is its delimiter.
	    s = skip_regexp_err(p + 1, *p, TRUE);
	    if (s == NULL)
		goto uniqend;
	    *s = NUL;
	    // Use last search pattern if uniq pattern is empty.
	    if (s == p + 1)
	    {
		if (last_search_pat() == NULL)
		{
		    emsg(_(e_no_previous_regular_expression));
		    goto uniqend;
		}
		regmatch.regprog = vim_regcomp(last_search_pat(), RE_MAGIC);
	    }
	    else
		regmatch.regprog = vim_regcomp(p + 1, RE_MAGIC);
	    if (regmatch.regprog == NULL)
		goto uniqend;
	    p = s;		// continue after the regexp
	    regmatch.rm_ic = p_ic;
	}
	else
	{
	    semsg(_(e_invalid_argument_str), p);
	    goto uniqend;
	}
    }

    // Find the longest line in the range, so that "sortbuf1" can hold the
    // compared part of any line.  Only the length is needed here, the text
    // itself is fetched in the deletion loop below.
    for (lnum = eap->line1; lnum <= eap->line2; ++lnum)
    {
	len = ml_get_len(lnum);
	if (maxlen < len)
	    maxlen = len;

	// Nothing has been changed yet, bail out directly.
	if (got_int)
	    goto uniqend;
    }

    // Allocate a buffer that can hold the longest line.
    sortbuf1 = alloc(maxlen + 1);
    if (sortbuf1 == NULL)
	goto uniqend;

    // Delete lines according to options.  "sortbuf1" holds the compared
    // text of the line that started the current run.
    int		match_continue = FALSE;
    int		next_is_unmatch = FALSE;
    int		is_match;
    linenr_T	done_lnum = eap->line1 - 1;
    linenr_T	delete_lnum = 0;
    for (linenr_T i = 0; i < count; ++i)
    {
	linenr_T get_lnum = eap->line1 + i;

	s = ml_get(get_lnum);
	len = ml_get_len(get_lnum);

	// Work out which part of the line takes part in the comparison.
	start_col = 0;
	end_col = len;
	if (regmatch.regprog != NULL && vim_regexec(&regmatch, s, 0))
	{
	    if (sort_rx)
	    {
		start_col = (colnr_T)(regmatch.startp[0] - s);
		end_col = (colnr_T)(regmatch.endp[0] - s);
	    }
	    else
		start_col = (colnr_T)(regmatch.endp[0] - s);
	}
	else
	    if (regmatch.regprog != NULL)
		end_col = 0;

	// Temporarily terminate the line at "end_col"; restored below, before
	// any line is deleted.
	if (end_col > 0)
	{
	    save_c = s[end_col];
	    s[end_col] = NUL;
	}

	is_match = i > 0 ? !string_compare(&s[start_col], sortbuf1) : FALSE;
	delete_lnum = 0;
	if (next_is_unmatch)
	{
	    is_match = FALSE;
	    next_is_unmatch = FALSE;
	}

	if (!keep_only_unique && !keep_only_not_unique)
	{
	    // Plain ":uniq": drop every repeat of the run's first line.
	    if (is_match)
		delete_lnum = get_lnum;
	    else
		STRCPY(sortbuf1, &s[start_col]);
	}
	else if (keep_only_not_unique)
	{
	    if (is_match)
	    {
		done_lnum = get_lnum - 1;
		delete_lnum = get_lnum;
		match_continue = TRUE;
	    }
	    else
	    {
		if (i > 0 && !match_continue && get_lnum - 1 > done_lnum)
		{
		    // The previous line turned out not to be repeated.
		    delete_lnum = get_lnum - 1;
		    next_is_unmatch = TRUE;
		}
		else if (i >= count - 1)
		    delete_lnum = get_lnum;
		match_continue = FALSE;
		STRCPY(sortbuf1, &s[start_col]);
	    }
	}
	else // keep_only_unique
	{
	    if (is_match)
	    {
		if (!match_continue)
		    delete_lnum = get_lnum - 1;
		else
		    delete_lnum = get_lnum;
		match_continue = TRUE;
	    }
	    else
	    {
		if (i == 0 && match_continue)
		    delete_lnum = get_lnum;
		match_continue = FALSE;
		STRCPY(sortbuf1, &s[start_col]);
	    }
	}

	if (end_col > 0)
	    s[end_col] = save_c;

	if (delete_lnum > 0)
	{
	    ml_delete(delete_lnum);
	    // Step back so that after the "++i" of the loop the line that
	    // moved up into the freed slot is inspected next.
	    i -= get_lnum - delete_lnum + 1;
	    count--;
	    deleted++;
	}

	fast_breakcheck();
	if (got_int)
	    // Lines may already have been deleted: leave the loop but still
	    // do the bookkeeping below so the buffer state stays consistent.
	    break;
    }

    if (deleted > 0)
    {
	// Adjust marks for deleted lines and prepare for displaying.
	// Only done when something was deleted: with "deleted" zero the
	// mark range would still cover eap->line2 itself.
	// NOTE(review): mark_adjust() with MAXLNUM is assumed to clear marks
	// inside the range, as for the equivalent guarded call in ex_sort()
	// - confirm.
	mark_adjust(eap->line2 - deleted, eap->line2, (long)MAXLNUM, -deleted);
	msgmore(-deleted);
	changed_lines(eap->line1, 0, eap->line2 + 1, -deleted);
    }

    curwin->w_cursor.lnum = eap->line1;
    beginline(BL_WHITE | BL_FIX);

uniqend:
    vim_free(sortbuf1);
    vim_regfree(regmatch.regprog);
    if (got_int)
	emsg(_(e_interrupted));
}
/*
* :move command - move lines line1-line2 to line dest
*

View File

@ -1700,6 +1700,9 @@ EXCMD(CMD_unabbreviate, "unabbreviate", ex_abbreviate,
EXCMD(CMD_unhide, "unhide", ex_buffer_all,
EX_RANGE|EX_COUNT|EX_TRLBAR,
ADDR_OTHER),
EXCMD(CMD_uniq, "uniq", ex_uniq,
EX_RANGE|EX_DFLALL|EX_WHOLEFOLD|EX_BANG|EX_EXTRA|EX_NOTRLCOM|EX_MODIFY,
ADDR_LINES),
EXCMD(CMD_unlet, "unlet", ex_unlet,
EX_BANG|EX_EXTRA|EX_NEEDARG|EX_SBOXOK|EX_CMDWIN|EX_LOCK_OK,
ADDR_NONE),

View File

@ -276,7 +276,8 @@ do_incsearch_highlighting(
else if (*cmd == 's' && cmd[1] == 'n')
magic_overruled = OPTION_MAGIC_OFF;
}
else if (STRNCMP(cmd, "sort", MAX(p - cmd, 3)) == 0)
else if (STRNCMP(cmd, "sort", MAX(p - cmd, 3)) == 0
|| STRNCMP(cmd, "uniq", MAX(p - cmd, 3)) == 0)
{
// skip over ! and flags
if (*p == '!')

View File

@ -2,6 +2,7 @@
void do_ascii(exarg_T *eap);
void ex_align(exarg_T *eap);
void ex_sort(exarg_T *eap);
void ex_uniq(exarg_T *eap);
int do_move(linenr_T line1, linenr_T line2, linenr_T dest);
void ex_copy(linenr_T line1, linenr_T line2, linenr_T n);
void free_prev_shellcmd(void);

View File

@ -331,6 +331,7 @@ NEW_TESTS = \
test_trycatch \
test_tuple \
test_undo \
test_uniq \
test_unlet \
test_user_func \
test_usercommands \
@ -586,6 +587,7 @@ NEW_TESTS_RES = \
test_trycatch.res \
test_tuple.res \
test_undo.res \
test_uniq.res \
test_user_func.res \
test_usercommands.res \
test_vartabs.res \

612
src/testdir/test_uniq.vim Normal file
View File

@ -0,0 +1,612 @@
" Tests for the ":uniq" command.
source check.vim
" Tests for the ":uniq" command.
func Test_uniq_cmd()
  " Table of cases.  For each one 'cmd' is executed in a fresh buffer that
  " holds 'input'; afterwards the buffer must equal 'expected'.
  let cases = [
        \ {
        \   'name': 'Alphabetical uniq #1',
        \   'cmd': '%uniq',
        \   'input': [
        \     'abc', 'ab', 'a', 'a321', 'a123', 'a123', 'a123', 'a123',
        \     'a122', 'a123', 'b321', 'c123d', ' 123b', 'c321d', 'b322b',
        \     'b321', 'b321b'
        \   ],
        \   'expected': [
        \     'abc', 'ab', 'a', 'a321', 'a123', 'a122', 'a123', 'b321',
        \     'c123d', ' 123b', 'c321d', 'b322b', 'b321', 'b321b'
        \   ]
        \ },
        \ {
        \   'name': 'Alphabetical uniq #2',
        \   'cmd': '%uniq',
        \   'input': [
        \     'abc', 'abc', 'abc', 'ab', 'a', 'a321', 'a122', 'b321',
        \     'a123', 'a123', 'c123d', ' 123b', 'c321d', 'b322b', 'b321',
        \     'b321b'
        \   ],
        \   'expected': [
        \     'abc', 'ab', 'a', 'a321', 'a122', 'b321', 'a123', 'c123d',
        \     ' 123b', 'c321d', 'b322b', 'b321', 'b321b'
        \   ]
        \ },
        \ {
        \   'name': 'alphabetical, uniqed input',
        \   'cmd': 'uniq',
        \   'input': ['a', 'b', 'c'],
        \   'expected': ['a', 'b', 'c']
        \ },
        \ {
        \   'name': 'alphabetical, uniqed input, unique at end',
        \   'cmd': 'uniq',
        \   'input': ['aa', 'bb', 'cc', 'cc'],
        \   'expected': ['aa', 'bb', 'cc']
        \ },
        \ {
        \   'name': 'uniq one line buffer',
        \   'cmd': 'uniq',
        \   'input': ['single line'],
        \   'expected': ['single line']
        \ },
        \ {
        \   'name': 'uniq ignoring case',
        \   'cmd': '%uniq i',
        \   'input': ['BB', 'Cc', 'cc', 'Cc', 'aa'],
        \   'expected': ['BB', 'Cc', 'aa']
        \ },
        \ {
        \   'name': 'uniq not uniqued #1',
        \   'cmd': '%uniq!',
        \   'input': ['aa', 'cc', 'cc', 'cc', 'bb', 'aa', 'yyy', 'yyy', 'zz'],
        \   'expected': ['cc', 'yyy']
        \ },
        \ {
        \   'name': 'uniq not uniqued #2',
        \   'cmd': '%uniq!',
        \   'input': ['aa', 'aa', 'bb', 'cc', 'cc', 'cc', 'yyy', 'yyy', 'zz'],
        \   'expected': ['aa', 'cc', 'yyy']
        \ },
        \ {
        \   'name': 'uniq not uniqued ("u" is ignored)',
        \   'cmd': '%uniq! u',
        \   'input': ['aa', 'cc', 'cc', 'cc', 'bb', 'aa', 'yyy', 'yyy', 'zz'],
        \   'expected': ['cc', 'yyy']
        \ },
        \ {
        \   'name': 'uniq not uniqued, ignoring case',
        \   'cmd': '%uniq! i',
        \   'input': ['aa', 'cc', 'cc', 'Cc', 'bb', 'aa', 'yyy', 'yyy', 'zz'],
        \   'expected': ['cc', 'yyy']
        \ },
        \ {
        \   'name': 'uniq only unique #1',
        \   'cmd': '%uniq u',
        \   'input': ['aa', 'cc', 'cc', 'cc', 'bb', 'aa', 'yyy', 'yyy', 'zz'],
        \   'expected': ['aa', 'bb', 'aa', 'zz']
        \ },
        \ {
        \   'name': 'uniq only unique #2',
        \   'cmd': '%uniq u',
        \   'input': ['aa', 'aa', 'bb', 'cc', 'cc', 'cc', 'yyy', 'yyy', 'zz'],
        \   'expected': ['bb', 'zz']
        \ },
        \ {
        \   'name': 'uniq only unique, ignoring case',
        \   'cmd': '%uniq ui',
        \   'input': ['aa', 'cc', 'Cc', 'cc', 'bb', 'aa', 'yyy', 'yyy', 'zz'],
        \   'expected': ['aa', 'bb', 'aa', 'zz']
        \ },
        \ {
        \   'name': 'uniq on first 2 charscters',
        \   'cmd': '%uniq r /^../',
        \   'input': ['aa', 'cc', 'cc1', 'cc2', 'bb', 'aa', 'yyy', 'yyy2', 'zz'],
        \   'expected': ['aa', 'cc', 'bb', 'aa', 'yyy', 'zz']
        \ },
        \ {
        \   'name': 'uniq on after 2 charscters',
        \   'cmd': '%uniq /^../',
        \   'input': [
        \     '11aa', '11cc', '13cc', '13cc', '13bb', '13aa', '12yyy',
        \     '11yyy', '11zz'
        \   ],
        \   'expected': ['11aa', '11cc', '13bb', '13aa', '12yyy', '11zz']
        \ },
        \ {
        \   'name': 'uniq on first 2 charscters, not uniqued',
        \   'cmd': '%uniq! r /^../',
        \   'input': ['aa', 'cc', 'cc1', 'cc2', 'bb', 'aa', 'yyy', 'yyy2', 'zz'],
        \   'expected': ['cc', 'yyy']
        \ },
        \ {
        \   'name': 'uniq on after 2 charscters, not uniqued',
        \   'cmd': '%uniq! /^../',
        \   'input': [
        \     '11aa', '11cc', '13cc', '13cc', '13bb', '13aa', '12yyy',
        \     '11yyy', '11zz'
        \   ],
        \   'expected': ['11cc', '12yyy']
        \ },
        \ {
        \   'name': 'uniq on first 2 charscters, only unique',
        \   'cmd': '%uniq ru /^../',
        \   'input': ['aa', 'cc', 'cc1', 'cc2', 'bb', 'aa', 'yyy', 'yyy2', 'zz'],
        \   'expected': ['aa', 'bb', 'aa', 'zz']
        \ },
        \ {
        \   'name': 'uniq on after 2 charscters, only unique',
        \   'cmd': '%uniq u /^../',
        \   'input': [
        \     '11aa', '11cc', '13cc', '13cc', '13bb', '13aa', '12yyy',
        \     '11yyy', '11zz'
        \   ],
        \   'expected': ['11aa', '13bb', '13aa', '11zz']
        \ }
        \ ]

  " The locale-dependent cases are skipped on Mac, where this does not appear
  " to work correctly.
  if !has('mac')
    if v:collate =~? '^\(en\|fr\)_ca.utf-\?8$'
      " With the en_CA / fr_CA collation no two adjacent lines below are
      " expected to compare equal, so the buffer must stay unchanged.
      " 'Œ' is left out of this list.
      call extend(cases, [
            \ {
            \   'name': 'uniq with locale ' .. v:collate,
            \   'cmd': '%uniq l',
            \   'input': [
            \     'A', 'a', 'À', 'à', 'E', 'e', 'É', 'é', 'È', 'è', 'O', 'o',
            \     'Ô', 'ô', 'œ', 'Z', 'z'
            \   ],
            \   'expected': [
            \     'A', 'a', 'À', 'à', 'E', 'e', 'É', 'é', 'È', 'è', 'O', 'o',
            \     'Ô', 'ô', 'œ', 'Z', 'z'
            \   ]
            \ },
            \ ])
    elseif v:collate =~? '^\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8$'
      " With these locales, the accentuated letters are ordered similarly to
      " the non-accentuated letters.  Only the exact repeat ('à', 'à') is
      " expected to be removed.
      call extend(cases, [
            \ {
            \   'name': 'uniq with locale ' .. v:collate,
            \   'cmd': '%uniq li',
            \   'input': [
            \     'A', 'À', 'a', 'à', 'à', 'E', 'È', 'É', 'o', 'O', 'Ô', 'e',
            \     'è', 'é', 'ô', 'Œ', 'œ', 'z', 'Z'
            \   ],
            \   'expected': [
            \     'A', 'À', 'a', 'à', 'E', 'È', 'É', 'o', 'O', 'Ô', 'e', 'è',
            \     'é', 'ô', 'Œ', 'œ', 'z', 'Z'
            \   ]
            \ },
            \ ])
    endif
  endif

  for tc in cases
    " Fill a new buffer with the input and clear the 'modified' flag.
    enew!
    call append(0, tc.input)
    $delete _
    setlocal nomodified

    execute tc.cmd
    call assert_equal(tc.expected, getline(1, '$'), tc.name)

    " 'modified' must be set exactly when the buffer contents were changed.
    if tc.input == tc.expected
      call assert_false(&modified, tc.name . ': &mod is not correct')
    else
      call assert_true(&modified, tc.name . ': &mod is not correct')
    endif
  endfor

  " Invalid arguments and ranges.  Needs at least two lines for this test,
  " a single line range returns before the arguments are looked at.
  call setline(1, ['line1', 'line2'])
  call assert_fails('uniq no', 'E475:')
  call assert_fails('uniq c', 'E475:')
  call assert_fails('uniq #pat%', 'E654:')
  call assert_fails('uniq /\%(/', 'E53:')
  call assert_fails('333uniq', 'E16:')
  call assert_fails('1,999uniq', 'E16:')

  enew!
endfunc
" Test the "N fewer lines" message of ":uniq" for different 'report' values.
func Test_uniq_cmd_report()
  " Three runs of three identical lines each: six lines get removed.
  let numbers = repeat([1], 3) + repeat([2], 3) + repeat([3], 3)

  " Current 'report' value: the message is expected.
  enew!
  call append(0, numbers)
  $delete _
  setlocal nomodified
  let output = execute('%uniq')
  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
  call assert_match("6 fewer lines", output)

  " 'report' above the number of removed lines: no message.
  enew!
  call append(0, numbers)
  $delete _
  setlocal nomodified report=10
  let output = execute('%uniq')
  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
  call assert_equal("", output)

  " Run from inside :global, with 'report' back at the Vim default.
  enew!
  call append(0, numbers)
  $delete _
  setl report&vim
  setlocal nomodified
  let output = execute('1g/^/%uniq')
  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
  " the output comes from the :g command, not from the :uniq
  call assert_match("6 fewer lines", output)

  enew!
endfunc
" Test for a :uniq command followed by another command
func Test_uniq_followed_by_cmd()
  new

  " A command after "|" must be executed, and :uniq must leave a buffer
  " without adjacent duplicates alone.
  let var = ''
  call setline(1, ['cc', 'aa', 'bb'])
  %uniq | let var = "uniqcmdtest"
  " assert_equal() takes the expected value first, then the actual value.
  call assert_equal("uniqcmdtest", var)
  call assert_equal(['cc', 'aa', 'bb'], getline(1, '$'))

  " Test for :uniq followed by a comment
  call setline(1, ['3b', '3b', '3b', '1c', '2a'])
  %uniq " uniq alphabetically
  call assert_equal(['3b', '1c', '2a'], getline(1, '$'))

  bw!
endfunc
" Test for retaining marks across a :uniq
func Test_uniq_with_marks()
  new
  call setline(1, ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb'])

  " Put mark c on line 1, mark a on line 4 and mark b on line 7.
  for [markname, marklnum] in [["'c", 1], ["'a", 4], ["'b", 7]]
    call setpos(markname, [0, marklnum, 0, 0])
  endfor

  %uniq
  call assert_equal(['cc', 'aa', 'bb'], getline(1, '$'))

  " Mark c stays on line 1; marks a and b are expected to be gone, for which
  " line() returns zero.
  call assert_equal(1, line("'c"))
  call assert_equal(0, line("'a"))
  call assert_equal(0, line("'b"))

  bw!
endfunc
" Test for undo after a :uniq
func Test_uniq_undo()
  new

  " Edit a file from disk, so that the buffer starts out unmodified.  The
  " 'D' flag is given so the file gets removed when the function ends.
  let original = ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb', 'aa']
  call writefile(original, 'XfileUniq', 'D')
  edit XfileUniq

  " Only adjacent duplicates go away: the trailing 'aa' must remain.
  uniq
  call assert_equal(['cc', 'aa', 'bb', 'aa'], getline(1, '$'))
  call assert_true(&modified)

  " A single undo restores both the text and the unmodified state.
  undo
  call assert_equal(original, getline(1, '$'))
  call assert_false(&modified)

  bw!
endfunc
" vim: shiftwidth=2 sts=2 expandtab