feat: update unicode tables (#27317)

This commit is contained in:
zeertzjq
2024-02-04 06:28:51 +08:00
committed by GitHub
parent 2e982f1aad
commit c559ab0ae6
4 changed files with 2602 additions and 2590 deletions

View File

@ -1,6 +1,6 @@
# CaseFolding-15.0.0.txt # CaseFolding-15.1.0.txt
# Date: 2022-02-02, 23:35:35 GMT # Date: 2023-05-12, 21:53:10 GMT
# © 2022 Unicode®, Inc. # © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html # For terms of use, see https://www.unicode.org/terms_of_use.html
# #
@ -929,6 +929,7 @@
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
@ -937,6 +938,7 @@
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
@ -1328,6 +1330,7 @@ FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH

View File

@ -1,6 +1,6 @@
# EastAsianWidth-15.0.0.txt # EastAsianWidth-15.1.0.txt
# Date: 2022-05-24, 17:40:20 GMT [KW, LI] # Date: 2023-07-28, 23:34:08 GMT
# © 2022 Unicode®, Inc. # © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html # For terms of use, see https://www.unicode.org/terms_of_use.html
# #
@ -30,11 +30,10 @@
# Character ranges are specified as for other property files in the # Character ranges are specified as for other property files in the
# Unicode Character Database. # Unicode Character Database.
# #
# For legacy reasons, there are no spaces before or after the semicolon # The comments following the number sign "#" list the General_Category
# which separates the two fields. The comments following the number sign # property value or the L& alias of the derived value LC, the Unicode
# "#" list the General_Category property value or the L& alias of the # character name or names, and, in lines with ranges of code points,
# derived value LC, the Unicode character name or names, and, in lines # the code point count in square brackets.
# with ranges of code points, the code point count in square brackets.
# #
# For more information, see UAX #11: East Asian Width, # For more information, see UAX #11: East Asian Width,
# at https://www.unicode.org/reports/tr11/ # at https://www.unicode.org/reports/tr11/
@ -1430,7 +1429,7 @@
2E80..2E99 ; W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E80..2E99 ; W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2E9B..2EF3 ; W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE 2F00..2FD5 ; W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
2FF0..2FFB;W # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID 2FF0..2FFF ; W # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
3000 ; F # Zs IDEOGRAPHIC SPACE 3000 ; F # Zs IDEOGRAPHIC SPACE
3001..3003 ; W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3001..3003 ; W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; W # So JAPANESE INDUSTRIAL STANDARD SYMBOL 3004 ; W # So JAPANESE INDUSTRIAL STANDARD SYMBOL
@ -1489,6 +1488,7 @@
3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH 31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q 31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q
31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3220..3229 ; W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN 3220..3229 ; W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
@ -2602,7 +2602,9 @@ FFFD;A # So REPLACEMENT CHARACTER
2B820..2CEA1 ; W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2B820..2CEA1 ; W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEA2..2CEAF ; W # Cn [14] <reserved-2CEA2>..<reserved-2CEAF> 2CEA2..2CEAF ; W # Cn [14] <reserved-2CEA2>..<reserved-2CEAF>
2CEB0..2EBE0 ; W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2CEB0..2EBE0 ; W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBE1..2F7FF;W # Cn [3103] <reserved-2EBE1>..<reserved-2F7FF> 2EBE1..2EBEF ; W # Cn [15] <reserved-2EBE1>..<reserved-2EBEF>
2EBF0..2EE5D ; W # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2EE5E..2F7FF ; W # Cn [2466] <reserved-2EE5E>..<reserved-2F7FF>
2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
2FA1E..2FA1F ; W # Cn [2] <reserved-2FA1E>..<reserved-2FA1F> 2FA1E..2FA1F ; W # Cn [2] <reserved-2FA1E>..<reserved-2FA1F>
2FA20..2FFFD ; W # Cn [1502] <reserved-2FA20>..<reserved-2FFFD> 2FA20..2FFFD ; W # Cn [1502] <reserved-2FA20>..<reserved-2FFFD>

View File

@ -11231,6 +11231,10 @@
2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;; 2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;;
2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;; 2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;;
2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;; 2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;;
2FFC;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT;So;0;ON;;;;;N;;;;;
2FFD;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT;So;0;ON;;;;;N;;;;;
2FFE;IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION;So;0;ON;;;;;N;;;;;
2FFF;IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION;So;0;ON;;;;;N;;;;;
3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;; 3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;;
3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;; 3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;;
3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;; 3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;;
@ -11705,6 +11709,7 @@
31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;; 31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;;
31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;; 31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;;
31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;; 31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;;
31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;;
31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;; 31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;;
31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;; 31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;;
31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;; 31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;;
@ -34035,6 +34040,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;; 2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;;
2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;; 2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;;
2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;; 2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;;
2EBF0;<CJK Ideograph Extension I, First>;Lo;0;L;;;;;N;;;;;
2EE5D;<CJK Ideograph Extension I, Last>;Lo;0;L;;;;;N;;;;;
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;; 2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;; 2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;;
2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;; 2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;;

View File

@ -1,11 +1,11 @@
# emoji-data.txt # emoji-data.txt
# Date: 2022-08-02, 00:26:10 GMT # Date: 2023-02-01, 02:22:54 GMT
# © 2022 Unicode®, Inc. # © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html # For terms of use, see https://www.unicode.org/terms_of_use.html
# #
# Emoji Data for UTS #51 # Emoji Data for UTS #51
# Used with Emoji Version 15.0 and subsequent minor revisions (if any) # Used with Emoji Version 15.1 and subsequent minor revisions (if any)
# #
# For documentation and usage, see https://www.unicode.org/reports/tr51 # For documentation and usage, see https://www.unicode.org/reports/tr51
# #