From a24f5be86d895981cb84266f98bae9f14407264b Mon Sep 17 00:00:00 2001 From: Rob B Date: Mon, 14 Jul 2025 22:30:59 +0200 Subject: [PATCH] runtime(python): highlight bytes in python - Highlight bytes literals - Do not highlight Unicode escape sequences in bytes literals fixes: #14033 fixes: #17726 closes: #17728 Signed-off-by: Rob B Signed-off-by: Christian Brabandt --- runtime/syntax/python.vim | 42 +++++++-- .../dumps/python_strings_bytes_00.dump | 20 +++++ .../dumps/python_strings_bytes_01.dump | 20 +++++ .../dumps/python_strings_bytes_02.dump | 20 +++++ .../dumps/python_strings_bytes_03.dump | 20 +++++ .../dumps/python_strings_bytes_04.dump | 20 +++++ .../dumps/python_strings_bytes_05.dump | 20 +++++ .../testdir/input/python_strings_bytes.py | 90 +++++++++++++++++++ 8 files changed, 245 insertions(+), 7 deletions(-) create mode 100644 runtime/syntax/testdir/dumps/python_strings_bytes_00.dump create mode 100644 runtime/syntax/testdir/dumps/python_strings_bytes_01.dump create mode 100644 runtime/syntax/testdir/dumps/python_strings_bytes_02.dump create mode 100644 runtime/syntax/testdir/dumps/python_strings_bytes_03.dump create mode 100644 runtime/syntax/testdir/dumps/python_strings_bytes_04.dump create mode 100644 runtime/syntax/testdir/dumps/python_strings_bytes_05.dump create mode 100644 runtime/syntax/testdir/input/python_strings_bytes.py diff --git a/runtime/syntax/python.vim b/runtime/syntax/python.vim index 68036f5905..759af2d4ff 100644 --- a/runtime/syntax/python.vim +++ b/runtime/syntax/python.vim @@ -1,9 +1,10 @@ " Vim syntax file " Language: Python " Maintainer: Zvezdan Petkovic -" Last Change: 2023 Feb 28 +" Last Change: 2025 Jul 14 " Credits: Neil Schemenauer " Dmitry Vasiliev +" Rob B " " This version is a major rewrite by Zvezdan Petkovic. " @@ -144,24 +145,48 @@ syn keyword pythonTodo FIXME NOTE NOTES TODO XXX contained " Triple-quoted strings can contain doctests. syn region pythonString matchgroup=pythonQuotes \ start=+[uU]\=\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1" - \ contains=pythonEscape,@Spell + \ contains=pythonEscape,pythonUnicodeEscape,@Spell syn region pythonString matchgroup=pythonTripleQuotes \ start=+[uU]\=\z('''\|"""\)+ end="\z1" keepend - \ contains=pythonEscape,pythonSpaceError,pythonDoctest,@Spell + \ contains=pythonEscape,pythonUnicodeEscape,pythonSpaceError,pythonDoctest,@Spell syn region pythonRawString matchgroup=pythonQuotes - \ start=+[uU]\=[rR]\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1" + \ start=+[rR]\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1" \ contains=@Spell syn region pythonRawString matchgroup=pythonTripleQuotes - \ start=+[uU]\=[rR]\z('''\|"""\)+ end="\z1" keepend + \ start=+[rR]\z('''\|"""\)+ end="\z1" keepend \ contains=pythonSpaceError,pythonDoctest,@Spell +" Bytes +syn region pythonBytes + \ matchgroup=pythonQuotes + \ start=+\cB\z(['"]\)+ + \ end="\z1" + \ skip="\\\\\|\\\z1" + \ contains=pythonEscape +syn region pythonBytes + \ matchgroup=pythonTripleQuotes + \ start=+\cB\z('''\|"""\)+ + \ end="\z1" + \ keepend + \ contains=pythonEscape +syn region pythonRawBytes + \ matchgroup=pythonQuotes + \ start=+\c\%(BR\|RB\)\z(['"]\)+ + \ end="\z1" + \ skip="\\\\\|\\\z1" +syn region pythonRawBytes + \ matchgroup=pythonTripleQuotes + \ start=+\c\%(BR\|RB\)\z('''\|"""\)+ + \ end="\z1" + \ keepend + syn match pythonEscape +\\[abfnrtv'"\\]+ contained syn match pythonEscape "\\\o\{1,3}" contained syn match pythonEscape "\\x\x\{2}" contained -syn match pythonEscape "\%(\\u\x\{4}\|\\U\x\{8}\)" contained +syn match pythonUnicodeEscape "\%(\\u\x\{4}\|\\U\x\{8}\)" contained " Python allows case-insensitive Unicode IDs: http://www.unicode.org/charts/ " The specification: https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-4/#G135165 -syn match pythonEscape "\\N{\a\+\%(\%(\s\a\+[[:alnum:]]*\)\|\%(-[[:alnum:]]\+\)\)*}" contained +syn match pythonUnicodeEscape "\\N{\a\+\%(\%(\s\a\+[[:alnum:]]*\)\|\%(-[[:alnum:]]\+\)\)*}" contained syn match pythonEscape "\\$" " It is very important to understand all details before changing the @@ -313,9 +338,12 @@ hi def link pythonComment Comment hi def link pythonTodo Todo hi def link pythonString String hi def link pythonRawString String +hi def link pythonBytes String +hi def link pythonRawBytes String hi def link pythonQuotes String hi def link pythonTripleQuotes pythonQuotes hi def link pythonEscape Special +hi def link pythonUnicodeEscape pythonEscape if !exists("python_no_number_highlight") hi def link pythonNumber Number endif diff --git a/runtime/syntax/testdir/dumps/python_strings_bytes_00.dump b/runtime/syntax/testdir/dumps/python_strings_bytes_00.dump new file mode 100644 index 0000000000..278acab6b5 --- /dev/null +++ b/runtime/syntax/testdir/dumps/python_strings_bytes_00.dump @@ -0,0 +1,20 @@ +>#+0#0000e05#ffffff0| |S|t|r|i|n|g| |a|n|d| |B|y|t|e|s| |l|i|t|e|r|a|l|s| +0#0000000&@47 +|#+0#0000e05&| |h|t@1|p|s|:|/@1|d|o|c|s|.|p|y|t|h|o|n|.|o|r|g|/|3|/|r|e|f|e|r|e|n|c|e|/|l|e|x|i|c|a|l|_|a|n|a|l|y|s|i|s|.|h|t|m|l|#|s|t|r|i|n|g|-|a|n|d|-|b|y|t|e +|s|-|l|i|t|e|r|a|l|s| +0#0000000&@64 +@75 +|#+0#0000e05&| |S|t|r|i|n|g|s| +0#0000000&@65 +|t|e|s|t| |=| |'+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@29 +|t|e|s|t| |=| |"+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@32 +|t|e|s|t| |=| |'+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5| +0#e000002&|a|n|d| |\+0#e000e06&|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A|T|I +|O|N| |M|A|R|K|}|'+0#e000002&| +0#0000000&@65 +|t|e|s|t| |=| |"+0#e000002&|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|d| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h| |a|n|d| |i|g|n|o|r|e|d| |\+0#e000e06&| +0#0000000&@20 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |'+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|'+0#e000002&@2| +0#0000000&@38 +|t|e|s|t| |=| |"+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|"+0#e000002&@2| +0#0000000&@38 +@75 +|#+0#0000e05&| |R|a|w| |s|t|r|i|n|g|s| +0#0000000&@61 +@57|1|,|1| @10|T|o|p| diff --git a/runtime/syntax/testdir/dumps/python_strings_bytes_01.dump b/runtime/syntax/testdir/dumps/python_strings_bytes_01.dump new file mode 100644 index 0000000000..2684b01c5c --- /dev/null +++ b/runtime/syntax/testdir/dumps/python_strings_bytes_01.dump @@ -0,0 +1,20 @@ +|a+0#e000002#ffffff0|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|'+0#e000002&@2| +0#0000000&@38 +|t|e|s|t| |=| |"+0#e000002&@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|"+0#e000002&@2| +0#0000000&@38 +@75 +>#+0#0000e05&| |R|a|w| |s|t|r|i|n|g|s| +0#0000000&@61 +|t|e|s|t| |=| |r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@24 +|t|e|s|t| |=| |R+0#e000002&|"|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@27 +|t|e|s|t| |=| |r+0#e000002&|'|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L +|A|M|A|T|I|O|N| |M|A|R|K|}|'| +0#0000000&@60 +|t|e|s|t| |=| |R+0#e000002&|"|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |l|i|t|e|r|a|l| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@13 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |r+0#e000002&|'@2|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@33 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38 +|t|e|s|t| |=| |R+0#e000002&|"@2|R|a|w| |s|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@33 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38 +@75 +@57|1|7|,|1| @9|1|5|%| diff --git a/runtime/syntax/testdir/dumps/python_strings_bytes_02.dump b/runtime/syntax/testdir/dumps/python_strings_bytes_02.dump new file mode 100644 index 0000000000..c804bc3e7e --- /dev/null +++ b/runtime/syntax/testdir/dumps/python_strings_bytes_02.dump @@ -0,0 +1,20 @@ +| +0&#ffffff0@74 +|#+0#0000e05&| |B|y|t|e|s| +0#0000000&@67 +|t|e|s|t| |=| |b+0#e000002&|'|B|y|t|e|s| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@29 +|t|e|s|t| |=| |B+0#e000002&|"|B|y|t|e|s| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@32 +|t|e|s|t| |=| |b+0#e000002&|'|B|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A|T|I +|O|N| |M|A|R|K|}|'| +0#0000000&@65 +>t|e|s|t| |=| |B+0#e000002&|"|B|y|t|e|s| |w|i|t|h| |e|s|c|a|p|e|d| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h| |a|n|d| |i|g|n|o|r|e|d| |\+0#e000e06&| +0#0000000&@20 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |b+0#e000002&|'@2|B|y|t|e|s| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38 +|t|e|s|t| |=| |B+0#e000002&|"@2|B|y|t|e|s| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@38 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38 +@75 +|#+0#0000e05&| |R|a|w| |b|y|t|e|s| +0#0000000&@63 +|t|e|s|t| |=| |b+0#e000002&|r|'|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@24 +|t|e|s|t| |=| |b+0#e000002&|R|"|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@27 +|t|e|s|t| |=| |B+0#e000002&|r|'|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|@+0#4040ff13&@2 +| +0#0000000&@56|3|4|,|1| @9|3|8|%| diff --git a/runtime/syntax/testdir/dumps/python_strings_bytes_03.dump b/runtime/syntax/testdir/dumps/python_strings_bytes_03.dump new file mode 100644 index 0000000000..27087d93b5 --- /dev/null +++ b/runtime/syntax/testdir/dumps/python_strings_bytes_03.dump @@ -0,0 +1,20 @@ +|t+0&#ffffff0|e|s|t| |=| |B+0#e000002&|r|'|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L +|A|M|A|T|I|O|N| |M|A|R|K|}|'| +0#0000000&@60 +|t|e|s|t| |=| |B+0#e000002&|R|"|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@13 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |r+0#e000002&|b|'|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@24 +>t|e|s|t| |=| |r+0#e000002&|B|"|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@27 +|t|e|s|t| |=| |R+0#e000002&|b|'|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L +|A|M|A|T|I|O|N| |M|A|R|K|}|'| +0#0000000&@60 +|t|e|s|t| |=| |R+0#e000002&|B|"|R|a|w| |b|y|t|e|s| |w|i|t|h| |l|i|t|e|r|a|l| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@13 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |b+0#e000002&|r|'@2|R|a|w| |b|y|t|e|s| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@33 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@38 +|t|e|s|t| |=| |R+0#e000002&|B|"@2|R|a|w| |b|y|t|e|s| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@33 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |l|i|t|e|r|a|l| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|"@2| +0#0000000&@38 +@75 +|#+0#0000e05&| |U|n|i|c|o|d|e| |l|i|t|e|r|a|l|s|:| |P|r|e|f|i|x| |i|s| |a|l@1|o|w|e|d| |b|u|t| |i|g|n|o|r|e|d| |(|h|t@1|p|s|:|/@1|p|e|p|s|.|p|y|t|h|o|n|.|o|r|g|/ +|p|e|p|-|0|4|1|4|)| +0#0000000&@65 +@57|5|0|,|1| @9|6|0|%| diff --git a/runtime/syntax/testdir/dumps/python_strings_bytes_04.dump b/runtime/syntax/testdir/dumps/python_strings_bytes_04.dump new file mode 100644 index 0000000000..545e722c56 --- /dev/null +++ b/runtime/syntax/testdir/dumps/python_strings_bytes_04.dump @@ -0,0 +1,20 @@ +|#+0#0000e05#ffffff0| |U|n|i|c|o|d|e| |l|i|t|e|r|a|l|s|:| |P|r|e|f|i|x| |i|s| |a|l@1|o|w|e|d| |b|u|t| |i|g|n|o|r|e|d| |(|h|t@1|p|s|:|/@1|p|e|p|s|.|p|y|t|h|o|n|.|o|r|g|/ +|p|e|p|-|0|4|1|4|)| +0#0000000&@65 +|t|e|s|t| |=| |u+0#e000002&|'|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@28 +|t|e|s|t| |=| |U+0#e000002&|"|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@31 +|t|e|s|t| |=| |u+0#e000002&|'|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5| +0#e000002&|a|n|d| |\+0#e000e06&|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A|T +|I|O|N| |M|A|R|K|}|'+0#e000002&| +0#0000000&@64 +>t|e|s|t| |=| |U+0#e000002&|"|S|t|r|i|n|g| |w|i|t|h| |e|s|c|a|p|e|d| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h| |a|n|d| |i|g|n|o|r|e|d| |\+0#e000e06&| +0#0000000&@19 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |u+0#e000002&|'@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|'+0#e000002&@2| +0#0000000&@38 +|t|e|s|t| |=| |U+0#e000002&|"@2|S|t|r|i|n|g| |w|i|t|h| |q|u|o|t|e|s| |'| |a|n|d| |"| +0#0000000&@37 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@42 +|a+0#e000002&|n|d| |e|s|c|a|p|e|s| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|"+0#e000002&@2| +0#0000000&@38 +@75 +|#+0#0000e05&| |R|a|w| |U|n|i|c|o|d|e| |l|i|t|e|r|a|l|s| |a|r|e| |n|o|t| |a|l@1|o|w|e|d| +0#0000000&@36 +|t|e|s|t| |=| |u|r+0#e000002&|'|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\|'| |a|n|d| |\|"| |a|n|d| |\|t|'| +0#0000000&@27 +|t|e|s|t| |=| |u|R+0#e000002&|"|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\|0|4|0| |a|n|d| |\|x|F@1|"| +0#0000000&@30 +|t|e|s|t| |=| |U|r+0#e000002&|'|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|@+0#4040ff13&@2 +| +0#0000000&@56|6|5|,|1| @9|8|1|%| diff --git a/runtime/syntax/testdir/dumps/python_strings_bytes_05.dump b/runtime/syntax/testdir/dumps/python_strings_bytes_05.dump new file mode 100644 index 0000000000..e403cb2576 --- /dev/null +++ b/runtime/syntax/testdir/dumps/python_strings_bytes_05.dump @@ -0,0 +1,20 @@ +|t+0&#ffffff0|e|s|t| |=| |U|r+0#e000002&|'|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5| |a|n|d| |\|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A +|T|I|O|N| |M|A|R|K|}|'| +0#0000000&@63 +|t|e|s|t| |=| |U|R+0#e000002&|"|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\@1| |b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\| +0#0000000&@16 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |r|u+0#e000002&|'|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\+0#e000e06&|'| +0#e000002&|a|n|d| |\+0#e000e06&|"| +0#e000002&|a|n|d| |\+0#e000e06&|t|'+0#e000002&| +0#0000000&@27 +>t|e|s|t| |=| |r|U+0#e000002&|"|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1|"+0#e000002&| +0#0000000&@30 +|t|e|s|t| |=| |R|u+0#e000002&|'|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5| +0#e000002&|a|n|d| |\+0#e000e06&|N|{|I|N|V|E|R|T|E|D| |E|X|C|L|A|M|A +|T|I|O|N| |M|A|R|K|}|'+0#e000002&| +0#0000000&@63 +|t|e|s|t| |=| |R|U+0#e000002&|"|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |\+0#e000e06&@1| +0#e000002&|b|a|c|k|s|l|a|s|h|e|s| |a|n|d| |l|i|t|e|r|a|l| |\+0#e000e06&| +0#0000000&@16 +|n+0#e000002&|e|w|l|i|n|e|"| +0#0000000&@66 +|t|e|s|t| |=| |u|r+0#e000002&|'@2|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |'| |a|n|d| |"| +0#0000000&@35 +|a+0#e000002&|n|d| |\|t| |a|n|d| |\|0|4|0| |a|n|d| |\|x|F@1| +0#0000000&@50 +|a+0#e000002&|n|d| |\|u|0@1|A|1| |a|n|d| |\|U|0@2|1|0|6|0|5|'@2| +0#0000000&@46 +|t|e|s|t| |=| |R|U+0#e000002&|"@2|I|n|v|a|l|i|d| |s|t|r|i|n|g| |w|i|t|h| |'| |a|n|d| |"| +0#0000000&@35 +|a+0#e000002&|n|d| |\+0#e000e06&|t| +0#e000002&|a|n|d| |\+0#e000e06&|0|4|0| +0#e000002&|a|n|d| |\+0#e000e06&|x|F@1| +0#0000000&@50 +|a+0#e000002&|n|d| |\+0#e000e06&|u|0@1|A|1| +0#e000002&|a|n|d| |\+0#e000e06&|U|0@2|1|0|6|0|5|"+0#e000002&@2| +0#0000000&@46 +|~+0#4040ff13&| @73 +|~| @73 +|~| @73 +| +0#0000000&@56|8|1|,|1| @9|B|o|t| diff --git a/runtime/syntax/testdir/input/python_strings_bytes.py b/runtime/syntax/testdir/input/python_strings_bytes.py new file mode 100644 index 0000000000..b4a536ca85 --- /dev/null +++ b/runtime/syntax/testdir/input/python_strings_bytes.py @@ -0,0 +1,90 @@ +# String and Bytes literals +# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + +# Strings +test = 'String with escapes \' and \" and \t' +test = "String with escapes \040 and \xFF" +test = 'String with escapes \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = "String with escaped \\ backslash and ignored \ +newline" +test = '''String with quotes ' and " +and escapes \t and \040 and \xFF +and escapes \u00A1 and \U00010605''' +test = """String with quotes ' and " +and escapes \t and \040 and \xFF +and escapes \u00A1 and \U00010605""" + +# Raw strings +test = r'Raw string with literal \' and \" and \t' +test = R"Raw string with literal \040 and \xFF" +test = r'Raw string with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = R"Raw string with literal \\ backslashes and literal \ +newline" +test = r'''Raw string with quotes ' and " +and literal \t and \040 and \xFF +and literal \u00A1 and \U00010605''' +test = R"""Raw string with quotes ' and " +and literal \t and \040 and \xFF +and literal \u00A1 and \U00010605""" + +# Bytes +test = b'Bytes with escapes \' and \" and \t' +test = B"Bytes with escapes \040 and \xFF" +test = b'Bytes with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = B"Bytes with escaped \\ backslash and ignored \ +newline" +test = b'''Bytes with quotes ' and " +and escapes \t and \040 and \xFF +and literal \u00A1 and \U00010605''' +test = B"""Bytes with quotes ' and " +and escapes \t and \040 and \xFF +and literal \u00A1 and \U00010605""" + +# Raw bytes +test = br'Raw bytes with literal \' and \" and \t' +test = bR"Raw bytes with literal \040 and \xFF" +test = Br'Raw bytes with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = BR"Raw bytes with literal \\ backslashes and literal \ +newline" +test = rb'Raw bytes with literal \' and \" and \t' +test = rB"Raw bytes with literal \040 and \xFF" +test = Rb'Raw bytes with literal \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = RB"Raw bytes with literal \\ backslashes and literal \ +newline" +test = br'''Raw bytes with quotes ' and " +and literal \t and \040 and \xFF +and literal \u00A1 and \U00010605''' +test = RB"""Raw bytes with quotes ' and " +and literal \t and \040 and \xFF +and literal \u00A1 and \U00010605""" + +# Unicode literals: Prefix is allowed but ignored (https://peps.python.org/pep-0414) +test = u'String with escapes \' and \" and \t' +test = U"String with escapes \040 and \xFF" +test = u'String with escapes \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = U"String with escaped \\ backslash and ignored \ +newline" +test = u'''String with quotes ' and " +and escapes \t and \040 and \xFF +and escapes \u00A1 and \U00010605''' +test = U"""String with quotes ' and " +and escapes \t and \040 and \xFF +and escapes \u00A1 and \U00010605""" + +# Raw Unicode literals are not allowed +test = ur'Invalid string with \' and \" and \t' +test = uR"Invalid string with \040 and \xFF" +test = Ur'Invalid string with \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = UR"Invalid string with \\ backslashes and literal \ +newline" +test = ru'Invalid string with \' and \" and \t' +test = rU"Invalid string with \040 and \xFF" +test = Ru'Invalid string with \u00A1 and \U00010605 and \N{INVERTED EXCLAMATION MARK}' +test = RU"Invalid string with \\ backslashes and literal \ +newline" +test = ur'''Invalid string with ' and " +and \t and \040 and \xFF +and \u00A1 and \U00010605''' +test = RU"""Invalid string with ' and " +and \t and \040 and \xFF +and \u00A1 and \U00010605"""