[clojure] Fix tokenization of escapes in strings

The notation for escapes in strings follows that of the host platforms,
which differs from the notation for character literals in Clojure
(e.g., "\n" inside a string vs. the character literal `\newline`).
This commit is contained in:
Abdussalam Abdurrahman 2018-12-07 12:30:10 -08:00
parent 4061962186
commit 3f16600ec1
2 changed files with 77 additions and 19 deletions

View file

@ -754,13 +754,72 @@ testTokenization('clojure', [
], 'string'), ], 'string'),
// strings // strings
createTestCases([ [
'\"I\'m a little teapot.\"', {
'\"I\'m a \\\"little\\\" teapot.\"', line: '"I\'m a little teapot."',
'\"I\'m', // this is tokens: [
'a little', // a multi-line {startIndex: 0, type: 'string.clj'},
'teapot.\"' // string ]
], 'string'), },
{
line: '"I\'m a \\"little\\" teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
{startIndex: 7, type: 'string.escape.clj'},
{startIndex: 9, type: 'string.clj'},
{startIndex: 15, type: 'string.escape.clj'},
{startIndex: 17, type: 'string.clj'},
]
}
],
// multi-line strings
[
{
line: '"I\'m',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
},
{
line: '\\"a little\\"',
tokens: [
{startIndex: 0, type: 'string.escape.clj'},
{startIndex: 2, type: 'string.clj'},
{startIndex: 10, type: 'string.escape.clj'},
]
},
{
line: 'teapot."',
tokens: [
{startIndex: 0, type: 'string.clj'},
]
}
],
// strings with other escapes in them (\" \' \\ \b \f \n \r \t)
[{
line: '"the escape \\" \\\' \\\\ \\b \\f \\n \\r \\t characters"',
tokens: [
{startIndex: 0, type: 'string.clj'},
{startIndex: 12, type: 'string.escape.clj'},
{startIndex: 14, type: 'string.clj'},
{startIndex: 15, type: 'string.escape.clj'},
{startIndex: 17, type: 'string.clj'},
{startIndex: 18, type: 'string.escape.clj'},
{startIndex: 20, type: 'string.clj'},
{startIndex: 21, type: 'string.escape.clj'},
{startIndex: 23, type: 'string.clj'},
{startIndex: 24, type: 'string.escape.clj'},
{startIndex: 26, type: 'string.clj'},
{startIndex: 27, type: 'string.escape.clj'},
{startIndex: 29, type: 'string.clj'},
{startIndex: 30, type: 'string.escape.clj'},
{startIndex: 32, type: 'string.clj'},
{startIndex: 33, type: 'string.escape.clj'},
{startIndex: 35, type: 'string.clj'},
]
}],
// comments // comments
createTestCases([ createTestCases([

View file

@ -53,7 +53,7 @@ export const language = <ILanguage>{
characters: /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/, characters: /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/,
escapes: /^\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?/, escapes: /^\\(?:["'\\bfnrt]|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
// simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/ // simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/
// simple-symbol := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/ // simple-symbol := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
@ -756,16 +756,15 @@ export const language = <ILanguage>{
// symbols // symbols
[/@qualifiedSymbols/, { [/@qualifiedSymbols/, {
cases: { cases: {
'^:.+$': 'constant', // Clojure keywords (e.g., `:foo/bar`) '^:.+$': 'constant', // Clojure keywords (e.g., `:foo/bar`)
'@specialForms': 'keyword', '@specialForms': 'keyword',
'@coreSymbols': 'keyword', '@coreSymbols': 'keyword',
'@constants': 'constant', '@constants': 'constant',
'@default': 'identifier', '@default': 'identifier',
},
}, },
},
], ],
], ],
whitespace: [ whitespace: [
@ -785,9 +784,9 @@ export const language = <ILanguage>{
], ],
multiLineString: [ multiLineString: [
[/[^\\"]+/, 'string'], [/"/, 'string', '@popall'],
[/@escapes/, 'string'], [/@escapes/, 'string.escape'],
[/"/, 'string', '@pop'] [/./, 'string']
], ],
}, },
}; };