/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { languages } from '../fillers/monaco-editor-core';
export const conf: languages.LanguageConfiguration = {
comments: {
lineComment: '#'
},
brackets: [
['{', '}'],
['[', ']'],
['(', ')']
],
surroundingPairs: [
{ open: '{', close: '}' },
{ open: '[', close: ']' },
{ open: '(', close: ')' },
{ open: "'", close: "'" },
{ open: '"', close: '"' }
],
autoClosingPairs: [
{ open: "'", close: "'", notIn: ['string', 'comment'] },
{ open: '"', close: '"', notIn: ['comment'] },
{ open: '"""', close: '"""' },
{ open: '`', close: '`', notIn: ['string', 'comment'] },
{ open: '(', close: ')' },
{ open: '{', close: '}' },
{ open: '[', close: ']' },
{ open: '<<', close: '>>' }
],
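// Illustration of the patterns below: a line ending in `do`, `->`, `<-`, `=`,
// `{` or `[` (or consisting solely of `after`, `else`, `catch`, `rescue` or `fn`)
// increases the indent of the next line, while a line containing only `}` or `]`,
// or starting with `after`, `else`, `catch`, `rescue` or `end`, decreases it:
//
//     def add(a, b) do
//       a + b
//     end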
indentationRules: {
increaseIndentPattern: /^\s*(after|else|catch|rescue|fn|[^#]*(do|<\-|\->|\{|\[|\=))\s*$/,
decreaseIndentPattern: /^\s*((\}|\])\s*$|(after|else|catch|rescue|end)\b)/
}
};
/**
* A Monarch lexer for the Elixir language.
*
* References:
*
* * Monarch documentation - https://microsoft.github.io/monaco-editor/monarch.html
* * Elixir lexer - https://github.com/elixir-makeup/makeup_elixir/blob/master/lib/makeup/lexers/elixir_lexer.ex
* * TextMate lexer (elixir-tmbundle) - https://github.com/elixir-editors/elixir-tmbundle/blob/master/Syntaxes/Elixir.tmLanguage
* * TextMate lexer (vscode-elixir-ls) - https://github.com/elixir-lsp/vscode-elixir-ls/blob/master/syntaxes/elixir.json
*/
export const language = <languages.IMonarchLanguage>{
defaultToken: 'source',
tokenPostfix: '.elixir',
brackets: [
{ open: '[', close: ']', token: 'delimiter.square' },
{ open: '(', close: ')', token: 'delimiter.parenthesis' },
{ open: '{', close: '}', token: 'delimiter.curly' },
{ open: '<<', close: '>>', token: 'delimiter.angle.special' }
],
// Below are lists/regexps that we reference later.
declarationKeywords: [
'def',
'defp',
'defn',
'defnp',
'defguard',
'defguardp',
'defmacro',
'defmacrop',
'defdelegate',
'defcallback',
'defmacrocallback',
'defmodule',
'defprotocol',
'defexception',
'defimpl',
'defstruct'
],
operatorKeywords: ['and', 'in', 'not', 'or', 'when'],
namespaceKeywords: ['alias', 'import', 'require', 'use'],
otherKeywords: [
'after',
'case',
'catch',
'cond',
'do',
'else',
'end',
'fn',
'for',
'if',
'quote',
'raise',
'receive',
'rescue',
'super',
'throw',
'try',
'unless',
'unquote_splicing',
'unquote',
'with'
],
constants: ['true', 'false', 'nil'],
nameBuiltin: ['__MODULE__', '__DIR__', '__ENV__', '__CALLER__', '__STACKTRACE__'],
// Matches any of the operator names:
// <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|> == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~ :: .. = < > + - * / | . ^ & !
operator: /-[->]?|!={0,2}|\*|\/|\\\\|&{1,3}|\.\.?|\^(?:\^\^)?|\+\+?|<(?:-|<<|=|>|\|>|~>?)?|=~|={1,3}|>(?:=|>>)?|\|~>|\|>|\|{1,3}|~>>?|~~~|::/,
// See https://hexdocs.pm/elixir/syntax-reference.html#variables
variableName: /[a-z_][a-zA-Z0-9_]*[?!]?/,
// See https://hexdocs.pm/elixir/syntax-reference.html#atoms
atomName: /[a-zA-Z_][a-zA-Z0-9_@]*[?!]?|@specialAtomName|@operator/,
specialAtomName: /\.\.\.|<<>>|%\{\}|%|\{\}/,
aliasPart: /[A-Z][a-zA-Z0-9_]*/,
moduleName: /@aliasPart(?:\.@aliasPart)*/,
// Sigil pairs are: """ """, ''' ''', " ", ' ', / /, | |, < >, { }, [ ], ( )
sigilSymmetricDelimiter: /"""|'''|"|'|\/|\|/,
sigilStartDelimiter: /@sigilSymmetricDelimiter|<|\{|\[|\(/,
sigilEndDelimiter: /@sigilSymmetricDelimiter|>|\}|\]|\)/,
decimal: /\d(?:_?\d)*/,
hex: /[0-9a-fA-F](_?[0-9a-fA-F])*/,
octal: /[0-7](_?[0-7])*/,
binary: /[01](_?[01])*/,
// See https://hexdocs.pm/elixir/master/String.html#module-escape-characters
escape: /\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}|\\./,
// The keys below correspond to tokenizer states.
// We start from the root state and match against its rules
// until we explicitly transition into another state.
// The `include` simply brings in all operations from the given state
// and is useful for improving readability.
tokenizer: {
root: [
{ include: '@whitespace' },
{ include: '@comments' },
// Keywords start as either an identifier or a string,
// but end with a colon, so it's important to match them
// before identifiers and strings.
{ include: '@keywordsShorthand' },
{ include: '@numbers' },
{ include: '@identifiers' },
{ include: '@strings' },
{ include: '@atoms' },
{ include: '@sigils' },
{ include: '@attributes' },
{ include: '@symbols' }
],
// Whitespace
whitespace: [[/\s+/, 'white']],
// Comments
comments: [[/(#)(.*)/, ['comment.punctuation', 'comment']]],
// Keyword list shorthand
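// For example, in `[name: "Jane", "first name": "John"]` both `name:` and
// `"first name":` are keyword keys and get tokenized as constants.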
keywordsShorthand: [
[/(@atomName)(:)/, ['constant', 'constant.punctuation']],
// Use positive look-ahead to ensure the string is followed by :
// and should be considered a keyword.
[
/"(?=([^"]|#\{.*?\}|\\")*":)/,
{ token: 'constant.delimiter', next: '@doubleQuotedStringKeyword' }
],
[
/'(?=([^']|#\{.*?\}|\\')*':)/,
{ token: 'constant.delimiter', next: '@singleQuotedStringKeyword' }
]
],
doubleQuotedStringKeyword: [
[/":/, { token: 'constant.delimiter', next: '@pop' }],
{ include: '@stringConstantContentInterpol' }
],
singleQuotedStringKeyword: [
[/':/, { token: 'constant.delimiter', next: '@pop' }],
{ include: '@stringConstantContentInterpol' }
],
// Numbers
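// e.g. 0b1010, 0o777, 0xFF, 1_000_000, 3.14, 1.0e-10
// (underscores are allowed as visual separators)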
numbers: [
[/0b@binary/, 'number.binary'],
[/0o@octal/, 'number.octal'],
[/0x@hex/, 'number.hex'],
[/@decimal\.@decimal([eE]-?@decimal)?/, 'number.float'],
[/@decimal/, 'number']
],
// Identifiers
identifiers: [
// Tokenize identifier name in function-like definitions.
// Note: given `def a + b, do: nil`, `a` is not a function name,
// so we use negative look-ahead to ensure there's no operator.
[
/\b(defp?|defnp?|defmacrop?|defguardp?|defdelegate)(\s+)(@variableName)(?!\s+@operator)/,
[
'keyword.declaration',
'white',
{
cases: {
unquote: 'keyword',
'@default': 'function'
}
}
]
],
// Tokenize function calls
[
// In-scope call - an identifier followed by ( or .(
/(@variableName)(?=\s*\.?\s*\()/,
{
cases: {
// Tokenize as keyword in cases like `if(..., do: ..., else: ...)`
'@declarationKeywords': 'keyword.declaration',
'@namespaceKeywords': 'keyword',
'@otherKeywords': 'keyword',
'@default': 'function.call'
}
}
],
[
// Referencing function in a module
/(@moduleName)(\s*)(\.)(\s*)(@variableName)/,
['type.identifier', 'white', 'operator', 'white', 'function.call']
],
[
// Referencing function in an Erlang module
/(:)(@atomName)(\s*)(\.)(\s*)(@variableName)/,
['constant.punctuation', 'constant', 'white', 'operator', 'white', 'function.call']
],
[
// Piping into a function (tokenized separately as it may not have parentheses)
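// e.g. in `value |> transform` the `transform` part is tokenized as a function call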
/(\|>)(\s*)(@variableName)/,
[
'operator',
'white',
{
cases: {
'@otherKeywords': 'keyword',
'@default': 'function.call'
}
}
]
],
[
// Function reference passed to another function
/(&)(\s*)(@variableName)/,
['operator', 'white', 'function.call']
],
// Language keywords, builtins, constants and variables
[
/@variableName/,
{
cases: {
'@declarationKeywords': 'keyword.declaration',
'@operatorKeywords': 'keyword.operator',
'@namespaceKeywords': 'keyword',
'@otherKeywords': 'keyword',
'@constants': 'constant.language',
'@nameBuiltin': 'variable.language',
'_.*': 'comment.unused',
'@default': 'identifier'
}
}
],
// Module names
[/@moduleName/, 'type.identifier']
],
// Strings
strings: [
[/"""/, { token: 'string.delimiter', next: '@doubleQuotedHeredoc' }],
[/'''/, { token: 'string.delimiter', next: '@singleQuotedHeredoc' }],
[/"/, { token: 'string.delimiter', next: '@doubleQuotedString' }],
[/'/, { token: 'string.delimiter', next: '@singleQuotedString' }]
],
doubleQuotedHeredoc: [
[/"""/, { token: 'string.delimiter', next: '@pop' }],
{ include: '@stringContentInterpol' }
],
singleQuotedHeredoc: [
[/'''/, { token: 'string.delimiter', next: '@pop' }],
{ include: '@stringContentInterpol' }
],
doubleQuotedString: [
[/"/, { token: 'string.delimiter', next: '@pop' }],
{ include: '@stringContentInterpol' }
],
singleQuotedString: [
[/'/, { token: 'string.delimiter', next: '@pop' }],
{ include: '@stringContentInterpol' }
],
// Atoms
atoms: [
[/(:)(@atomName)/, ['constant.punctuation', 'constant']],
[/:"/, { token: 'constant.delimiter', next: '@doubleQuotedStringAtom' }],
[/:'/, { token: 'constant.delimiter', next: '@singleQuotedStringAtom' }]
],
doubleQuotedStringAtom: [
[/"/, { token: 'constant.delimiter', next: '@pop' }],
{ include: '@stringConstantContentInterpol' }
],
singleQuotedStringAtom: [
[/'/, { token: 'constant.delimiter', next: '@pop' }],
{ include: '@stringConstantContentInterpol' }
],
// Sigils
// See https://elixir-lang.org/getting-started/sigils.html
// Sigils allow for typing values using their textual representation.
// All sigils start with ~ followed by a letter indicating sigil type
// and then a delimiter pair enclosing the textual representation.
// Optional modifiers are allowed after the closing delimiter.
// For instance, a regular expression can be written as:
// ~r/foo|bar/ ~r{foo|bar} ~r/foo|bar/g
//
// In general, lowercase sigils allow for interpolation
// and escaped characters, whereas uppercase sigils don't.
//
// During tokenization we want to distinguish some
// specific sigil types, namely string and regexp,
// so that they can be themed separately.
//
// To reasonably handle all those combinations we leverage
// dot-separated states, so if we transition to @sigilStart.interpol.s.{.},
// then the "sigilStart.interpol.s" state will match, and all the
// individual dot-separated parameters can still be accessed (as $S2 to $S5).
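//
// For example, tokenizing ~s(hello) goes through the states
// @sigilStart.interpol.s.(.) and then @sigilContinue.interpol.s.(.).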
sigils: [
[/~[a-z]@sigilStartDelimiter/, { token: '@rematch', next: '@sigil.interpol' }],
[/~[A-Z]@sigilStartDelimiter/, { token: '@rematch', next: '@sigil.noInterpol' }]
],
sigil: [
[/~([a-zA-Z])\{/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.{.}' }],
[/~([a-zA-Z])\[/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.[.]' }],
[/~([a-zA-Z])\(/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.(.)' }],
[/~([a-zA-Z])\</, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.<.>' }],
[
/~([a-zA-Z])(@sigilSymmetricDelimiter)/,
{ token: '@rematch', switchTo: '@sigilStart.$S2.$1.$2.$2' }
]
],
// The definitions below expect states to be of the form:
//
// sigilStart.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter>
// sigilContinue.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter>
//
// The sigilStart state is used only to properly classify the token (as string/regex/sigil)
// and immediately switches to the sigilContinue state, which handles the actual content
// and waits for the corresponding end delimiter.
'sigilStart.interpol.s': [
[
/~s@sigilStartDelimiter/,
{
token: 'string.delimiter',
switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
}
]
],
'sigilContinue.interpol.s': [
[
/(@sigilEndDelimiter)[a-zA-Z]*/,
{
cases: {
'$1==$S5': { token: 'string.delimiter', next: '@pop' },
'@default': 'string'
}
}
],
{ include: '@stringContentInterpol' }
],
'sigilStart.noInterpol.S': [
[
/~S@sigilStartDelimiter/,
{
token: 'string.delimiter',
switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
}
]
],
'sigilContinue.noInterpol.S': [
// Ignore escaped sigil end
[/(^|[^\\])\\@sigilEndDelimiter/, 'string'],
[
/(@sigilEndDelimiter)[a-zA-Z]*/,
{
cases: {
'$1==$S5': { token: 'string.delimiter', next: '@pop' },
'@default': 'string'
}
}
],
{ include: '@stringContent' }
],
'sigilStart.interpol.r': [
[
/~r@sigilStartDelimiter/,
{
token: 'regexp.delimiter',
switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
}
]
],
'sigilContinue.interpol.r': [
[
/(@sigilEndDelimiter)[a-zA-Z]*/,
{
cases: {
'$1==$S5': { token: 'regexp.delimiter', next: '@pop' },
'@default': 'regexp'
}
}
],
{ include: '@regexpContentInterpol' }
],
'sigilStart.noInterpol.R': [
[
/~R@sigilStartDelimiter/,
{
token: 'regexp.delimiter',
switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
}
]
],
'sigilContinue.noInterpol.R': [
// Ignore escaped sigil end
[/(^|[^\\])\\@sigilEndDelimiter/, 'regexp'],
[
/(@sigilEndDelimiter)[a-zA-Z]*/,
{
cases: {
'$1==$S5': { token: 'regexp.delimiter', next: '@pop' },
'@default': 'regexp'
}
}
],
{ include: '@regexpContent' }
],
// Fall back to the generic sigil handling by default
'sigilStart.interpol': [
[
/~([a-zA-Z])@sigilStartDelimiter/,
{
token: 'sigil.delimiter',
switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
}
]
],
'sigilContinue.interpol': [
[
/(@sigilEndDelimiter)[a-zA-Z]*/,
{
cases: {
'$1==$S5': { token: 'sigil.delimiter', next: '@pop' },
'@default': 'sigil'
}
}
],
{ include: '@sigilContentInterpol' }
],
'sigilStart.noInterpol': [
[
/~([a-zA-Z])@sigilStartDelimiter/,
{
token: 'sigil.delimiter',
switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
}
]
],
'sigilContinue.noInterpol': [
// Ignore escaped sigil end
[/(^|[^\\])\\@sigilEndDelimiter/, 'sigil'],
[
/(@sigilEndDelimiter)[a-zA-Z]*/,
{
cases: {
'$1==$S5': { token: 'sigil.delimiter', next: '@pop' },
'@default': 'sigil'
}
}
],
{ include: '@sigilContent' }
],
// Attributes
attributes: [
// Module @doc* attributes - tokenized as comments
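// e.g. @doc """...""", @moduledoc "...", @typedoc ~S"...", @doc false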
[
/\@(module|type)?doc (~[sS])?"""/,
{
token: 'comment.block.documentation',
next: '@doubleQuotedHeredocDocstring'
}
],
[
/\@(module|type)?doc (~[sS])?"/,
{
token: 'comment.block.documentation',
next: '@doubleQuotedStringDocstring'
}
],
[/\@(module|type)?doc false/, 'comment.block.documentation'],
// Module attributes
[/\@(@variableName)/, 'variable']
],
doubleQuotedHeredocDocstring: [
[/"""/, { token: 'comment.block.documentation', next: '@pop' }],
{ include: '@docstringContent' }
],
doubleQuotedStringDocstring: [
[/"/, { token: 'comment.block.documentation', next: '@pop' }],
{ include: '@docstringContent' }
],
// Operators, punctuation, brackets
symbols: [
// Code point operator (either with a regular character like ?a or an escaped one like ?\n)
[/\?(\\.|[^\\\s])/, 'number.constant'],
// Anonymous function arguments
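// e.g. &1 and &2 in the shorthand &(&1 + &2)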
[/&\d+/, 'operator'],
// Bitshift operators (must go before delimiters, so that << >> don't match first)
[/<<<|>>>/, 'operator'],
// Delimiter pairs
[/[()\[\]\{\}]|<<|>>/, '@brackets'],
// Triple dot is a valid name (must go before operators, so that .. doesn't match instead)
[/\.\.\./, 'identifier'],
// Punctuation => (must go before operators, so it's not tokenized as = then >)
[/=>/, 'punctuation'],
// Operators
[/@operator/, 'operator'],
// Punctuation
[/[:;,.%]/, 'punctuation']
],
// Generic helpers
stringContentInterpol: [
{ include: '@interpolation' },
{ include: '@escapeChar' },
{ include: '@stringContent' }
],
stringContent: [[/./, 'string']],
stringConstantContentInterpol: [
{ include: '@interpolation' },
{ include: '@escapeChar' },
{ include: '@stringConstantContent' }
],
stringConstantContent: [[/./, 'constant']],
regexpContentInterpol: [
{ include: '@interpolation' },
{ include: '@escapeChar' },
{ include: '@regexpContent' }
],
regexpContent: [
// # may be a regular regexp char, so we use a heuristic
// assuming a # surrounded by whitespace is actually a comment.
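// e.g. in a multi-line ~r/.../x sigil, the `# match digits` part of a line
// like `\d+   # match digits` is tokenized as a comment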
[/(\s)(#)(\s.*)$/, ['white', 'comment.punctuation', 'comment']],
[/./, 'regexp']
],
sigilContentInterpol: [
{ include: '@interpolation' },
{ include: '@escapeChar' },
{ include: '@sigilContent' }
],
sigilContent: [[/./, 'sigil']],
docstringContent: [[/./, 'comment.block.documentation']],
escapeChar: [[/@escape/, 'constant.character.escape']],
interpolation: [
[/#{/, { token: 'delimiter.bracket.embed', next: '@interpolationContinue' }]
],
interpolationContinue: [
[/}/, { token: 'delimiter.bracket.embed', next: '@pop' }],
// Interpolation brackets may contain arbitrary code,
// so we simply match against all the root rules
// until we reach the interpolation end (matched by the rule above).
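// e.g. the `String.upcase(name)` inside "Hello #{String.upcase(name)}"
// is tokenized with the regular root rules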
{ include: '@root' }
]
}
};
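/**
 * Usage sketch (illustrative, not part of this module): with the standalone
 * monaco-editor API, the exported `conf` and `language` could be wired up
 * roughly as below. The `'elixir'` language id is an assumption here (it is
 * not declared in this file); in the monaco-editor sources the id and the
 * registration are handled by the language's contribution code.
 *
 *     import * as monaco from 'monaco-editor';
 *     import { conf, language } from './elixir';
 *
 *     monaco.languages.register({ id: 'elixir' });
 *     monaco.languages.setLanguageConfiguration('elixir', conf);
 *     monaco.languages.setMonarchTokensProvider('elixir', language);
 */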