diff --git a/src/elixir/elixir.contribution.ts b/src/elixir/elixir.contribution.ts new file mode 100644 index 00000000..aaeafa94 --- /dev/null +++ b/src/elixir/elixir.contribution.ts @@ -0,0 +1,13 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { registerLanguage } from '../_.contribution'; + +registerLanguage({ + id: 'elixir', + extensions: ['.ex', '.exs'], + aliases: ['Elixir', 'elixir', 'ex'], + loader: () => import('./elixir') +}); diff --git a/src/elixir/elixir.test.ts b/src/elixir/elixir.test.ts new file mode 100644 index 00000000..e69de29b diff --git a/src/elixir/elixir.ts b/src/elixir/elixir.ts new file mode 100644 index 00000000..1316cfc2 --- /dev/null +++ b/src/elixir/elixir.ts @@ -0,0 +1,631 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { languages } from '../fillers/monaco-editor-core'; + +export const conf: languages.LanguageConfiguration = { + comments: { + lineComment: '#' + }, + brackets: [ + ['{', '}'], + ['[', ']'], + ['(', ')'] + ], + surroundingPairs: [ + { open: '{', close: '}' }, + { open: '[', close: ']' }, + { open: '(', close: ')' }, + { open: "'", close: "'" }, + { open: '"', close: '"' } + ], + autoClosingPairs: [ + { open: "'", close: "'", notIn: ['string', 'comment'] }, + { open: '"', close: '"', notIn: ['comment'] }, + { open: '"""', close: '"""' }, + { open: '`', close: '`', notIn: ['string', 'comment'] }, + { open: '(', close: ')' }, + { open: '{', close: '}' }, + { open: '[', close: ']' }, + { open: '<<', close: '>>' } + ], + indentationRules: { + increaseIndentPattern: /^\s*(after|else|catch|rescue|fn|[^#]*(do|<\-|\->|\{|\[|\=))\s*$/, + decreaseIndentPattern: /^\s*((\}|\])\s*$|(after|else|catch|rescue|end)\b)/ + } +}; + +/** + * A Monarch lexer for the Elixir language. + * + * References: + * + * * Monarch documentation - https://microsoft.github.io/monaco-editor/monarch.html + * * Elixir lexer - https://github.com/elixir-makeup/makeup_elixir/blob/master/lib/makeup/lexers/elixir_lexer.ex + * * TextMate lexer (elixir-tmbundle) - https://github.com/elixir-editors/elixir-tmbundle/blob/master/Syntaxes/Elixir.tmLanguage + * * TextMate lexer (vscode-elixir-ls) - https://github.com/elixir-lsp/vscode-elixir-ls/blob/master/syntaxes/elixir.json + */ +export const language = { + defaultToken: 'source', + tokenPostfix: '.elixir', + + brackets: [ + { open: '[', close: ']', token: 'delimiter.square' }, + { open: '(', close: ')', token: 'delimiter.parenthesis' }, + { open: '{', close: '}', token: 'delimiter.curly' }, + { open: '<<', close: '>>', token: 'delimiter.angle.special' } + ], + + // Below are lists/regexps to which we reference later. + + declarationKeywords: [ + 'def', + 'defp', + 'defn', + 'defnp', + 'defguard', + 'defguardp', + 'defmacro', + 'defmacrop', + 'defdelegate', + 'defcallback', + 'defmacrocallback', + 'defmodule', + 'defprotocol', + 'defexception', + 'defimpl', + 'defstruct' + ], + operatorKeywords: ['and', 'in', 'not', 'or', 'when'], + namespaceKeywords: ['alias', 'import', 'require', 'use'], + otherKeywords: [ + 'after', + 'case', + 'catch', + 'cond', + 'do', + 'else', + 'end', + 'fn', + 'for', + 'if', + 'quote', + 'raise', + 'receive', + 'rescue', + 'super', + 'throw', + 'try', + 'unless', + 'unquote_splicing', + 'unquote', + 'with' + ], + constants: ['true', 'false', 'nil'], + nameBuiltin: ['__MODULE__', '__DIR__', '__ENV__', '__CALLER__', '__STACKTRACE__'], + + // Matches any of the operator names: + // <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|> == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~ :: .. = < > + - * / | . ^ & ! + operator: /-[->]?|!={0,2}|\*|\/|\\\\|&{1,3}|\.\.?|\^(?:\^\^)?|\+\+?|<(?:-|<<|=|>|\|>|~>?)?|=~|={1,3}|>(?:=|>>)?|\|~>|\|>|\|{1,3}|~>>?|~~~|::/, + + // See https://hexdocs.pm/elixir/syntax-reference.html#variables + variableName: /[a-z_][a-zA-Z0-9_]*[?!]?/, + + // See https://hexdocs.pm/elixir/syntax-reference.html#atoms + atomName: /[a-zA-Z_][a-zA-Z0-9_@]*[?!]?|@specialAtomName|@operator/, + specialAtomName: /\.\.\.|<<>>|%\{\}|%|\{\}/, + + aliasPart: /[A-Z][a-zA-Z0-9_]*/, + moduleName: /@aliasPart(?:\.@aliasPart)*/, + + // Sigil pairs are: """ """, ''' ''', " ", ' ', / /, | |, < >, { }, [ ], ( ) + sigilSymmetricDelimiter: /"""|'''|"|'|\/|\|/, + sigilStartDelimiter: /@sigilSymmetricDelimiter|<|\{|\[|\(/, + sigilEndDelimiter: /@sigilSymmetricDelimiter|>|\}|\]|\)/, + + decimal: /\d(?:_?\d)*/, + hex: /[0-9a-fA-F](_?[0-9a-fA-F])*/, + octal: /[0-7](_?[0-7])*/, + binary: /[01](_?[01])*/, + + // See https://hexdocs.pm/elixir/master/String.html#module-escape-characters + escape: /\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}|\\./, + + // The keys below correspond to tokenizer states. + // We start from the root state and match against its rules + // until we explicitly transition into another state. + // The `include` simply brings in all operations from the given state + // and is useful for improving readability. + tokenizer: { + root: [ + { include: '@whitespace' }, + { include: '@comments' }, + // Keywords start as either an identifier or a string, + // but end with a : so it's important to match this first. + { include: '@keywordsShorthand' }, + { include: '@numbers' }, + { include: '@identifiers' }, + { include: '@strings' }, + { include: '@atoms' }, + { include: '@sigils' }, + { include: '@attributes' }, + { include: '@symbols' } + ], + + // Whitespace + + whitespace: [[/\s+/, 'white']], + + // Comments + + comments: [[/(#)(.*)/, ['comment.punctuation', 'comment']]], + + // Keyword list shorthand + + keywordsShorthand: [ + [/(@atomName)(:)/, ['constant', 'constant.punctuation']], + // Use positive look-ahead to ensure the string is followed by : + // and should be considered a keyword. + [ + /"(?=([^"]|#\{.*?\}|\\")*":)/, + { token: 'constant.delimiter', next: '@doubleQuotedStringKeyword' } + ], + [ + /'(?=([^']|#\{.*?\}|\\')*':)/, + { token: 'constant.delimiter', next: '@singleQuotedStringKeyword' } + ] + ], + + doubleQuotedStringKeyword: [ + [/":/, { token: 'constant.delimiter', next: '@pop' }], + { include: '@stringConstantContentInterpol' } + ], + + singleQuotedStringKeyword: [ + [/':/, { token: 'constant.delimiter', next: '@pop' }], + { include: '@stringConstantContentInterpol' } + ], + + // Numbers + + numbers: [ + [/0b@binary/, 'number.binary'], + [/0o@octal/, 'number.octal'], + [/0x@hex/, 'number.hex'], + [/@decimal\.@decimal([eE]-?@decimal)?/, 'number.float'], + [/@decimal/, 'number'] + ], + + // Identifiers + + identifiers: [ + // Tokenize identifier name in function-like definitions. + // Note: given `def a + b, do: nil`, `a` is not a function name, + // so we use negative look-ahead to ensure there's no operator. + [ + /\b(defp?|defnp?|defmacrop?|defguardp?|defdelegate)(\s+)(@variableName)(?!\s+@operator)/, + [ + 'keyword.declaration', + 'white', + { + cases: { + unquote: 'keyword', + '@default': 'function' + } + } + ] + ], + // Tokenize function calls + [ + // In-scope call - an identifier followed by ( or .( + /(@variableName)(?=\s*\.?\s*\()/, + { + cases: { + // Tokenize as keyword in cases like `if(..., do: ..., else: ...)` + '@declarationKeywords': 'keyword.declaration', + '@namespaceKeywords': 'keyword', + '@otherKeywords': 'keyword', + '@default': 'function.call' + } + } + ], + [ + // Referencing function in a module + /(@moduleName)(\s*)(\.)(\s*)(@variableName)/, + ['type.identifier', 'white', 'operator', 'white', 'function.call'] + ], + [ + // Referencing function in an Erlang module + /(:)(@atomName)(\s*)(\.)(\s*)(@variableName)/, + ['constant.punctuation', 'constant', 'white', 'operator', 'white', 'function.call'] + ], + [ + // Piping into a function (tokenized separately as it may not have parentheses) + /(\|>)(\s*)(@variableName)/, + [ + 'operator', + 'white', + { + cases: { + '@otherKeywords': 'keyword', + '@default': 'function.call' + } + } + ] + ], + [ + // Function reference passed to another function + /(&)(\s*)(@variableName)/, + ['operator', 'white', 'function.call'] + ], + // Language keywords, builtins, constants and variables + [ + /@variableName/, + { + cases: { + '@declarationKeywords': 'keyword.declaration', + '@operatorKeywords': 'keyword.operator', + '@namespaceKeywords': 'keyword', + '@otherKeywords': 'keyword', + '@constants': 'constant.language', + '@nameBuiltin': 'variable.language', + '_.*': 'comment.unused', + '@default': 'identifier' + } + } + ], + // Module names + [/@moduleName/, 'type.identifier'] + ], + + // Strings + + strings: [ + [/"""/, { token: 'string.delimiter', next: '@doubleQuotedHeredoc' }], + [/'''/, { token: 'string.delimiter', next: '@singleQuotedHeredoc' }], + [/"/, { token: 'string.delimiter', next: '@doubleQuotedString' }], + [/'/, { token: 'string.delimiter', next: '@singleQuotedString' }] + ], + + doubleQuotedHeredoc: [ + [/"""/, { token: 'string.delimiter', next: '@pop' }], + { include: '@stringContentInterpol' } + ], + + singleQuotedHeredoc: [ + [/'''/, { token: 'string.delimiter', next: '@pop' }], + { include: '@stringContentInterpol' } + ], + + doubleQuotedString: [ + [/"/, { token: 'string.delimiter', next: '@pop' }], + { include: '@stringContentInterpol' } + ], + + singleQuotedString: [ + [/'/, { token: 'string.delimiter', next: '@pop' }], + { include: '@stringContentInterpol' } + ], + + // Atoms + + atoms: [ + [/(:)(@atomName)/, ['constant.punctuation', 'constant']], + [/:"/, { token: 'constant.delimiter', next: '@doubleQuotedStringAtom' }], + [/:'/, { token: 'constant.delimiter', next: '@singleQuotedStringAtom' }] + ], + + doubleQuotedStringAtom: [ + [/"/, { token: 'constant.delimiter', next: '@pop' }], + { include: '@stringConstantContentInterpol' } + ], + + singleQuotedStringAtom: [ + [/'/, { token: 'constant.delimiter', next: '@pop' }], + { include: '@stringConstantContentInterpol' } + ], + + // Sigils + + // See https://elixir-lang.org/getting-started/sigils.html + // Sigils allow for typing values using their textual representation. + // All sigils start with ~ followed by a letter indicating sigil type + // and then a delimiter pair enclosing the textual representation. + // Optional modifiers are allowed after the closing delimiter. + // For instance a regular expressions can be written as: + // ~r/foo|bar/ ~r{foo|bar} ~r/foo|bar/g + // + // In general lowercase sigils allow for interpolation + // and escaped characters, whereas uppercase sigils don't + // + // During tokenization we want to distinguish some + // specific sigil types, namely string and regexp, + // so that they cen be themed separately. + // + // To reasonably handle all those combinations we leverage + // dot-separated states, so if we transition to @sigilStart.interpol.s.{.} + // then "sigilStart.interpol.s" state will match and also all + // the individual dot-separated parameters can be accessed. + + sigils: [ + [/~[a-z]@sigilStartDelimiter/, { token: '@rematch', next: '@sigil.interpol' }], + [/~[A-Z]@sigilStartDelimiter/, { token: '@rematch', next: '@sigil.noInterpol' }] + ], + + sigil: [ + [/~([a-zA-Z])\{/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.{.}' }], + [/~([a-zA-Z])\[/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.[.]' }], + [/~([a-zA-Z])\(/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.(.)' }], + [/~([a-zA-Z])\' }], + [ + /~([a-zA-Z])(@sigilSymmetricDelimiter)/, + { token: '@rematch', switchTo: '@sigilStart.$S2.$1.$2.$2' } + ] + ], + + // The definitions below expect states to be of the form: + // + // sigilStart.... + // sigilContinue.... + // + // The sigilStart state is used only to properly classify the token (as string/regex/sigil) + // and immediately switches to the sigilContinue sate, which handles the actual content + // and waits for the corresponding end delimiter. + + 'sigilStart.interpol.s': [ + [ + /~s@sigilStartDelimiter/, + { + token: 'string.delimiter', + switchTo: '@sigilContinue.$S2.$S3.$S4.$S5' + } + ] + ], + + 'sigilContinue.interpol.s': [ + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + '$1==$S5': { token: 'string.delimiter', next: '@pop' }, + '@default': 'string' + } + } + ], + { include: '@stringContentInterpol' } + ], + + 'sigilStart.noInterpol.S': [ + [ + /~S@sigilStartDelimiter/, + { + token: 'string.delimiter', + switchTo: '@sigilContinue.$S2.$S3.$S4.$S5' + } + ] + ], + + 'sigilContinue.noInterpol.S': [ + // Ignore escaped sigil end + [/(^|[^\\])\\@sigilEndDelimiter/, 'string'], + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + '$1==$S5': { token: 'string.delimiter', next: '@pop' }, + '@default': 'string' + } + } + ], + { include: '@stringContent' } + ], + + 'sigilStart.interpol.r': [ + [ + /~r@sigilStartDelimiter/, + { + token: 'regexp.delimiter', + switchTo: '@sigilContinue.$S2.$S3.$S4.$S5' + } + ] + ], + + 'sigilContinue.interpol.r': [ + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + '$1==$S5': { token: 'regexp.delimiter', next: '@pop' }, + '@default': 'regexp' + } + } + ], + { include: '@regexpContentInterpol' } + ], + + 'sigilStart.noInterpol.R': [ + [ + /~R@sigilStartDelimiter/, + { + token: 'regexp.delimiter', + switchTo: '@sigilContinue.$S2.$S3.$S4.$S5' + } + ] + ], + + 'sigilContinue.noInterpol.R': [ + // Ignore escaped sigil end + [/(^|[^\\])\\@sigilEndDelimiter/, 'regexp'], + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + '$1==$S5': { token: 'regexp.delimiter', next: '@pop' }, + '@default': 'regexp' + } + } + ], + { include: '@regexpContent' } + ], + + // Fallback to the generic sigil by default + 'sigilStart.interpol': [ + [ + /~([a-zA-Z])@sigilStartDelimiter/, + { + token: 'sigil.delimiter', + switchTo: '@sigilContinue.$S2.$S3.$S4.$S5' + } + ] + ], + + 'sigilContinue.interpol': [ + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + '$1==$S5': { token: 'sigil.delimiter', next: '@pop' }, + '@default': 'sigil' + } + } + ], + { include: '@sigilContentInterpol' } + ], + + 'sigilStart.noInterpol': [ + [ + /~([a-zA-Z])@sigilStartDelimiter/, + { + token: 'sigil.delimiter', + switchTo: '@sigilContinue.$S2.$S3.$S4.$S5' + } + ] + ], + + 'sigilContinue.noInterpol': [ + // Ignore escaped sigil end + [/(^|[^\\])\\@sigilEndDelimiter/, 'sigil'], + [ + /(@sigilEndDelimiter)[a-zA-Z]*/, + { + cases: { + '$1==$S5': { token: 'sigil.delimiter', next: '@pop' }, + '@default': 'sigil' + } + } + ], + { include: '@sigilContent' } + ], + + // Attributes + + attributes: [ + // Module @doc* attributes - tokenized as comments + [ + /\@(module|type)?doc (~[sS])?"""/, + { + token: 'comment.block.documentation', + next: '@doubleQuotedHeredocDocstring' + } + ], + [ + /\@(module|type)?doc (~[sS])?"/, + { + token: 'comment.block.documentation', + next: '@doubleQuotedStringDocstring' + } + ], + [/\@(module|type)?doc false/, 'comment.block.documentation'], + // Module attributes + [/\@(@variableName)/, 'variable'] + ], + + doubleQuotedHeredocDocstring: [ + [/"""/, { token: 'comment.block.documentation', next: '@pop' }], + { include: '@docstringContent' } + ], + + doubleQuotedStringDocstring: [ + [/"/, { token: 'comment.block.documentation', next: '@pop' }], + { include: '@docstringContent' } + ], + + // Operators, punctuation, brackets + + symbols: [ + // Code point operator (either with regular character ?a or an escaped one ?\n) + [/\?(\\.|[^\\\s])/, 'number.constant'], + // Anonymous function arguments + [/&\d+/, 'operator'], + // Bitshift operators (must go before delimiters, so that << >> don't match first) + [/<<<|>>>/, 'operator'], + // Delimiter pairs + [/[()\[\]\{\}]|<<|>>/, '@brackets'], + // Triple dot is a valid name (must go before operators, so that .. doesn't match instead) + [/\.\.\./, 'identifier'], + // Punctuation => (must go before operators, so it's not tokenized as = then >) + [/=>/, 'punctuation'], + // Operators + [/@operator/, 'operator'], + // Punctuation + [/[:;,.%]/, 'punctuation'] + ], + + // Generic helpers + + stringContentInterpol: [ + { include: '@interpolation' }, + { include: '@escapeChar' }, + { include: '@stringContent' } + ], + + stringContent: [[/./, 'string']], + + stringConstantContentInterpol: [ + { include: '@interpolation' }, + { include: '@escapeChar' }, + { include: '@stringConstantContent' } + ], + + stringConstantContent: [[/./, 'constant']], + + regexpContentInterpol: [ + { include: '@interpolation' }, + { include: '@escapeChar' }, + { include: '@regexpContent' } + ], + + regexpContent: [ + // # may be a regular regexp char, so we use a heuristic + // assuming a # surrounded by whitespace is actually a comment. + [/(\s)(#)(\s.*)$/, ['white', 'comment.punctuation', 'comment']], + [/./, 'regexp'] + ], + + sigilContentInterpol: [ + { include: '@interpolation' }, + { include: '@escapeChar' }, + { include: '@sigilContent' } + ], + + sigilContent: [[/./, 'sigil']], + + docstringContent: [[/./, 'comment.block.documentation']], + + escapeChar: [[/@escape/, 'constant.character.escape']], + + interpolation: [ + [/#{/, { token: 'delimiter.bracket.embed', next: '@interpolationContinue' }] + ], + + interpolationContinue: [ + [/}/, { token: 'delimiter.bracket.embed', next: '@pop' }], + // Interpolation brackets may contain arbitrary code, + // so we simply match against all the root rules, + // until we reach interpolation end (the above matches). + { include: '@root' } + ] + } +}; diff --git a/src/monaco.contribution.ts b/src/monaco.contribution.ts index cf90f679..9e6f52c6 100644 --- a/src/monaco.contribution.ts +++ b/src/monaco.contribution.ts @@ -17,6 +17,7 @@ import './css/css.contribution'; import './dart/dart.contribution'; import './dockerfile/dockerfile.contribution'; import './ecl/ecl.contribution'; +import './elixir/elixir.contribution'; import './fsharp/fsharp.contribution'; import './go/go.contribution'; import './graphql/graphql.contribution';