Implement Syntax-Highlighting for SPARQL

2025-12-22 11:35:40 +01:00 · 2021-07-04 20:02:16 +02:00 · 2021-07-04 20:02:16 +02:00 · eda372028c
commit eda372028c
parent 0b018cd216
4 changed files with 423 additions and 0 deletions
--- a/src/monaco.contribution.ts
+++ b/src/monaco.contribution.ts
@ -64,6 +64,7 @@ import './scss/scss.contribution';
 import './shell/shell.contribution';
 import './solidity/solidity.contribution';
 import './sophia/sophia.contribution';
+import './sparql/sparql.contribution';
 import './sql/sql.contribution';
 import './st/st.contribution';
 import './swift/swift.contribution';
--- a/src/sparql/sparql.contribution.ts
+++ b/src/sparql/sparql.contribution.ts
@ -0,0 +1,13 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { registerLanguage } from '../_.contribution';
+
+registerLanguage({ // register SPARQL through registerLanguage from '../_.contribution'
+	id: 'sparql',
+	extensions: ['.rq'], // file extension associated with this language id
+	aliases: ['sparql', 'SPARQL'],
+	loader: () => import('./sparql') // dynamic import: './sparql' is only loaded when the loader is invoked
+});
--- a/src/sparql/sparql.test.ts
+++ b/src/sparql/sparql.test.ts
@ -0,0 +1,187 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { testTokenization } from '../test/testRunner';
+
+testTokenization('sparql', [ // each entry pairs an input `line` with the `tokens` (start index + type) expected for it
+	// Comments: '#' starts a comment that runs to the end of the line
+	[
+		{
+			line: '# a comment',
+			tokens: [{ startIndex: 0, type: 'comment.rq' }]
+		}
+	],
+
+	[
+		{
+			line: '##sticky # comment', // further '#' characters stay inside the same comment token
+			tokens: [{ startIndex: 0, type: 'comment.rq' }]
+		}
+	],
+
+	[
+		{
+			line: '"lex"^^<https://test/ns#not-a-comment>', // '#' inside <...> belongs to the tag token, not a comment
+			tokens: [
+				{ startIndex: 0, type: 'string.sql.rq' },
+				{ startIndex: 5, type: 'operator.sql.rq' },
+				{ startIndex: 7, type: 'tag.rq' }
+			]
+		}
+	],
+
+	// strings: single- and double-quoted literals, escapes, and unterminated literals
+	[
+		{
+			line: '(?x ns:p "abc")',
+			tokens: [
+				{ startIndex: 0, type: 'delimiter.parenthesis.rq' },
+				{ startIndex: 1, type: 'identifier.rq' },
+				{ startIndex: 3, type: 'white.rq' },
+				{ startIndex: 4, type: 'tag.rq' },
+				{ startIndex: 8, type: 'white.rq' },
+				{ startIndex: 9, type: 'string.sql.rq' },
+				{ startIndex: 14, type: 'delimiter.parenthesis.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: "'escaped single-quote: \\', normal double-quote: \"'", // the \' escape does not end the string; the bare " is plain string text
+			tokens: [
+				{ startIndex: 0, type: 'string.sql.rq' },
+				{ startIndex: 23, type: 'string.escape.rq' },
+				{ startIndex: 25, type: 'string.sql.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: '("escaped \\" and \\\' and also not escaped \'.")', // both \" and \' are escape tokens; the bare ' is plain string text
+			tokens: [
+				{ startIndex: 0, type: 'delimiter.parenthesis.rq' },
+				{ startIndex: 1, type: 'string.sql.rq' },
+				{ startIndex: 10, type: 'string.escape.rq' },
+				{ startIndex: 12, type: 'string.sql.rq' },
+				{ startIndex: 17, type: 'string.escape.rq' },
+				{ startIndex: 19, type: 'string.sql.rq' },
+				{ startIndex: 44, type: 'delimiter.parenthesis.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: "'Invalid single string", // no closing quote on the line: the whole rest is one invalid-string token
+			tokens: [{ startIndex: 0, type: 'string.invalid.rq' }]
+		}
+	],
+
+	[
+		{
+			line: '"Invalid double string', // same for an unterminated double-quoted string
+			tokens: [{ startIndex: 0, type: 'string.invalid.rq' }]
+		}
+	],
+
+	// identifiers, builtinFunctions and keywords
+	[
+		{
+			line: 'PREFIX a: <http://www.w3.org/2000/10/annotation-ns#>', // 'a:' is a prefixed name (tag), not the builtin 'a'
+			tokens: [
+				{ startIndex: 0, type: 'keyword.rq' },
+				{ startIndex: 6, type: 'white.rq' },
+				{ startIndex: 7, type: 'tag.rq' },
+				{ startIndex: 9, type: 'white.rq' },
+				{ startIndex: 10, type: 'tag.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: 'SELECT DISTINCT ?name ?nick', // upper-case keywords are recognised; ?variables are identifiers
+			tokens: [
+				{ startIndex: 0, type: 'keyword.rq' },
+				{ startIndex: 6, type: 'white.rq' },
+				{ startIndex: 7, type: 'keyword.rq' },
+				{ startIndex: 15, type: 'white.rq' },
+				{ startIndex: 16, type: 'identifier.rq' },
+				{ startIndex: 21, type: 'white.rq' },
+				{ startIndex: 22, type: 'identifier.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: '(BGP [triple ?x foaf:nick ?nick])', // 'BGP' and 'triple' are in neither word list, so they are plain identifiers
+			tokens: [
+				{ startIndex: 0, type: 'delimiter.parenthesis.rq' },
+				{ startIndex: 1, type: 'identifier.rq' },
+				{ startIndex: 4, type: 'white.rq' },
+				{ startIndex: 5, type: 'delimiter.square.rq' },
+				{ startIndex: 6, type: 'identifier.rq' },
+				{ startIndex: 12, type: 'white.rq' },
+				{ startIndex: 13, type: 'identifier.rq' },
+				{ startIndex: 15, type: 'white.rq' },
+				{ startIndex: 16, type: 'tag.rq' },
+				{ startIndex: 25, type: 'white.rq' },
+				{ startIndex: 26, type: 'identifier.rq' },
+				{ startIndex: 31, type: 'delimiter.square.rq' },
+				{ startIndex: 32, type: 'delimiter.parenthesis.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: 'SELECT*{ GRAPH :g1 { ?x } }', // no whitespace needed between keyword, operator and bracket; ':g1' has an empty prefix
+			tokens: [
+				{ startIndex: 0, type: 'keyword.rq' },
+				{ startIndex: 6, type: 'operator.sql.rq' },
+				{ startIndex: 7, type: 'delimiter.curly.rq' },
+				{ startIndex: 8, type: 'white.rq' },
+				{ startIndex: 9, type: 'keyword.rq' },
+				{ startIndex: 14, type: 'white.rq' },
+				{ startIndex: 15, type: 'tag.rq' },
+				{ startIndex: 18, type: 'white.rq' },
+				{ startIndex: 19, type: 'delimiter.curly.rq' },
+				{ startIndex: 20, type: 'white.rq' },
+				{ startIndex: 21, type: 'identifier.rq' },
+				{ startIndex: 23, type: 'white.rq' },
+				{ startIndex: 24, type: 'delimiter.curly.rq' },
+				{ startIndex: 25, type: 'white.rq' },
+				{ startIndex: 26, type: 'delimiter.curly.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: 'FILTER isBlank(?c)', // mixed-case 'isBlank' still matches the lower-case 'isblank' builtin entry
+			tokens: [
+				{ startIndex: 0, type: 'keyword.rq' },
+				{ startIndex: 6, type: 'white.rq' },
+				{ startIndex: 7, type: 'predefined.sql.rq' },
+				{ startIndex: 14, type: 'delimiter.parenthesis.rq' },
+				{ startIndex: 15, type: 'identifier.rq' },
+				{ startIndex: 17, type: 'delimiter.parenthesis.rq' }
+			]
+		}
+	],
+
+	[
+		{
+			line: '"text"@en', // a language tag directly after a string literal
+			tokens: [
+				{ startIndex: 0, type: 'string.sql.rq' },
+				{ startIndex: 6, type: 'metatag.html.rq' }
+			]
+		}
+	]
+]);
--- a/src/sparql/sparql.ts
+++ b/src/sparql/sparql.ts
@ -0,0 +1,222 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import type { languages } from 'monaco-editor-core';
+
+export const conf: languages.LanguageConfiguration = { // language configuration for SPARQL: comment marker, bracket pairs, auto-closing pairs
+	comments: {
+		lineComment: '#'
+	},
+
+	brackets: [
+		['{', '}'],
+		['[', ']'],
+		['(', ')']
+	],
+
+	autoClosingPairs: [
+		{ open: "'", close: "'", notIn: ['string'] }, // quote pairs carry notIn: ['string']; the bracket pairs below do not
+		{ open: '"', close: '"', notIn: ['string'] },
+		{ open: '{', close: '}' },
+		{ open: '[', close: ']' },
+		{ open: '(', close: ')' }
+	]
+};
+
+export const language = <languages.IMonarchLanguage>{ // Monarch tokenizer definition for SPARQL
+	defaultToken: '',
+	tokenPostfix: '.rq', // suffix on every emitted token type, e.g. 'keyword' becomes 'keyword.rq'
+
+	brackets: [ // token names used when a rule emits '@brackets'
+		{ token: 'delimiter.curly', open: '{', close: '}' },
+		{ token: 'delimiter.parenthesis', open: '(', close: ')' },
+		{ token: 'delimiter.square', open: '[', close: ']' },
+		{ token: 'delimiter.angle', open: '<', close: '>' } // declared here, but the '@brackets' rule in root only matches {}()[]
+	],
+
+	keywords: [ // looked up by the '@keywords' case in root; entries are lower-case
+		'add',
+		'as',
+		'asc',
+		'ask',
+		'base',
+		'by',
+		'clear',
+		'construct',
+		'copy',
+		'create',
+		'data',
+		'delete',
+		'desc',
+		'describe',
+		'distinct',
+		'drop',
+		'false',
+		'filter',
+		'from',
+		'graph',
+		'group',
+		'having',
+		'in',
+		'insert',
+		'limit',
+		'load',
+		'minus',
+		'move',
+		'named',
+		'not',
+		'offset',
+		'optional',
+		'order',
+		'prefix',
+		'reduced',
+		'select',
+		'service',
+		'silent',
+		'to',
+		'true',
+		'undef',
+		'union',
+		'using',
+		'values',
+		'where',
+		'with'
+	],
+
+	builtinFunctions: [ // looked up by the '@builtinFunctions' case in root and emitted as 'predefined.sql'
+		'a', // NOTE(review): in SPARQL 'a' is the rdf:type shorthand rather than a function; presumably listed here for highlighting, confirm
+		'abs',
+		'avg',
+		'bind',
+		'bnode',
+		'bound',
+		'ceil',
+		'coalesce',
+		'concat',
+		'contains',
+		'count',
+		'datatype',
+		'day',
+		'encode_for_uri',
+		'exists',
+		'floor',
+		'group_concat',
+		'hours',
+		'if',
+		'iri',
+		'isblank',
+		'isiri',
+		'isliteral',
+		'isnumeric',
+		'isuri',
+		'lang',
+		'langmatches',
+		'lcase',
+		'max',
+		'md5',
+		'min',
+		'minutes',
+		'month',
+		'now',
+		'rand',
+		'regex',
+		'replace',
+		'round',
+		'sameterm',
+		'sample',
+		'seconds',
+		'sha1',
+		'sha256',
+		'sha384',
+		'sha512',
+		'str',
+		'strafter',
+		'strbefore',
+		'strdt',
+		'strends',
+		'strlang',
+		'strlen',
+		'strstarts',
+		'struuid',
+		'substr',
+		'sum',
+		'timezone',
+		'tz',
+		'ucase',
+		'uri',
+		'uuid',
+		'year'
+	],
+
+	// tokenizer rules; matching is case-insensitive, so e.g. 'SELECT' hits the 'select' keyword entry
+	ignoreCase: true,
+	tokenizer: {
+		root: [
+			// IRI references in angle brackets; the closing '>' is optional, so an unfinished '<...' is still a tag
+			[/<[^\s\u00a0>]*>?/, 'tag'],
+
+			// strings
+			{ include: '@strings' },
+
+			// line comment; a '#' inside <...> never reaches this rule because the IRI rule consumes it
+			[/#.*/, 'comment'],
+
+			// special chars with special meaning
+			[/[{}()\[\]]/, '@brackets'],
+			[/[;,.]/, 'delimiter'],
+
+			// prefixed names such as foaf:nick, followed by the empty-prefix form such as :g1
+			[
+				/[_\w\d]+:(\.(?=[\w_\-\\%])|[:\w_-]|\\[-\\_~.!$&'()*+,;=/?#@%]|%[a-f\d][a-f\d])*/,
+				'tag'
+			],
+			[/:(\.(?=[\w_\-\\%])|[:\w_-]|\\[-\\_~.!$&'()*+,;=/?#@%]|%[a-f\d][a-f\d])+/, 'tag'],
+
+			// identifiers, builtinFunctions and keywords
+			[
+				/[$?]?[_\w\d]+/, // the optional leading '?' or '$' covers variables such as ?name
+				{
+					cases: {
+						'@keywords': { token: 'keyword' },
+						'@builtinFunctions': { token: 'predefined.sql' },
+						'@default': 'identifier'
+					}
+				}
+			],
+
+			// operators
+			[/\^\^/, 'operator.sql'],
+			[/\^[*+\-<>=&|^\/!?]*/, 'operator.sql'],
+			[/[*+\-<>=&|\/!?]/, 'operator.sql'],
+
+			// language tags such as @en
+			[/@[a-z\d\-]*/, 'metatag.html'],
+
+			// whitespaces
+			[/\s+/, 'white']
+		],
+
+		strings: [ // entry rules for string literals; pulled into root via { include: '@strings' }
+			[/'([^'\\]|\\.)*$/, 'string.invalid'], // non-terminated single-quoted string
+			[/'$/, 'string.sql', '@pop'], // NOTE(review): looks unreachable, the rule above already matches a lone ' at end of line; confirm
+			[/'/, 'string.sql', '@stringBody'],
+			[/"([^"\\]|\\.)*$/, 'string.invalid'], // non-terminated double-quoted string
+			[/"$/, 'string.sql', '@pop'], // NOTE(review): looks unreachable, the rule above already matches a lone " at end of line; confirm
+			[/"/, 'string.sql', '@dblStringBody']
+		],
+		// single-quoted strings: plain text, backslash escapes, then the closing quote pops the state
+		stringBody: [
+			[/[^\\']+/, 'string.sql'],
+			[/\\./, 'string.escape'],
+			[/'/, 'string.sql', '@pop']
+		],
+		// double-quoted strings: plain text, backslash escapes, then the closing quote pops the state
+		dblStringBody: [
+			[/[^\\"]+/, 'string.sql'],
+			[/\\./, 'string.escape'],
+			[/"/, 'string.sql', '@pop']
+		]
+	}
+};