monaco-editor/src/basic-languages/python/python.test.ts
Luca Chiodini 14efdca3b3
Improve Python's tokenizer for numeric literals
This improves Python's tokenizer for numeric literals with respect to several aspects:
- Support underscores between digits and after prefixes (fixes #4745)
- Support octal and binary literals
- Support case-insensitive prefixes for hex/octal/binary literals
- Recognize a possible leading minus sign as a separate token, instead of mistakenly treating it as part of the numeric literal

Reference: https://docs.python.org/3/reference/lexical_analysis.html#numeric-literals

Add tests to cover several of the above cases and their combinations.
2025-11-05 09:28:09 +01:00

337 lines
7.2 KiB
TypeScript

/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { testTokenization } from '../test/testRunner';
/**
 * Tokenization fixtures for the Python language definition.
 *
 * Each inner array is one test case made of one or more `{ line, tokens }` entries. Every
 * expected token records the offset at which it starts in `line` (`startIndex`) and its
 * token type.
 *
 * NOTE(review): entries inside one case look like they are tokenized in order with the
 * tokenizer state carried from one line to the next (the multi-line string case relies on
 * that) — confirm against ../test/testRunner.
 */
testTokenization('python', [
	// Keywords
	[
		{
			line: 'def func():',
			tokens: [
				{ startIndex: 0, type: 'keyword.python' },
				{ startIndex: 3, type: 'white.python' },
				{ startIndex: 4, type: 'identifier.python' },
				{ startIndex: 8, type: 'delimiter.parenthesis.python' },
				{ startIndex: 10, type: 'delimiter.python' }
			]
		}
	],
	[
		{
			line: 'func(str Y3)',
			tokens: [
				{ startIndex: 0, type: 'identifier.python' },
				{ startIndex: 4, type: 'delimiter.parenthesis.python' },
				{ startIndex: 5, type: 'keyword.python' },
				{ startIndex: 8, type: 'white.python' },
				{ startIndex: 9, type: 'identifier.python' },
				{ startIndex: 11, type: 'delimiter.parenthesis.python' }
			]
		}
	],
	[
		{
			line: '@Dec0_rator:',
			tokens: [
				{ startIndex: 0, type: 'tag.python' },
				{ startIndex: 11, type: 'delimiter.python' }
			]
		}
	],

	// Comments
	[
		{
			// Everything after the leading blank is a single comment token, quotes included.
			line: ' # Comments! ## "jfkd" ',
			tokens: [
				{ startIndex: 0, type: 'white.python' },
				{ startIndex: 1, type: 'comment.python' }
			]
		}
	],

	// Strings
	[
		{
			// Single-line strings: the quote characters are reported as 'string.escape'.
			line: "'s0'",
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 1, type: 'string.python' },
				{ startIndex: 3, type: 'string.escape.python' }
			]
		}
	],
	[
		{
			// The trailing lone quote opens a second, unterminated string.
			line: '"\' " "',
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 1, type: 'string.python' },
				{ startIndex: 3, type: 'string.escape.python' },
				{ startIndex: 4, type: 'white.python' },
				{ startIndex: 5, type: 'string.escape.python' }
			]
		}
	],
	[
		{
			// Triple-quoted strings are one 'string' token, delimiters included.
			line: "'''Lots of string'''",
			tokens: [{ startIndex: 0, type: 'string.python' }]
		}
	],
	[
		{
			line: '"""Lots \'\'\' \'\'\'"""',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		}
	],
	[
		{
			line: "'''Lots '''0.3e-5",
			tokens: [
				{ startIndex: 0, type: 'string.python' },
				{ startIndex: 11, type: 'number.float.python' }
			]
		}
	],

	// https://github.com/microsoft/monaco-editor/issues/1170
	// Multi-line triple-quoted strings. The function body is indented by three spaces: the
	// expected offsets below (string at 3, closing quotes ending at 6, '+' at 7, identifier
	// at 3) only line up with that indent.
	[
		{
			line: 'def f():',
			tokens: [
				{ startIndex: 0, type: 'keyword.python' },
				{ startIndex: 3, type: 'white.python' },
				{ startIndex: 4, type: 'identifier.python' },
				{ startIndex: 5, type: 'delimiter.parenthesis.python' },
				{ startIndex: 7, type: 'delimiter.python' }
			]
		},
		{
			line: '   """multi',
			tokens: [
				{ startIndex: 0, type: 'white.python' },
				{ startIndex: 3, type: 'string.python' }
			]
		},
		{
			// Inside the open string, leading whitespace is part of the string token.
			line: '   line',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		},
		{
			line: '   comment',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		},
		{
			// Closes the first string, then '+' (empty token type) and a second string opens.
			line: '   """ + """',
			tokens: [
				{ startIndex: 0, type: 'string.python' },
				{ startIndex: 6, type: 'white.python' },
				{ startIndex: 7, type: '' },
				{ startIndex: 8, type: 'white.python' },
				{ startIndex: 9, type: 'string.python' }
			]
		},
		{
			line: '   another',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		},
		{
			line: '   multi',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		},
		{
			line: '   line',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		},
		{
			line: '   comment"""',
			tokens: [{ startIndex: 0, type: 'string.python' }]
		},
		{
			// Back to ordinary code once the second string has been closed.
			line: '   code',
			tokens: [
				{ startIndex: 0, type: 'white.python' },
				{ startIndex: 3, type: 'identifier.python' }
			]
		}
	],

	// Numbers
	// Hexadecimal: either prefix case, underscore allowed right after the prefix.
	[
		{
			line: '0xAcBFd',
			tokens: [{ startIndex: 0, type: 'number.hex.python' }]
		},
		{
			line: '0X_1234_ABCD',
			tokens: [{ startIndex: 0, type: 'number.hex.python' }]
		},
		{
			// 'H' is not a hex digit, so the literal stops at '0x0c' and 'H' is an identifier.
			line: '0x0cH',
			tokens: [
				{ startIndex: 0, type: 'number.hex.python' },
				{ startIndex: 4, type: 'identifier.python' }
			]
		}
	],
	// Octal
	[
		{
			line: '0o7501',
			tokens: [{ startIndex: 0, type: 'number.octal.python' }]
		},
		{
			line: '0O_1_2_3_4_5_6_7',
			tokens: [{ startIndex: 0, type: 'number.octal.python' }]
		}
	],
	// Binary
	[
		{
			line: '0b0',
			tokens: [{ startIndex: 0, type: 'number.binary.python' }]
		},
		{
			line: '0B_1010_0101',
			tokens: [{ startIndex: 0, type: 'number.binary.python' }]
		}
	],
	// Floats without exponent, including imaginary suffixes and missing integer/fraction parts.
	[
		{
			line: '3.14',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '456.7j',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '0.34J',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '.999_999',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '1.',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		}
	],
	// Floats with exponent
	[
		{
			line: '456.7e-7j',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '0.1234e+1J',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '.12e-0j',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '0E0',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		},
		{
			line: '1e1_0',
			tokens: [{ startIndex: 0, type: 'number.float.python' }]
		}
	],
	// Decimal integers
	[
		{
			line: '123456',
			tokens: [{ startIndex: 0, type: 'number.python' }]
		},
		{
			// The leading minus is its own token (empty type), not part of the literal.
			line: '-1L',
			tokens: [
				{ startIndex: 0, type: '' },
				{ startIndex: 1, type: 'number.python' }
			]
		},
		{
			line: '1_000_000_000',
			tokens: [{ startIndex: 0, type: 'number.python' }]
		}
	],

	// F-Strings
	[
		{
			// The f-prefix and opening quote form one 'string.escape' token; the whole
			// replacement field '{var}' is reported as an identifier.
			line: 'f"str {var} str"',
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 2, type: 'string.python' },
				{ startIndex: 6, type: 'identifier.python' },
				{ startIndex: 11, type: 'string.python' },
				{ startIndex: 15, type: 'string.escape.python' }
			]
		}
	],
	[
		{
			line: `f'''str {var} str'''`,
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 4, type: 'string.python' },
				{ startIndex: 8, type: 'identifier.python' },
				{ startIndex: 13, type: 'string.python' },
				{ startIndex: 17, type: 'string.escape.python' }
			]
		}
	],
	[
		{
			// Format spec (':.3f'), conversion ('!r') and '=' are expected as 'string' tokens
			// between the identifier tokens of adjacent replacement fields.
			line: 'f"{var:.3f}{var!r}{var=}"',
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 2, type: 'identifier.python' },
				{ startIndex: 6, type: 'string.python' },
				{ startIndex: 10, type: 'identifier.python' },
				{ startIndex: 15, type: 'string.python' },
				{ startIndex: 17, type: 'identifier.python' },
				{ startIndex: 22, type: 'string.python' },
				{ startIndex: 23, type: 'identifier.python' },
				{ startIndex: 24, type: 'string.escape.python' }
			]
		}
	],
	[
		{
			line: 'f"\' " "',
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 2, type: 'string.python' },
				{ startIndex: 4, type: 'string.escape.python' },
				{ startIndex: 5, type: 'white.python' },
				{ startIndex: 6, type: 'string.escape.python' }
			]
		}
	],
	[
		{
			// Without the f-prefix, braces are plain string content.
			line: '"{var}"',
			tokens: [
				{ startIndex: 0, type: 'string.escape.python' },
				{ startIndex: 1, type: 'string.python' },
				{ startIndex: 6, type: 'string.escape.python' }
			]
		}
	]
]);