123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- /*---------------------------------------------------------------------------------------------
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
- 'use strict';
/**
 * Creates a JSON scanner on the given text.
 * If ignoreTrivia is set, whitespaces or comments are ignored.
 *
 * Token kinds are reported as the numeric codes used throughout this file:
 * 1 `{`, 2 `}`, 3 `[`, 4 `]`, 5 `,`, 6 `:`, 7 null, 8 true, 9 false,
 * 10 string literal, 11 numeric literal, 12 line comment, 13 block comment,
 * 14 line break, 15 whitespace trivia, 16 unknown, 17 end of file.
 *
 * Scan errors are reported as: 0 none, 1 unexpected end of comment,
 * 2 unexpected end of string, 3 unexpected end of number, 4 invalid unicode
 * escape, 5 invalid escape character, 6 invalid character.
 *
 * @param {string} text - The text to tokenize.
 * @param {boolean} [ignoreTrivia=false] - When true, `scan()` skips
 *   whitespace, line-break and comment tokens (kinds 12 to 15).
 * @returns {object} A scanner exposing `setPosition`, `getPosition`, `scan`,
 *   `getToken`, `getTokenValue`, `getTokenOffset`, `getTokenLength`,
 *   `getTokenStartLine`, `getTokenStartCharacter` and `getTokenError`.
 */
export function createScanner(text, ignoreTrivia = false) {
    const len = text.length;
    let pos = 0;
    let value = '';
    let tokenOffset = 0;
    let token = 16 /* Unknown */;
    let lineNumber = 0;
    // Line number on which the current token starts (reported by getTokenStartLine).
    let lineStartOffset = 0;
    let tokenLineStartOffset = 0;
    let prevTokenLineStartOffset = 0;
    let scanError = 0 /* None */;

    /**
     * Reads hexadecimal digits at the current position, advancing `pos`.
     * With `exact` set, at most `count` digits are consumed; otherwise all
     * consecutive hex digits are consumed. Returns the numeric value, or -1
     * when fewer than `count` digits were found.
     */
    function scanHexDigits(count, exact) {
        let digits = 0;
        let hexValue = 0;
        while (digits < count || !exact) {
            const ch = text.charCodeAt(pos);
            if (ch >= 48 /* _0 */ && ch <= 57 /* _9 */) {
                hexValue = hexValue * 16 + ch - 48 /* _0 */;
            }
            else if (ch >= 65 /* A */ && ch <= 70 /* F */) {
                hexValue = hexValue * 16 + ch - 65 /* A */ + 10;
            }
            else if (ch >= 97 /* a */ && ch <= 102 /* f */) {
                hexValue = hexValue * 16 + ch - 97 /* a */ + 10;
            }
            else {
                break;
            }
            pos++;
            digits++;
        }
        if (digits < count) {
            hexValue = -1;
        }
        return hexValue;
    }

    /** Moves the scanner to `newPosition` and clears the current token state. */
    function setPosition(newPosition) {
        pos = newPosition;
        value = '';
        tokenOffset = 0;
        token = 16 /* Unknown */;
        scanError = 0 /* None */;
    }

    /**
     * Scans the unsigned part of a number starting at `pos` (which is on a digit)
     * and returns its text. Sets scanError to 3 when a fraction or exponent is
     * started but has no digits.
     */
    function scanNumber() {
        const start = pos;
        if (text.charCodeAt(pos) === 48 /* _0 */) {
            pos++;
        }
        else {
            pos++;
            while (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
            }
        }
        if (pos < len && text.charCodeAt(pos) === 46 /* dot */) {
            pos++;
            if (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
                while (pos < len && isDigit(text.charCodeAt(pos))) {
                    pos++;
                }
            }
            else {
                scanError = 3 /* UnexpectedEndOfNumber */;
                return text.substring(start, pos);
            }
        }
        let end = pos;
        if (pos < len && (text.charCodeAt(pos) === 69 /* E */ || text.charCodeAt(pos) === 101 /* e */)) {
            pos++;
            // The bounds check guards both sign alternatives.
            if (pos < len && (text.charCodeAt(pos) === 43 /* plus */ || text.charCodeAt(pos) === 45 /* minus */)) {
                pos++;
            }
            if (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
                while (pos < len && isDigit(text.charCodeAt(pos))) {
                    pos++;
                }
                end = pos;
            }
            else {
                // The returned text stops before the dangling exponent marker.
                scanError = 3 /* UnexpectedEndOfNumber */;
            }
        }
        return text.substring(start, end);
    }

    /**
     * Scans a string body; `pos` is just past the opening quote. Returns the
     * unescaped contents and leaves `pos` after the closing quote when there is one.
     */
    function scanString() {
        let result = '';
        let start = pos;
        while (true) {
            if (pos >= len) {
                result += text.substring(start, pos);
                scanError = 2 /* UnexpectedEndOfString */;
                break;
            }
            const ch = text.charCodeAt(pos);
            if (ch === 34 /* doubleQuote */) {
                result += text.substring(start, pos);
                pos++;
                break;
            }
            if (ch === 92 /* backslash */) {
                result += text.substring(start, pos);
                pos++;
                if (pos >= len) {
                    scanError = 2 /* UnexpectedEndOfString */;
                    break;
                }
                const ch2 = text.charCodeAt(pos++);
                switch (ch2) {
                    case 34 /* doubleQuote */:
                        result += '\"';
                        break;
                    case 92 /* backslash */:
                        result += '\\';
                        break;
                    case 47 /* slash */:
                        result += '/';
                        break;
                    case 98 /* b */:
                        result += '\b';
                        break;
                    case 102 /* f */:
                        result += '\f';
                        break;
                    case 110 /* n */:
                        result += '\n';
                        break;
                    case 114 /* r */:
                        result += '\r';
                        break;
                    case 116 /* t */:
                        result += '\t';
                        break;
                    case 117 /* u */: {
                        const ch3 = scanHexDigits(4, true);
                        if (ch3 >= 0) {
                            result += String.fromCharCode(ch3);
                        }
                        else {
                            scanError = 4 /* InvalidUnicode */;
                        }
                        break;
                    }
                    default:
                        scanError = 5 /* InvalidEscapeCharacter */;
                }
                start = pos;
                continue;
            }
            if (ch >= 0 && ch <= 0x1f) {
                if (isLineBreak(ch)) {
                    result += text.substring(start, pos);
                    scanError = 2 /* UnexpectedEndOfString */;
                    break;
                }
                else {
                    scanError = 6 /* InvalidCharacter */;
                    // mark as error but continue with string
                }
            }
            pos++;
        }
        return result;
    }

    /**
     * Scans a `//` comment; `pos` is on the first slash. The comment runs up to,
     * but does not include, the next line break. Returns the token kind.
     */
    function scanLineComment() {
        pos += 2;
        while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
            pos++;
        }
        // The token value starts at the first slash, not at the character before it.
        value = text.substring(tokenOffset, pos);
        return 12 /* LineCommentTrivia */;
    }

    /**
     * Scans a block comment; `pos` is on the opening slash. Line breaks inside
     * the comment update the line tracking. Returns the token kind.
     */
    function scanBlockComment() {
        pos += 2;
        const safeLength = len - 1; // For lookahead.
        let commentClosed = false;
        while (pos < safeLength) {
            const ch = text.charCodeAt(pos);
            if (ch === 42 /* asterisk */ && text.charCodeAt(pos + 1) === 47 /* slash */) {
                pos += 2;
                commentClosed = true;
                break;
            }
            pos++;
            if (isLineBreak(ch)) {
                if (ch === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                    pos++;
                }
                lineNumber++;
                tokenLineStartOffset = pos;
            }
        }
        if (!commentClosed) {
            // Consume the rest of the text without stepping past its end.
            pos = len;
            scanError = 1 /* UnexpectedEndOfComment */;
        }
        value = text.substring(tokenOffset, pos);
        return 13 /* BlockCommentTrivia */;
    }

    /** Scans the next token (including trivia) and returns its kind. */
    function scanNext() {
        value = '';
        scanError = 0 /* None */;
        tokenOffset = pos;
        lineStartOffset = lineNumber;
        prevTokenLineStartOffset = tokenLineStartOffset;
        if (pos >= len) {
            // at the end
            tokenOffset = len;
            return token = 17 /* EOF */;
        }
        let code = text.charCodeAt(pos);
        // trivia: whitespace
        if (isWhiteSpace(code)) {
            do {
                pos++;
                value += String.fromCharCode(code);
                code = text.charCodeAt(pos);
            } while (isWhiteSpace(code));
            return token = 15 /* Trivia */;
        }
        // trivia: newlines
        if (isLineBreak(code)) {
            pos++;
            value += String.fromCharCode(code);
            if (code === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                pos++;
                value += '\n';
            }
            lineNumber++;
            tokenLineStartOffset = pos;
            return token = 14 /* LineBreakTrivia */;
        }
        switch (code) {
            // tokens: []{}:,
            case 123 /* openBrace */:
                pos++;
                return token = 1 /* OpenBraceToken */;
            case 125 /* closeBrace */:
                pos++;
                return token = 2 /* CloseBraceToken */;
            case 91 /* openBracket */:
                pos++;
                return token = 3 /* OpenBracketToken */;
            case 93 /* closeBracket */:
                pos++;
                return token = 4 /* CloseBracketToken */;
            case 58 /* colon */:
                pos++;
                return token = 6 /* ColonToken */;
            case 44 /* comma */:
                pos++;
                return token = 5 /* CommaToken */;
            // strings
            case 34 /* doubleQuote */:
                pos++;
                value = scanString();
                return token = 10 /* StringLiteral */;
            // comments
            case 47 /* slash */: {
                const next = text.charCodeAt(pos + 1);
                if (next === 47 /* slash */) {
                    return token = scanLineComment();
                }
                if (next === 42 /* asterisk */) {
                    return token = scanBlockComment();
                }
                // just a single slash
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* Unknown */;
            }
            // numbers
            case 45 /* minus */:
                value += String.fromCharCode(code);
                pos++;
                if (pos === len || !isDigit(text.charCodeAt(pos))) {
                    return token = 16 /* Unknown */;
                }
            // found a minus, followed by a number so
            // we fall through to proceed with scanning
            // numbers
            case 48 /* _0 */:
            case 49 /* _1 */:
            case 50 /* _2 */:
            case 51 /* _3 */:
            case 52 /* _4 */:
            case 53 /* _5 */:
            case 54 /* _6 */:
            case 55 /* _7 */:
            case 56 /* _8 */:
            case 57 /* _9 */:
                value += scanNumber();
                return token = 11 /* NumericLiteral */;
            // literals and unknown symbols
            default:
                // is a literal? Read the full word.
                while (pos < len && isUnknownContentCharacter(code)) {
                    pos++;
                    code = text.charCodeAt(pos);
                }
                if (tokenOffset !== pos) {
                    value = text.substring(tokenOffset, pos);
                    // keywords: true, false, null
                    switch (value) {
                        case 'true': return token = 8 /* TrueKeyword */;
                        case 'false': return token = 9 /* FalseKeyword */;
                        case 'null': return token = 7 /* NullKeyword */;
                    }
                    return token = 16 /* Unknown */;
                }
                // a single character that cannot start a word
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* Unknown */;
        }
    }

    /**
     * Returns true when `code` can be part of a bare word, i.e. it is not
     * whitespace, a line break, or one of the characters } ] { [ " : , /
     */
    function isUnknownContentCharacter(code) {
        if (isWhiteSpace(code) || isLineBreak(code)) {
            return false;
        }
        switch (code) {
            case 125 /* closeBrace */:
            case 93 /* closeBracket */:
            case 123 /* openBrace */:
            case 91 /* openBracket */:
            case 34 /* doubleQuote */:
            case 58 /* colon */:
            case 44 /* comma */:
            case 47 /* slash */:
                return false;
        }
        return true;
    }

    /** Scans forward until a token that is not a comment, line break or whitespace. */
    function scanNextNonTrivia() {
        let result;
        do {
            result = scanNext();
        } while (result >= 12 /* LineCommentTrivia */ && result <= 15 /* Trivia */);
        return result;
    }

    return {
        setPosition,
        getPosition: () => pos,
        scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
        getToken: () => token,
        getTokenValue: () => value,
        getTokenOffset: () => tokenOffset,
        getTokenLength: () => pos - tokenOffset,
        getTokenStartLine: () => lineStartOffset,
        getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
        getTokenError: () => scanError,
    };
}
/**
 * Reports whether the UTF-16 code unit `ch` is treated as (non-line-break)
 * whitespace by the scanner.
 */
function isWhiteSpace(ch) {
    switch (ch) {
        case 32 /* space */:
        case 9 /* tab */:
        case 11 /* verticalTab */:
        case 12 /* formFeed */:
        case 160 /* nonBreakingSpace */:
        case 5760 /* ogham */:
        case 8239 /* narrowNoBreakSpace */:
        case 8287 /* mathematicalSpace */:
        case 12288 /* ideographicSpace */:
        case 65279 /* byteOrderMark */:
            return true;
        default:
            // The contiguous run from enQuad through zeroWidthSpace.
            return ch >= 8192 /* enQuad */ && ch <= 8203 /* zeroWidthSpace */;
    }
}
/**
 * Reports whether the UTF-16 code unit `ch` is one of the line terminators
 * recognised by the scanner.
 */
function isLineBreak(ch) {
    switch (ch) {
        case 10 /* lineFeed */:
        case 13 /* carriageReturn */:
        case 8232 /* lineSeparator */:
        case 8233 /* paragraphSeparator */:
            return true;
        default:
            return false;
    }
}
/**
 * Reports whether the UTF-16 code unit `ch` is an ASCII decimal digit.
 */
function isDigit(ch) {
    const ZERO = 48 /* _0 */;
    const NINE = 57 /* _9 */;
    if (ch >= ZERO) {
        return ch <= NINE;
    }
    return false;
}
|