scanner.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. (function (factory) {
  2. if (typeof module === "object" && typeof module.exports === "object") {
  3. var v = factory(require, exports);
  4. if (v !== undefined) module.exports = v;
  5. }
  6. else if (typeof define === "function" && define.amd) {
  7. define(["require", "exports"], factory);
  8. }
  9. })(function (require, exports) {
  10. /*---------------------------------------------------------------------------------------------
  11. * Copyright (c) Microsoft Corporation. All rights reserved.
  12. * Licensed under the MIT License. See License.txt in the project root for license information.
  13. *--------------------------------------------------------------------------------------------*/
  14. 'use strict';
  15. Object.defineProperty(exports, "__esModule", { value: true });
  16. exports.createScanner = void 0;
  /**
   * Creates a JSON scanner on the given text.
   * If ignoreTrivia is set, whitespaces or comments are ignored.
   *
   * The returned object exposes:
   *  - `setPosition(offset)`: moves the read position and resets the current token state.
   *  - `getPosition()`: the current read offset.
   *  - `scan()`: reads the next token and returns its numeric kind (17 is EOF).
   *  - `getToken()`, `getTokenValue()`, `getTokenOffset()`, `getTokenLength()`:
   *    kind, text/decoded value, start offset and length of the last scanned token.
   *  - `getTokenStartLine()`, `getTokenStartCharacter()`: zero-based line and column
   *    at which the last scanned token starts.
   *  - `getTokenError()`: numeric scan error of the last token (0 means none).
   *
   * @param {string} text The text to tokenize.
   * @param {boolean} [ignoreTrivia=false] When true, `scan()` skips token kinds 12..15
   *     (line comments, block comments, line breaks, whitespace).
   * @returns {object} The scanner described above.
   */
  function createScanner(text, ignoreTrivia) {
      if (ignoreTrivia === void 0) { ignoreTrivia = false; }
      var len = text.length;
      var pos = 0;
      var value = '';
      var tokenOffset = 0;
      var token = 16 /* Unknown */;
      var lineNumber = 0;
      // Line number on which the current token starts (captured at the start of each scan).
      var tokenStartLine = 0;
      // Offset just past the most recent line break seen so far.
      var tokenLineStartOffset = 0;
      // Value of tokenLineStartOffset at the start of the current token.
      var prevTokenLineStartOffset = 0;
      var scanError = 0 /* None */;

      /**
       * Reads hexadecimal digits at the current position and returns their numeric value.
       * With `exact` set, at most `count` digits are consumed; otherwise as many as are present.
       * Returns -1 when fewer than `count` digits were found.
       */
      function scanHexDigits(count, exact) {
          var digits = 0;
          var hexValue = 0;
          while (digits < count || !exact) {
              var ch = text.charCodeAt(pos);
              if (ch >= 48 /* _0 */ && ch <= 57 /* _9 */) {
                  hexValue = hexValue * 16 + ch - 48 /* _0 */;
              }
              else if (ch >= 65 /* A */ && ch <= 70 /* F */) {
                  hexValue = hexValue * 16 + ch - 65 /* A */ + 10;
              }
              else if (ch >= 97 /* a */ && ch <= 102 /* f */) {
                  hexValue = hexValue * 16 + ch - 97 /* a */ + 10;
              }
              else {
                  break;
              }
              pos++;
              digits++;
          }
          if (digits < count) {
              hexValue = -1;
          }
          return hexValue;
      }

      /** Moves the read position and clears the current token, value and error. */
      function setPosition(newPosition) {
          pos = newPosition;
          value = '';
          tokenOffset = 0;
          token = 16 /* Unknown */;
          scanError = 0 /* None */;
      }

      /**
       * Scans an unsigned number starting at the current position (the caller has
       * already verified that a digit is there) and returns its text.
       * Sets scanError to UnexpectedEndOfNumber when a '.' or exponent lacks digits.
       */
      function scanNumber() {
          var start = pos;
          if (text.charCodeAt(pos) === 48 /* _0 */) {
              // A leading zero stands alone; following digits form a separate token.
              pos++;
          }
          else {
              pos++;
              while (pos < len && isDigit(text.charCodeAt(pos))) {
                  pos++;
              }
          }
          if (pos < len && text.charCodeAt(pos) === 46 /* dot */) {
              pos++;
              if (pos < len && isDigit(text.charCodeAt(pos))) {
                  pos++;
                  while (pos < len && isDigit(text.charCodeAt(pos))) {
                      pos++;
                  }
              }
              else {
                  scanError = 3 /* UnexpectedEndOfNumber */;
                  return text.substring(start, pos);
              }
          }
          // `end` only advances past the exponent when the exponent is well-formed.
          var end = pos;
          if (pos < len && (text.charCodeAt(pos) === 69 /* E */ || text.charCodeAt(pos) === 101 /* e */)) {
              pos++;
              // Parenthesised so the bounds check guards both sign characters.
              if (pos < len && (text.charCodeAt(pos) === 43 /* plus */ || text.charCodeAt(pos) === 45 /* minus */)) {
                  pos++;
              }
              if (pos < len && isDigit(text.charCodeAt(pos))) {
                  pos++;
                  while (pos < len && isDigit(text.charCodeAt(pos))) {
                      pos++;
                  }
                  end = pos;
              }
              else {
                  scanError = 3 /* UnexpectedEndOfNumber */;
              }
          }
          return text.substring(start, end);
      }

      /**
       * Scans the body of a string literal (the opening quote is already consumed)
       * and returns its value with escape sequences decoded. Problems are reported
       * through scanError; scanning continues where possible.
       */
      function scanString() {
          var result = '';
          var start = pos;
          while (true) {
              if (pos >= len) {
                  result += text.substring(start, pos);
                  scanError = 2 /* UnexpectedEndOfString */;
                  break;
              }
              var ch = text.charCodeAt(pos);
              if (ch === 34 /* doubleQuote */) {
                  result += text.substring(start, pos);
                  pos++;
                  break;
              }
              if (ch === 92 /* backslash */) {
                  result += text.substring(start, pos);
                  pos++;
                  if (pos >= len) {
                      scanError = 2 /* UnexpectedEndOfString */;
                      break;
                  }
                  var ch2 = text.charCodeAt(pos++);
                  switch (ch2) {
                      case 34 /* doubleQuote */:
                          result += '\"';
                          break;
                      case 92 /* backslash */:
                          result += '\\';
                          break;
                      case 47 /* slash */:
                          result += '/';
                          break;
                      case 98 /* b */:
                          result += '\b';
                          break;
                      case 102 /* f */:
                          result += '\f';
                          break;
                      case 110 /* n */:
                          result += '\n';
                          break;
                      case 114 /* r */:
                          result += '\r';
                          break;
                      case 116 /* t */:
                          result += '\t';
                          break;
                      case 117 /* u */:
                          var ch3 = scanHexDigits(4, true);
                          if (ch3 >= 0) {
                              result += String.fromCharCode(ch3);
                          }
                          else {
                              scanError = 4 /* InvalidUnicode */;
                          }
                          break;
                      default:
                          scanError = 5 /* InvalidEscapeCharacter */;
                  }
                  start = pos;
                  continue;
              }
              if (ch >= 0 && ch <= 0x1f) {
                  if (isLineBreak(ch)) {
                      // A raw line break ends the string; the break itself is not consumed.
                      result += text.substring(start, pos);
                      scanError = 2 /* UnexpectedEndOfString */;
                      break;
                  }
                  else {
                      scanError = 6 /* InvalidCharacter */;
                      // mark as error but continue with string
                  }
              }
              pos++;
          }
          return result;
      }

      /** Scans a `//` comment starting at pos (on the first slash); stops before the line break. */
      function scanLineComment() {
          // The token value starts at the first slash, matching getTokenOffset().
          var start = pos;
          pos += 2;
          while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
              pos++;
          }
          value = text.substring(start, pos);
          return 12 /* LineCommentTrivia */;
      }

      /**
       * Scans a block comment starting at pos (on the slash), tracking line breaks inside it.
       * Sets scanError to UnexpectedEndOfComment when the closing marker is missing.
       */
      function scanBlockComment() {
          // The token value starts at the slash, matching getTokenOffset().
          var start = pos;
          pos += 2;
          var safeLength = len - 1; // For lookahead.
          var commentClosed = false;
          while (pos < safeLength) {
              var ch = text.charCodeAt(pos);
              if (ch === 42 /* asterisk */ && text.charCodeAt(pos + 1) === 47 /* slash */) {
                  pos += 2;
                  commentClosed = true;
                  break;
              }
              pos++;
              if (isLineBreak(ch)) {
                  if (ch === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                      pos++;
                  }
                  lineNumber++;
                  tokenLineStartOffset = pos;
              }
          }
          if (!commentClosed) {
              // Consume the rest of the input, but never move past the end of the text.
              pos = len;
              scanError = 1 /* UnexpectedEndOfComment */;
          }
          value = text.substring(start, pos);
          return 13 /* BlockCommentTrivia */;
      }

      /** Scans the next token (trivia included), updates the token state and returns its kind. */
      function scanNext() {
          value = '';
          scanError = 0 /* None */;
          tokenOffset = pos;
          tokenStartLine = lineNumber;
          prevTokenLineStartOffset = tokenLineStartOffset;
          if (pos >= len) {
              // at the end
              tokenOffset = len;
              return token = 17 /* EOF */;
          }
          var code = text.charCodeAt(pos);
          // trivia: whitespace
          if (isWhiteSpace(code)) {
              do {
                  pos++;
                  value += String.fromCharCode(code);
                  code = text.charCodeAt(pos);
              } while (isWhiteSpace(code));
              return token = 15 /* Trivia */;
          }
          // trivia: newlines
          if (isLineBreak(code)) {
              pos++;
              value += String.fromCharCode(code);
              if (code === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                  pos++;
                  value += '\n';
              }
              lineNumber++;
              tokenLineStartOffset = pos;
              return token = 14 /* LineBreakTrivia */;
          }
          switch (code) {
              // tokens: []{}:,
              case 123 /* openBrace */:
                  pos++;
                  return token = 1 /* OpenBraceToken */;
              case 125 /* closeBrace */:
                  pos++;
                  return token = 2 /* CloseBraceToken */;
              case 91 /* openBracket */:
                  pos++;
                  return token = 3 /* OpenBracketToken */;
              case 93 /* closeBracket */:
                  pos++;
                  return token = 4 /* CloseBracketToken */;
              case 58 /* colon */:
                  pos++;
                  return token = 6 /* ColonToken */;
              case 44 /* comma */:
                  pos++;
                  return token = 5 /* CommaToken */;
              // strings
              case 34 /* doubleQuote */:
                  pos++;
                  value = scanString();
                  return token = 10 /* StringLiteral */;
              // comments
              case 47 /* slash */:
                  var following = text.charCodeAt(pos + 1);
                  if (following === 47 /* slash */) {
                      return token = scanLineComment();
                  }
                  if (following === 42 /* asterisk */) {
                      return token = scanBlockComment();
                  }
                  // just a single slash
                  value += String.fromCharCode(code);
                  pos++;
                  return token = 16 /* Unknown */;
              // numbers
              case 45 /* minus */:
                  value += String.fromCharCode(code);
                  pos++;
                  if (pos === len || !isDigit(text.charCodeAt(pos))) {
                      return token = 16 /* Unknown */;
                  }
              // found a minus, followed by a number so
              // we fall through to proceed with scanning
              // numbers
              case 48 /* _0 */:
              case 49 /* _1 */:
              case 50 /* _2 */:
              case 51 /* _3 */:
              case 52 /* _4 */:
              case 53 /* _5 */:
              case 54 /* _6 */:
              case 55 /* _7 */:
              case 56 /* _8 */:
              case 57 /* _9 */:
                  value += scanNumber();
                  return token = 11 /* NumericLiteral */;
              // literals and unknown symbols
              default:
                  // is a literal? Read the full word.
                  while (pos < len && isUnknownContentCharacter(code)) {
                      pos++;
                      code = text.charCodeAt(pos);
                  }
                  if (tokenOffset !== pos) {
                      value = text.substring(tokenOffset, pos);
                      // keywords: true, false, null
                      switch (value) {
                          case 'true': return token = 8 /* TrueKeyword */;
                          case 'false': return token = 9 /* FalseKeyword */;
                          case 'null': return token = 7 /* NullKeyword */;
                      }
                      return token = 16 /* Unknown */;
                  }
                  // Defensive fallback: consume a single character so scanning always advances.
                  value += String.fromCharCode(code);
                  pos++;
                  return token = 16 /* Unknown */;
          }
      }

      /** Returns true for characters that may be part of a bare word (keyword or unknown content). */
      function isUnknownContentCharacter(code) {
          if (isWhiteSpace(code) || isLineBreak(code)) {
              return false;
          }
          switch (code) {
              case 125 /* closeBrace */:
              case 93 /* closeBracket */:
              case 123 /* openBrace */:
              case 91 /* openBracket */:
              case 34 /* doubleQuote */:
              case 58 /* colon */:
              case 44 /* comma */:
              case 47 /* slash */:
                  return false;
          }
          return true;
      }

      /** Scans tokens until one is found that is not a comment, line break or whitespace. */
      function scanNextNonTrivia() {
          var result;
          do {
              result = scanNext();
          } while (result >= 12 /* LineCommentTrivia */ && result <= 15 /* Trivia */);
          return result;
      }

      return {
          setPosition: setPosition,
          getPosition: function () { return pos; },
          scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
          getToken: function () { return token; },
          getTokenValue: function () { return value; },
          getTokenOffset: function () { return tokenOffset; },
          getTokenLength: function () { return pos - tokenOffset; },
          getTokenStartLine: function () { return tokenStartLine; },
          getTokenStartCharacter: function () { return tokenOffset - prevTokenLineStartOffset; },
          getTokenError: function () { return scanError; },
      };
  }
  364. exports.createScanner = createScanner;
  /**
   * Tells whether a UTF-16 code unit is treated as (non line-breaking) whitespace.
   * @param {number} ch Character code to classify.
   * @returns {boolean} True for the whitespace codes listed below.
   */
  function isWhiteSpace(ch) {
      // Contiguous range: enQuad (8192) through zeroWidthSpace (8203).
      if (ch >= 8192 /* enQuad */ && ch <= 8203 /* zeroWidthSpace */) {
          return true;
      }
      switch (ch) {
          case 32 /* space */:
          case 9 /* tab */:
          case 11 /* verticalTab */:
          case 12 /* formFeed */:
          case 160 /* nonBreakingSpace */:
          case 5760 /* ogham */:
          case 8239 /* narrowNoBreakSpace */:
          case 8287 /* mathematicalSpace */:
          case 12288 /* ideographicSpace */:
          case 65279 /* byteOrderMark */:
              return true;
          default:
              return false;
      }
  }
  /**
   * Tells whether a UTF-16 code unit is a line terminator.
   * @param {number} ch Character code to classify.
   * @returns {boolean} True for line feed, carriage return, line separator and paragraph separator.
   */
  function isLineBreak(ch) {
      switch (ch) {
          case 10 /* lineFeed */:
          case 13 /* carriageReturn */:
          case 8232 /* lineSeparator */:
          case 8233 /* paragraphSeparator */:
              return true;
          default:
              return false;
      }
  }
  /**
   * Tells whether a UTF-16 code unit is an ASCII decimal digit ('0'..'9').
   * @param {number} ch Character code to classify.
   * @returns {boolean} True when ch lies between 48 and 57 inclusive.
   */
  function isDigit(ch) {
      var zero = 48 /* _0 */;
      var nine = 57 /* _9 */;
      // Positive range test keeps NaN (out-of-range charCodeAt) classified as a non-digit.
      return zero <= ch && ch <= nine;
  }
  376. });