scanner.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. 'use strict';
  /**
   * Creates a JSON scanner on the given text.
   * If ignoreTrivia is set, whitespaces or comments are ignored.
   *
   * Token kinds returned by `scan()` / `getToken()`:
   *   1 OpenBrace, 2 CloseBrace, 3 OpenBracket, 4 CloseBracket, 5 Comma, 6 Colon,
   *   7 Null, 8 True, 9 False, 10 StringLiteral, 11 NumericLiteral,
   *   12 LineCommentTrivia, 13 BlockCommentTrivia, 14 LineBreakTrivia, 15 Trivia,
   *   16 Unknown, 17 EOF.
   * Scan errors returned by `getTokenError()`:
   *   0 None, 1 UnexpectedEndOfComment, 2 UnexpectedEndOfString,
   *   3 UnexpectedEndOfNumber, 4 InvalidUnicode, 5 InvalidEscapeCharacter,
   *   6 InvalidCharacter.
   *
   * @param {string} text The text to tokenize.
   * @param {boolean} [ignoreTrivia=false] When truthy, `scan()` skips token kinds 12 to 15.
   * @returns {object} A scanner exposing `setPosition`, `getPosition`, `scan`, `getToken`,
   *   `getTokenValue`, `getTokenOffset`, `getTokenLength`, `getTokenStartLine`,
   *   `getTokenStartCharacter` and `getTokenError`.
   */
  export function createScanner(text, ignoreTrivia = false) {
    const len = text.length;
    // Offset of the next character to read.
    let pos = 0;
    // Text (or decoded content, for strings) of the most recent token.
    let value = '';
    // Offset at which the most recent token starts.
    let tokenOffset = 0;
    let token = 16 /* Unknown */;
    // Number of line breaks consumed so far.
    let lineNumber = 0;
    // Despite its name this holds the line number on which the current token starts.
    let lineStartOffset = 0;
    // Offset just past the most recently consumed line break.
    let tokenLineStartOffset = 0;
    // Snapshot of tokenLineStartOffset taken when the current token started.
    let prevTokenLineStartOffset = 0;
    let scanError = 0 /* None */;

    /**
     * Reads hexadecimal digits at the current position and returns their numeric value.
     * Stops after `count` digits when `exact` is truthy, otherwise at the first non-hex
     * character. Returns -1 when fewer than `count` digits were found.
     */
    function scanHexDigits(count, exact) {
      let digits = 0;
      let hexValue = 0;
      while (digits < count || !exact) {
        const ch = text.charCodeAt(pos);
        if (ch >= 48 /* _0 */ && ch <= 57 /* _9 */) {
          hexValue = hexValue * 16 + ch - 48 /* _0 */;
        } else if (ch >= 65 /* A */ && ch <= 70 /* F */) {
          hexValue = hexValue * 16 + ch - 65 /* A */ + 10;
        } else if (ch >= 97 /* a */ && ch <= 102 /* f */) {
          hexValue = hexValue * 16 + ch - 97 /* a */ + 10;
        } else {
          break;
        }
        pos++;
        digits++;
      }
      return digits < count ? -1 : hexValue;
    }

    /** Moves the scanner to `newPosition` and clears the current token state. */
    function setPosition(newPosition) {
      pos = newPosition;
      value = '';
      tokenOffset = 0;
      token = 16 /* Unknown */;
      scanError = 0 /* None */;
    }

    /** Advances `pos` while it points at a decimal digit. */
    function skipDigits() {
      while (pos < len && isDigit(text.charCodeAt(pos))) {
        pos++;
      }
    }

    /**
     * Scans the unsigned part of a number starting at a digit and returns its text.
     * Sets scanError to UnexpectedEndOfNumber when a fraction or exponent has no digits.
     */
    function scanNumber() {
      const start = pos;
      if (text.charCodeAt(pos) === 48 /* _0 */) {
        // A leading zero is never followed by further integer digits.
        pos++;
      } else {
        pos++;
        skipDigits();
      }
      if (pos < len && text.charCodeAt(pos) === 46 /* dot */) {
        pos++;
        if (pos < len && isDigit(text.charCodeAt(pos))) {
          pos++;
          skipDigits();
        } else {
          scanError = 3 /* UnexpectedEndOfNumber */;
          return text.substring(start, pos);
        }
      }
      let end = pos;
      if (pos < len && (text.charCodeAt(pos) === 69 /* E */ || text.charCodeAt(pos) === 101 /* e */)) {
        pos++;
        // Both sign tests sit inside the bounds guard.
        if (pos < len && (text.charCodeAt(pos) === 43 /* plus */ || text.charCodeAt(pos) === 45 /* minus */)) {
          pos++;
        }
        if (pos < len && isDigit(text.charCodeAt(pos))) {
          pos++;
          skipDigits();
          end = pos;
        } else {
          // The returned text stops before the dangling exponent; pos stays past it.
          scanError = 3 /* UnexpectedEndOfNumber */;
        }
      }
      return text.substring(start, end);
    }

    /**
     * Scans string content; `pos` must be just past the opening quote.
     * Returns the content with escape sequences decoded.
     */
    function scanString() {
      let result = '';
      let start = pos;
      while (true) {
        if (pos >= len) {
          result += text.substring(start, pos);
          scanError = 2 /* UnexpectedEndOfString */;
          break;
        }
        const ch = text.charCodeAt(pos);
        if (ch === 34 /* doubleQuote */) {
          result += text.substring(start, pos);
          pos++;
          break;
        }
        if (ch === 92 /* backslash */) {
          result += text.substring(start, pos);
          pos++;
          if (pos >= len) {
            scanError = 2 /* UnexpectedEndOfString */;
            break;
          }
          const escaped = text.charCodeAt(pos++);
          switch (escaped) {
            case 34 /* doubleQuote */:
              result += '\"';
              break;
            case 92 /* backslash */:
              result += '\\';
              break;
            case 47 /* slash */:
              result += '/';
              break;
            case 98 /* b */:
              result += '\b';
              break;
            case 102 /* f */:
              result += '\f';
              break;
            case 110 /* n */:
              result += '\n';
              break;
            case 114 /* r */:
              result += '\r';
              break;
            case 116 /* t */:
              result += '\t';
              break;
            case 117 /* u */: {
              const codeUnit = scanHexDigits(4, true);
              if (codeUnit >= 0) {
                result += String.fromCharCode(codeUnit);
              } else {
                scanError = 4 /* InvalidUnicode */;
              }
              break;
            }
            default:
              scanError = 5 /* InvalidEscapeCharacter */;
          }
          start = pos;
          continue;
        }
        if (ch >= 0 && ch <= 0x1f) {
          if (isLineBreak(ch)) {
            // A raw line break ends the string; the break itself is not consumed.
            result += text.substring(start, pos);
            scanError = 2 /* UnexpectedEndOfString */;
            break;
          }
          // Other control characters are flagged, but scanning of the string continues.
          scanError = 6 /* InvalidCharacter */;
        }
        pos++;
      }
      return result;
    }

    /**
     * Scans a token that starts with a slash at `pos`: a line comment, a block comment,
     * or a lone slash (reported as Unknown).
     */
    function scanSlashOrComment() {
      // The comment text begins at the slash itself, matching getTokenOffset().
      const start = pos;
      const next = text.charCodeAt(pos + 1);
      // Single-line comment
      if (next === 47 /* slash */) {
        pos += 2;
        while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
          pos++;
        }
        value = text.substring(start, pos);
        return token = 12 /* LineCommentTrivia */;
      }
      // Multi-line comment
      if (next === 42 /* asterisk */) {
        pos += 2;
        const safeLength = len - 1; // For lookahead.
        let commentClosed = false;
        while (pos < safeLength) {
          const ch = text.charCodeAt(pos);
          if (ch === 42 /* asterisk */ && text.charCodeAt(pos + 1) === 47 /* slash */) {
            pos += 2;
            commentClosed = true;
            break;
          }
          pos++;
          if (isLineBreak(ch)) {
            if (ch === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
              pos++;
            }
            lineNumber++;
            tokenLineStartOffset = pos;
          }
        }
        if (!commentClosed) {
          // The comment runs to the end of the text; never move past it.
          pos = len;
          scanError = 1 /* UnexpectedEndOfComment */;
        }
        value = text.substring(start, pos);
        return token = 13 /* BlockCommentTrivia */;
      }
      // just a single slash
      value += String.fromCharCode(47 /* slash */);
      pos++;
      return token = 16 /* Unknown */;
    }

    /** Scans the next token, trivia included, and returns its kind. */
    function scanNext() {
      value = '';
      scanError = 0 /* None */;
      tokenOffset = pos;
      lineStartOffset = lineNumber;
      prevTokenLineStartOffset = tokenLineStartOffset;
      if (pos >= len) {
        // at the end
        tokenOffset = len;
        return token = 17 /* EOF */;
      }
      let code = text.charCodeAt(pos);
      // trivia: whitespace
      if (isWhiteSpace(code)) {
        do {
          pos++;
          value += String.fromCharCode(code);
          code = text.charCodeAt(pos);
        } while (isWhiteSpace(code));
        return token = 15 /* Trivia */;
      }
      // trivia: newlines
      if (isLineBreak(code)) {
        pos++;
        value += String.fromCharCode(code);
        // CR LF counts as a single line break.
        if (code === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
          pos++;
          value += '\n';
        }
        lineNumber++;
        tokenLineStartOffset = pos;
        return token = 14 /* LineBreakTrivia */;
      }
      switch (code) {
        // tokens: []{}:,
        case 123 /* openBrace */:
          pos++;
          return token = 1 /* OpenBraceToken */;
        case 125 /* closeBrace */:
          pos++;
          return token = 2 /* CloseBraceToken */;
        case 91 /* openBracket */:
          pos++;
          return token = 3 /* OpenBracketToken */;
        case 93 /* closeBracket */:
          pos++;
          return token = 4 /* CloseBracketToken */;
        case 58 /* colon */:
          pos++;
          return token = 6 /* ColonToken */;
        case 44 /* comma */:
          pos++;
          return token = 5 /* CommaToken */;
        // strings
        case 34 /* doubleQuote */:
          pos++;
          value = scanString();
          return token = 10 /* StringLiteral */;
        // comments
        case 47 /* slash */:
          return scanSlashOrComment();
        // numbers
        case 45 /* minus */:
          value += String.fromCharCode(code);
          pos++;
          if (pos === len || !isDigit(text.charCodeAt(pos))) {
            return token = 16 /* Unknown */;
          }
        // found a minus followed by a digit: falls through to number scanning
        case 48 /* _0 */:
        case 49 /* _1 */:
        case 50 /* _2 */:
        case 51 /* _3 */:
        case 52 /* _4 */:
        case 53 /* _5 */:
        case 54 /* _6 */:
        case 55 /* _7 */:
        case 56 /* _8 */:
        case 57 /* _9 */:
          value += scanNumber();
          return token = 11 /* NumericLiteral */;
        // literals and unknown symbols
        default:
          // is a literal? Read the full word.
          while (pos < len && isUnknownContentCharacter(code)) {
            pos++;
            code = text.charCodeAt(pos);
          }
          if (tokenOffset !== pos) {
            value = text.substring(tokenOffset, pos);
            // keywords: true, false, null
            switch (value) {
              case 'true': return token = 8 /* TrueKeyword */;
              case 'false': return token = 9 /* FalseKeyword */;
              case 'null': return token = 7 /* NullKeyword */;
            }
            return token = 16 /* Unknown */;
          }
          // Defensive fallback: every character rejected by isUnknownContentCharacter
          // is handled by an earlier branch, so this should not be reached.
          value += String.fromCharCode(code);
          pos++;
          return token = 16 /* Unknown */;
      }
    }

    /** Returns true when `code` may be part of a bare word (a keyword or unknown content). */
    function isUnknownContentCharacter(code) {
      if (isWhiteSpace(code) || isLineBreak(code)) {
        return false;
      }
      switch (code) {
        case 125 /* closeBrace */:
        case 93 /* closeBracket */:
        case 123 /* openBrace */:
        case 91 /* openBracket */:
        case 34 /* doubleQuote */:
        case 58 /* colon */:
        case 44 /* comma */:
        case 47 /* slash */:
          return false;
      }
      return true;
    }

    /** Scans tokens until one that is neither a comment, a line break, nor whitespace. */
    function scanNextNonTrivia() {
      let result;
      do {
        result = scanNext();
      } while (result >= 12 /* LineCommentTrivia */ && result <= 15 /* Trivia */);
      return result;
    }

    return {
      setPosition: setPosition,
      getPosition: function () { return pos; },
      scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
      getToken: function () { return token; },
      getTokenValue: function () { return value; },
      getTokenOffset: function () { return tokenOffset; },
      getTokenLength: function () { return pos - tokenOffset; },
      getTokenStartLine: function () { return lineStartOffset; },
      getTokenStartCharacter: function () { return tokenOffset - prevTokenLineStartOffset; },
      getTokenError: function () { return scanError; },
    };
  }
  /**
   * Tells whether a UTF-16 code unit is treated as (non-line-breaking) whitespace.
   * @param {number} ch Character code to classify.
   * @returns {boolean} True for space, tab, vertical tab, form feed and the listed Unicode spaces.
   */
  function isWhiteSpace(ch) {
    // Contiguous range: enQuad (8192) through zeroWidthSpace (8203).
    if (ch >= 8192 /* enQuad */ && ch <= 8203 /* zeroWidthSpace */) {
      return true;
    }
    switch (ch) {
      case 32 /* space */:
      case 9 /* tab */:
      case 11 /* verticalTab */:
      case 12 /* formFeed */:
      case 160 /* nonBreakingSpace */:
      case 5760 /* ogham */:
      case 8239 /* narrowNoBreakSpace */:
      case 8287 /* mathematicalSpace */:
      case 12288 /* ideographicSpace */:
      case 65279 /* byteOrderMark */:
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether a UTF-16 code unit is a line terminator.
   * @param {number} ch Character code to classify.
   * @returns {boolean} True for line feed, carriage return, line separator and paragraph separator.
   */
  function isLineBreak(ch) {
    switch (ch) {
      case 10 /* lineFeed */:
      case 13 /* carriageReturn */:
      case 8232 /* lineSeparator */:
      case 8233 /* paragraphSeparator */:
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether a UTF-16 code unit is an ASCII decimal digit.
   * @param {number} ch Character code to classify.
   * @returns {boolean} True when ch lies between '0' (48) and '9' (57), inclusive.
   */
  function isDigit(ch) {
    const belowZero = ch < 48 /* _0 */;
    const aboveNine = ch > 57 /* _9 */;
    // Spelled with explicit comparisons (not negation) so NaN still yields false.
    return ch >= 48 /* _0 */ && !belowZero && ch <= 57 /* _9 */ && !aboveNine;
  }