// Parser.js (18 KB)
  1. "use strict";
  2. var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
  3. if (k2 === undefined) k2 = k;
  4. var desc = Object.getOwnPropertyDescriptor(m, k);
  5. if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
  6. desc = { enumerable: true, get: function() { return m[k]; } };
  7. }
  8. Object.defineProperty(o, k2, desc);
  9. }) : (function(o, m, k, k2) {
  10. if (k2 === undefined) k2 = k;
  11. o[k2] = m[k];
  12. }));
  13. var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
  14. Object.defineProperty(o, "default", { enumerable: true, value: v });
  15. }) : function(o, v) {
  16. o["default"] = v;
  17. });
  18. var __importStar = (this && this.__importStar) || function (mod) {
  19. if (mod && mod.__esModule) return mod;
  20. var result = {};
  21. if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
  22. __setModuleDefault(result, mod);
  23. return result;
  24. };
  25. Object.defineProperty(exports, "__esModule", { value: true });
  26. exports.Parser = void 0;
  27. var Tokenizer_js_1 = __importStar(require("./Tokenizer.js"));
  28. var decode_js_1 = require("entities/lib/decode.js");
  29. var formTags = new Set([
  30. "input",
  31. "option",
  32. "optgroup",
  33. "select",
  34. "button",
  35. "datalist",
  36. "textarea",
  37. ]);
  38. var pTag = new Set(["p"]);
  39. var tableSectionTags = new Set(["thead", "tbody"]);
  40. var ddtTags = new Set(["dd", "dt"]);
  41. var rtpTags = new Set(["rt", "rp"]);
  42. var openImpliesClose = new Map([
  43. ["tr", new Set(["tr", "th", "td"])],
  44. ["th", new Set(["th"])],
  45. ["td", new Set(["thead", "th", "td"])],
  46. ["body", new Set(["head", "link", "script"])],
  47. ["li", new Set(["li"])],
  48. ["p", pTag],
  49. ["h1", pTag],
  50. ["h2", pTag],
  51. ["h3", pTag],
  52. ["h4", pTag],
  53. ["h5", pTag],
  54. ["h6", pTag],
  55. ["select", formTags],
  56. ["input", formTags],
  57. ["output", formTags],
  58. ["button", formTags],
  59. ["datalist", formTags],
  60. ["textarea", formTags],
  61. ["option", new Set(["option"])],
  62. ["optgroup", new Set(["optgroup", "option"])],
  63. ["dd", ddtTags],
  64. ["dt", ddtTags],
  65. ["address", pTag],
  66. ["article", pTag],
  67. ["aside", pTag],
  68. ["blockquote", pTag],
  69. ["details", pTag],
  70. ["div", pTag],
  71. ["dl", pTag],
  72. ["fieldset", pTag],
  73. ["figcaption", pTag],
  74. ["figure", pTag],
  75. ["footer", pTag],
  76. ["form", pTag],
  77. ["header", pTag],
  78. ["hr", pTag],
  79. ["main", pTag],
  80. ["nav", pTag],
  81. ["ol", pTag],
  82. ["pre", pTag],
  83. ["section", pTag],
  84. ["table", pTag],
  85. ["ul", pTag],
  86. ["rt", rtpTags],
  87. ["rp", rtpTags],
  88. ["tbody", tableSectionTags],
  89. ["tfoot", tableSectionTags],
  90. ]);
  91. var voidElements = new Set([
  92. "area",
  93. "base",
  94. "basefont",
  95. "br",
  96. "col",
  97. "command",
  98. "embed",
  99. "frame",
  100. "hr",
  101. "img",
  102. "input",
  103. "isindex",
  104. "keygen",
  105. "link",
  106. "meta",
  107. "param",
  108. "source",
  109. "track",
  110. "wbr",
  111. ]);
  112. var foreignContextElements = new Set(["math", "svg"]);
  113. var htmlIntegrationElements = new Set([
  114. "mi",
  115. "mo",
  116. "mn",
  117. "ms",
  118. "mtext",
  119. "annotation-xml",
  120. "foreignobject",
  121. "desc",
  122. "title",
  123. ]);
  124. var reNameEnd = /\s|\//;
  125. var Parser = /** @class */ (function () {
  126. function Parser(cbs, options) {
  127. if (options === void 0) { options = {}; }
  128. var _a, _b, _c, _d, _e;
  129. this.options = options;
  130. /** The start index of the last event. */
  131. this.startIndex = 0;
  132. /** The end index of the last event. */
  133. this.endIndex = 0;
  134. /**
  135. * Store the start index of the current open tag,
  136. * so we can update the start index for attributes.
  137. */
  138. this.openTagStart = 0;
  139. this.tagname = "";
  140. this.attribname = "";
  141. this.attribvalue = "";
  142. this.attribs = null;
  143. this.stack = [];
  144. this.foreignContext = [];
  145. this.buffers = [];
  146. this.bufferOffset = 0;
  147. /** The index of the last written buffer. Used when resuming after a `pause()`. */
  148. this.writeIndex = 0;
  149. /** Indicates whether the parser has finished running / `.end` has been called. */
  150. this.ended = false;
  151. this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
  152. this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;
  153. this.lowerCaseAttributeNames =
  154. (_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
  155. this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_js_1.default)(this.options, this);
  156. (_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
  157. }
  158. // Tokenizer event handlers
  159. /** @internal */
  160. Parser.prototype.ontext = function (start, endIndex) {
  161. var _a, _b;
  162. var data = this.getSlice(start, endIndex);
  163. this.endIndex = endIndex - 1;
  164. (_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
  165. this.startIndex = endIndex;
  166. };
  167. /** @internal */
  168. Parser.prototype.ontextentity = function (cp) {
  169. var _a, _b;
  170. /*
  171. * Entities can be emitted on the character, or directly after.
  172. * We use the section start here to get accurate indices.
  173. */
  174. var idx = this.tokenizer.getSectionStart();
  175. this.endIndex = idx - 1;
  176. (_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, (0, decode_js_1.fromCodePoint)(cp));
  177. this.startIndex = idx;
  178. };
  179. Parser.prototype.isVoidElement = function (name) {
  180. return !this.options.xmlMode && voidElements.has(name);
  181. };
  182. /** @internal */
  183. Parser.prototype.onopentagname = function (start, endIndex) {
  184. this.endIndex = endIndex;
  185. var name = this.getSlice(start, endIndex);
  186. if (this.lowerCaseTagNames) {
  187. name = name.toLowerCase();
  188. }
  189. this.emitOpenTag(name);
  190. };
  191. Parser.prototype.emitOpenTag = function (name) {
  192. var _a, _b, _c, _d;
  193. this.openTagStart = this.startIndex;
  194. this.tagname = name;
  195. var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
  196. if (impliesClose) {
  197. while (this.stack.length > 0 &&
  198. impliesClose.has(this.stack[this.stack.length - 1])) {
  199. var el = this.stack.pop();
  200. (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el, true);
  201. }
  202. }
  203. if (!this.isVoidElement(name)) {
  204. this.stack.push(name);
  205. if (foreignContextElements.has(name)) {
  206. this.foreignContext.push(true);
  207. }
  208. else if (htmlIntegrationElements.has(name)) {
  209. this.foreignContext.push(false);
  210. }
  211. }
  212. (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);
  213. if (this.cbs.onopentag)
  214. this.attribs = {};
  215. };
  216. Parser.prototype.endOpenTag = function (isImplied) {
  217. var _a, _b;
  218. this.startIndex = this.openTagStart;
  219. if (this.attribs) {
  220. (_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs, isImplied);
  221. this.attribs = null;
  222. }
  223. if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
  224. this.cbs.onclosetag(this.tagname, true);
  225. }
  226. this.tagname = "";
  227. };
  228. /** @internal */
  229. Parser.prototype.onopentagend = function (endIndex) {
  230. this.endIndex = endIndex;
  231. this.endOpenTag(false);
  232. // Set `startIndex` for next node
  233. this.startIndex = endIndex + 1;
  234. };
  235. /** @internal */
  236. Parser.prototype.onclosetag = function (start, endIndex) {
  237. var _a, _b, _c, _d, _e, _f;
  238. this.endIndex = endIndex;
  239. var name = this.getSlice(start, endIndex);
  240. if (this.lowerCaseTagNames) {
  241. name = name.toLowerCase();
  242. }
  243. if (foreignContextElements.has(name) ||
  244. htmlIntegrationElements.has(name)) {
  245. this.foreignContext.pop();
  246. }
  247. if (!this.isVoidElement(name)) {
  248. var pos = this.stack.lastIndexOf(name);
  249. if (pos !== -1) {
  250. if (this.cbs.onclosetag) {
  251. var count = this.stack.length - pos;
  252. while (count--) {
  253. // We know the stack has sufficient elements.
  254. this.cbs.onclosetag(this.stack.pop(), count !== 0);
  255. }
  256. }
  257. else
  258. this.stack.length = pos;
  259. }
  260. else if (!this.options.xmlMode && name === "p") {
  261. // Implicit open before close
  262. this.emitOpenTag("p");
  263. this.closeCurrentTag(true);
  264. }
  265. }
  266. else if (!this.options.xmlMode && name === "br") {
  267. // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
  268. (_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, "br");
  269. (_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
  270. (_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", false);
  271. }
  272. // Set `startIndex` for next node
  273. this.startIndex = endIndex + 1;
  274. };
  275. /** @internal */
  276. Parser.prototype.onselfclosingtag = function (endIndex) {
  277. this.endIndex = endIndex;
  278. if (this.options.xmlMode ||
  279. this.options.recognizeSelfClosing ||
  280. this.foreignContext[this.foreignContext.length - 1]) {
  281. this.closeCurrentTag(false);
  282. // Set `startIndex` for next node
  283. this.startIndex = endIndex + 1;
  284. }
  285. else {
  286. // Ignore the fact that the tag is self-closing.
  287. this.onopentagend(endIndex);
  288. }
  289. };
  290. Parser.prototype.closeCurrentTag = function (isOpenImplied) {
  291. var _a, _b;
  292. var name = this.tagname;
  293. this.endOpenTag(isOpenImplied);
  294. // Self-closing tags will be on the top of the stack
  295. if (this.stack[this.stack.length - 1] === name) {
  296. // If the opening tag isn't implied, the closing tag has to be implied.
  297. (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
  298. this.stack.pop();
  299. }
  300. };
  301. /** @internal */
  302. Parser.prototype.onattribname = function (start, endIndex) {
  303. this.startIndex = start;
  304. var name = this.getSlice(start, endIndex);
  305. this.attribname = this.lowerCaseAttributeNames
  306. ? name.toLowerCase()
  307. : name;
  308. };
  309. /** @internal */
  310. Parser.prototype.onattribdata = function (start, endIndex) {
  311. this.attribvalue += this.getSlice(start, endIndex);
  312. };
  313. /** @internal */
  314. Parser.prototype.onattribentity = function (cp) {
  315. this.attribvalue += (0, decode_js_1.fromCodePoint)(cp);
  316. };
  317. /** @internal */
  318. Parser.prototype.onattribend = function (quote, endIndex) {
  319. var _a, _b;
  320. this.endIndex = endIndex;
  321. (_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote === Tokenizer_js_1.QuoteType.Double
  322. ? '"'
  323. : quote === Tokenizer_js_1.QuoteType.Single
  324. ? "'"
  325. : quote === Tokenizer_js_1.QuoteType.NoValue
  326. ? undefined
  327. : null);
  328. if (this.attribs &&
  329. !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
  330. this.attribs[this.attribname] = this.attribvalue;
  331. }
  332. this.attribvalue = "";
  333. };
  334. Parser.prototype.getInstructionName = function (value) {
  335. var idx = value.search(reNameEnd);
  336. var name = idx < 0 ? value : value.substr(0, idx);
  337. if (this.lowerCaseTagNames) {
  338. name = name.toLowerCase();
  339. }
  340. return name;
  341. };
  342. /** @internal */
  343. Parser.prototype.ondeclaration = function (start, endIndex) {
  344. this.endIndex = endIndex;
  345. var value = this.getSlice(start, endIndex);
  346. if (this.cbs.onprocessinginstruction) {
  347. var name = this.getInstructionName(value);
  348. this.cbs.onprocessinginstruction("!".concat(name), "!".concat(value));
  349. }
  350. // Set `startIndex` for next node
  351. this.startIndex = endIndex + 1;
  352. };
  353. /** @internal */
  354. Parser.prototype.onprocessinginstruction = function (start, endIndex) {
  355. this.endIndex = endIndex;
  356. var value = this.getSlice(start, endIndex);
  357. if (this.cbs.onprocessinginstruction) {
  358. var name = this.getInstructionName(value);
  359. this.cbs.onprocessinginstruction("?".concat(name), "?".concat(value));
  360. }
  361. // Set `startIndex` for next node
  362. this.startIndex = endIndex + 1;
  363. };
  364. /** @internal */
  365. Parser.prototype.oncomment = function (start, endIndex, offset) {
  366. var _a, _b, _c, _d;
  367. this.endIndex = endIndex;
  368. (_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, this.getSlice(start, endIndex - offset));
  369. (_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c);
  370. // Set `startIndex` for next node
  371. this.startIndex = endIndex + 1;
  372. };
  373. /** @internal */
  374. Parser.prototype.oncdata = function (start, endIndex, offset) {
  375. var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
  376. this.endIndex = endIndex;
  377. var value = this.getSlice(start, endIndex - offset);
  378. if (this.options.xmlMode || this.options.recognizeCDATA) {
  379. (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);
  380. (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);
  381. (_f = (_e = this.cbs).oncdataend) === null || _f === void 0 ? void 0 : _f.call(_e);
  382. }
  383. else {
  384. (_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[".concat(value, "]]"));
  385. (_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? void 0 : _k.call(_j);
  386. }
  387. // Set `startIndex` for next node
  388. this.startIndex = endIndex + 1;
  389. };
  390. /** @internal */
  391. Parser.prototype.onend = function () {
  392. var _a, _b;
  393. if (this.cbs.onclosetag) {
  394. // Set the end index for all remaining tags
  395. this.endIndex = this.startIndex;
  396. for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i], true))
  397. ;
  398. }
  399. (_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a);
  400. };
  401. /**
  402. * Resets the parser to a blank state, ready to parse a new HTML document
  403. */
  404. Parser.prototype.reset = function () {
  405. var _a, _b, _c, _d;
  406. (_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
  407. this.tokenizer.reset();
  408. this.tagname = "";
  409. this.attribname = "";
  410. this.attribs = null;
  411. this.stack.length = 0;
  412. this.startIndex = 0;
  413. this.endIndex = 0;
  414. (_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);
  415. this.buffers.length = 0;
  416. this.bufferOffset = 0;
  417. this.writeIndex = 0;
  418. this.ended = false;
  419. };
  420. /**
  421. * Resets the parser, then parses a complete document and
  422. * pushes it to the handler.
  423. *
  424. * @param data Document to parse.
  425. */
  426. Parser.prototype.parseComplete = function (data) {
  427. this.reset();
  428. this.end(data);
  429. };
  430. Parser.prototype.getSlice = function (start, end) {
  431. while (start - this.bufferOffset >= this.buffers[0].length) {
  432. this.shiftBuffer();
  433. }
  434. var str = this.buffers[0].slice(start - this.bufferOffset, end - this.bufferOffset);
  435. while (end - this.bufferOffset > this.buffers[0].length) {
  436. this.shiftBuffer();
  437. str += this.buffers[0].slice(0, end - this.bufferOffset);
  438. }
  439. return str;
  440. };
  441. Parser.prototype.shiftBuffer = function () {
  442. this.bufferOffset += this.buffers[0].length;
  443. this.writeIndex--;
  444. this.buffers.shift();
  445. };
  446. /**
  447. * Parses a chunk of data and calls the corresponding callbacks.
  448. *
  449. * @param chunk Chunk to parse.
  450. */
  451. Parser.prototype.write = function (chunk) {
  452. var _a, _b;
  453. if (this.ended) {
  454. (_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, new Error(".write() after done!"));
  455. return;
  456. }
  457. this.buffers.push(chunk);
  458. if (this.tokenizer.running) {
  459. this.tokenizer.write(chunk);
  460. this.writeIndex++;
  461. }
  462. };
  463. /**
  464. * Parses the end of the buffer and clears the stack, calls onend.
  465. *
  466. * @param chunk Optional final chunk to parse.
  467. */
  468. Parser.prototype.end = function (chunk) {
  469. var _a, _b;
  470. if (this.ended) {
  471. (_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, Error(".end() after done!"));
  472. return;
  473. }
  474. if (chunk)
  475. this.write(chunk);
  476. this.ended = true;
  477. this.tokenizer.end();
  478. };
  479. /**
  480. * Pauses parsing. The parser won't emit events until `resume` is called.
  481. */
  482. Parser.prototype.pause = function () {
  483. this.tokenizer.pause();
  484. };
  485. /**
  486. * Resumes parsing after `pause` was called.
  487. */
  488. Parser.prototype.resume = function () {
  489. this.tokenizer.resume();
  490. while (this.tokenizer.running &&
  491. this.writeIndex < this.buffers.length) {
  492. this.tokenizer.write(this.buffers[this.writeIndex++]);
  493. }
  494. if (this.ended)
  495. this.tokenizer.end();
  496. };
  497. /**
  498. * Alias of `write`, for backwards compatibility.
  499. *
  500. * @param chunk Chunk to parse.
  501. * @deprecated
  502. */
  503. Parser.prototype.parseChunk = function (chunk) {
  504. this.write(chunk);
  505. };
  506. /**
  507. * Alias of `end`, for backwards compatibility.
  508. *
  509. * @param chunk Optional final chunk to parse.
  510. * @deprecated
  511. */
  512. Parser.prototype.done = function (chunk) {
  513. this.end(chunk);
  514. };
  515. return Parser;
  516. }());
  517. exports.Parser = Parser;
  518. //# sourceMappingURL=Parser.js.map