// Parser.js (12 KB)
  1. "use strict";
  /**
   * Interop helper for default imports: a module object flagged with
   * `__esModule` is returned untouched, anything else is wrapped so that the
   * value is reachable through `.default`. An already-installed
   * `this.__importDefault` is reused when present.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      return { "default": mod };
  };
  5. Object.defineProperty(exports, "__esModule", { value: true });
  6. exports.Parser = void 0;
  7. var Tokenizer_1 = __importDefault(require("./Tokenizer"));
  // Form-control tags; shared as the value for several entries of the table below.
  var formTags = new Set([
      "input", "option", "optgroup", "select",
      "button", "datalist", "textarea",
  ]);
  // Single-member set used by every tag whose opening closes an open <p>.
  var pTag = new Set(["p"]);
  /**
   * Lookup table keyed by the tag being opened. The value is the set of tag
   * names that, when found on top of the open-element stack, are closed
   * before the new tag is pushed (see `Parser.prototype.onopentagname`).
   *
   * Entries that share behaviour point at the very same Set instance.
   */
  var openImpliesClose = (function () {
      var table = {};
      // Register the same Set instance under each of the given tag names.
      function share(tags, closes) {
          tags.forEach(function (tag) {
              table[tag] = closes;
          });
      }
      table.tr = new Set(["tr", "th", "td"]);
      table.th = new Set(["th"]);
      table.td = new Set(["thead", "th", "td"]);
      table.body = new Set(["head", "link", "script"]);
      table.li = new Set(["li"]);
      share(["p", "h1", "h2", "h3", "h4", "h5", "h6"], pTag);
      share(["select", "input", "output", "button", "datalist", "textarea"], formTags);
      table.option = new Set(["option"]);
      table.optgroup = new Set(["optgroup", "option"]);
      table.dd = new Set(["dt", "dd"]);
      table.dt = new Set(["dt", "dd"]);
      share([
          "address", "article", "aside", "blockquote", "details", "div", "dl",
          "fieldset", "figcaption", "figure", "footer", "form", "header", "hr",
          "main", "nav", "ol", "pre", "section", "table", "ul",
      ], pTag);
      table.rt = new Set(["rt", "rp"]);
      table.rp = new Set(["rt", "rp"]);
      table.tbody = new Set(["thead", "tbody"]);
      table.tfoot = new Set(["thead", "tbody"]);
      return table;
  }());
  /**
   * Tag names the parser treats as void outside of xmlMode: they are never
   * pushed onto the open-element stack, and `onopentagend` reports their
   * close event immediately after the open tag.
   */
  var voidElements = new Set([
      "area", "base", "basefont", "br", "col",
      "command", "embed", "frame", "hr", "img",
      "input", "isindex", "keygen", "link", "meta",
      "param", "source", "track", "wbr",
  ]);
  // Opening one of these pushes `true` onto the parser's foreignContext stack.
  var foreignContextElements = new Set(["math", "svg"]);
  // Opening one of these pushes `false` onto the foreignContext stack, so
  // content nested inside them is handled with the regular HTML rules again.
  var htmlIntegrationElements = new Set([
      "mi", "mo", "mn", "ms", "mtext",
      "annotation-xml",
      "foreignObject", "desc", "title",
  ]);
  // Matches the first whitespace character or slash; used to cut the name
  // off the front of a declaration / processing-instruction string.
  var reNameEnd = /\s|\//;
  /**
   * Event-driven parser. A tokenizer (created in the constructor) calls the
   * `on*` methods below; the parser tracks the open-element stack and forwards
   * higher-level events to the optional callbacks supplied in `cbs`.
   */
  var Parser = /** @class */ (function () {
      /**
       * Puts every per-document field back to its initial value. Shared by the
       * constructor and `reset` so the two can never drift apart.
       */
      function initState(parser) {
          /** The start index of the last event. */
          parser.startIndex = 0;
          /** The end index of the last event. */
          parser.endIndex = null;
          parser.tagname = "";
          parser.attribname = "";
          parser.attribvalue = "";
          parser.attribs = null;
          parser.stack = [];
          parser.foreignContext = [];
      }
      /**
       * @param cbs Object with optional event callbacks (`onopentag`, `ontext`, ...).
       * @param options Parser options; `xmlMode`, `lowerCaseTags`,
       *     `lowerCaseAttributeNames`, `recognizeSelfClosing`, `recognizeCDATA`
       *     and `Tokenizer` are read by this class.
       */
      function Parser(cbs, options) {
          if (options === undefined) {
              options = {};
          }
          initState(this);
          this.options = options;
          this.cbs = cbs != null ? cbs : {};
          // Names are lower-cased by default unless xmlMode is on.
          this.lowerCaseTagNames =
              options.lowerCaseTags != null ? options.lowerCaseTags : !options.xmlMode;
          this.lowerCaseAttributeNames =
              options.lowerCaseAttributeNames != null
                  ? options.lowerCaseAttributeNames
                  : !options.xmlMode;
          var TokenizerCtor = options.Tokenizer != null ? options.Tokenizer : Tokenizer_1.default;
          this.tokenizer = new TokenizerCtor(this.options, this);
          if (this.cbs.onparserinit != null) {
              this.cbs.onparserinit(this);
          }
      }
      /**
       * Advances `startIndex`/`endIndex` for the event about to be emitted.
       *
       * @param initialOffset Only used for the first event (while `endIndex`
       *     is still null): subtracted from the tokenizer's `sectionStart`,
       *     clamped at 0.
       */
      Parser.prototype.updatePosition = function (initialOffset) {
          if (this.endIndex === null) {
              var sectionStart = this.tokenizer.sectionStart;
              this.startIndex = sectionStart <= initialOffset ? 0 : sectionStart - initialOffset;
          }
          else {
              this.startIndex = this.endIndex + 1;
          }
          this.endIndex = this.tokenizer.getAbsoluteIndex();
      };
      // Tokenizer event handlers
      /** Forwards a run of text to `cbs.ontext`. */
      Parser.prototype.ontext = function (data) {
          this.updatePosition(1);
          this.endIndex--;
          if (this.cbs.ontext != null) {
              this.cbs.ontext(data);
          }
      };
      /**
       * Handles the name of an opening tag: closes elements that the new tag
       * implicitly ends, pushes the tag onto the stack (unless it is void),
       * and starts collecting attributes when an `onopentag` callback exists.
       */
      Parser.prototype.onopentagname = function (name) {
          if (this.lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          this.tagname = name;
          if (!this.options.xmlMode &&
              Object.prototype.hasOwnProperty.call(openImpliesClose, name)) {
              // Keep closing the innermost element for as long as it is one
              // that this tag implicitly terminates.
              while (this.stack.length > 0) {
                  var top_1 = this.stack[this.stack.length - 1];
                  if (!openImpliesClose[name].has(top_1)) {
                      break;
                  }
                  this.onclosetag(top_1);
              }
          }
          if (this.options.xmlMode || !voidElements.has(name)) {
              this.stack.push(name);
              if (foreignContextElements.has(name)) {
                  this.foreignContext.push(true);
              }
              else if (htmlIntegrationElements.has(name)) {
                  this.foreignContext.push(false);
              }
          }
          if (this.cbs.onopentagname != null) {
              this.cbs.onopentagname(name);
          }
          if (this.cbs.onopentag) {
              this.attribs = {};
          }
      };
      /**
       * Handles the end of an opening tag: emits `onopentag` with the collected
       * attributes and, outside xmlMode, immediately emits `onclosetag` for
       * void elements.
       */
      Parser.prototype.onopentagend = function () {
          this.updatePosition(1);
          if (this.attribs) {
              if (this.cbs.onopentag != null) {
                  this.cbs.onopentag(this.tagname, this.attribs);
              }
              this.attribs = null;
          }
          if (!this.options.xmlMode &&
              this.cbs.onclosetag &&
              voidElements.has(this.tagname)) {
              this.cbs.onclosetag(this.tagname);
          }
          this.tagname = "";
      };
      /**
       * Handles a closing tag. Closes the matching open element together with
       * everything opened after it. Outside xmlMode a stray `</p>` (and, on an
       * empty stack, `</br>`) is turned into an empty element.
       */
      Parser.prototype.onclosetag = function (name) {
          this.updatePosition(1);
          if (this.lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          if (foreignContextElements.has(name) ||
              htmlIntegrationElements.has(name)) {
              this.foreignContext.pop();
          }
          var xmlMode = this.options.xmlMode;
          if (this.stack.length && (xmlMode || !voidElements.has(name))) {
              var pos = this.stack.lastIndexOf(name);
              if (pos !== -1) {
                  if (this.cbs.onclosetag) {
                      // Number of elements to pop is fixed up front; we know
                      // the stack has sufficient elements.
                      for (var remaining = this.stack.length - pos; remaining > 0; remaining--) {
                          this.cbs.onclosetag(this.stack.pop());
                      }
                  }
                  else {
                      // Nobody is listening: just truncate the stack.
                      this.stack.length = pos;
                  }
              }
              else if (name === "p" && !xmlMode) {
                  this.onopentagname(name);
                  this.closeCurrentTag();
              }
          }
          else if (!xmlMode && (name === "br" || name === "p")) {
              this.onopentagname(name);
              this.closeCurrentTag();
          }
      };
      /**
       * Handles `/>`. The tag is closed right away in xmlMode, with
       * `recognizeSelfClosing`, or when the innermost foreign-context entry is
       * `true`; otherwise it is treated like a plain `>`.
       */
      Parser.prototype.onselfclosingtag = function () {
          var inForeignContext = this.foreignContext[this.foreignContext.length - 1];
          if (this.options.xmlMode ||
              this.options.recognizeSelfClosing ||
              inForeignContext) {
              this.closeCurrentTag();
          }
          else {
              this.onopentagend();
          }
      };
      /** Finishes the current open tag and closes it if it is on top of the stack. */
      Parser.prototype.closeCurrentTag = function () {
          // Remember the name first: onopentagend() clears this.tagname.
          var name = this.tagname;
          this.onopentagend();
          /*
           * Self-closing tags will be on the top of the stack
           * (cheaper check than in onclosetag)
           */
          if (this.stack[this.stack.length - 1] === name) {
              if (this.cbs.onclosetag != null) {
                  this.cbs.onclosetag(name);
              }
              this.stack.pop();
          }
      };
      /** Stores the (optionally lower-cased) name of the attribute being read. */
      Parser.prototype.onattribname = function (name) {
          if (this.lowerCaseAttributeNames) {
              name = name.toLowerCase();
          }
          this.attribname = name;
      };
      /** Appends a chunk to the value of the attribute being read. */
      Parser.prototype.onattribdata = function (value) {
          this.attribvalue += value;
      };
      /**
       * Emits `onattribute` and records the attribute for `onopentag`. When
       * a name occurs more than once only the first value is kept.
       *
       * @param quote Passed through unchanged to `cbs.onattribute`.
       */
      Parser.prototype.onattribend = function (quote) {
          if (this.cbs.onattribute != null) {
              this.cbs.onattribute(this.attribname, this.attribvalue, quote);
          }
          if (this.attribs &&
              !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
              this.attribs[this.attribname] = this.attribvalue;
          }
          this.attribname = "";
          this.attribvalue = "";
      };
      /**
       * Returns the leading name of a declaration / processing instruction:
       * everything before the first whitespace or slash, lower-cased when
       * tag names are lower-cased.
       */
      Parser.prototype.getInstructionName = function (value) {
          var idx = value.search(reNameEnd);
          var name = idx < 0 ? value : value.slice(0, idx);
          if (this.lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          return name;
      };
      /** Forwards a `<!...>` declaration as a processing instruction prefixed with "!". */
      Parser.prototype.ondeclaration = function (value) {
          if (this.cbs.onprocessinginstruction) {
              var name_1 = this.getInstructionName(value);
              this.cbs.onprocessinginstruction("!" + name_1, "!" + value);
          }
      };
      /** Forwards a `<?...>` processing instruction, prefixed with "?". */
      Parser.prototype.onprocessinginstruction = function (value) {
          if (this.cbs.onprocessinginstruction) {
              var name_2 = this.getInstructionName(value);
              this.cbs.onprocessinginstruction("?" + name_2, "?" + value);
          }
      };
      /** Emits `oncomment` followed by `oncommentend`. */
      Parser.prototype.oncomment = function (value) {
          this.updatePosition(4);
          if (this.cbs.oncomment != null) {
              this.cbs.oncomment(value);
          }
          if (this.cbs.oncommentend != null) {
              this.cbs.oncommentend();
          }
      };
      /**
       * Handles a CDATA section. In xmlMode or with `recognizeCDATA` it is
       * reported as `oncdatastart` / `ontext` / `oncdataend`; otherwise it is
       * reported as a comment whose text is `[CDATA[...]]`.
       */
      Parser.prototype.oncdata = function (value) {
          this.updatePosition(1);
          if (this.options.xmlMode || this.options.recognizeCDATA) {
              if (this.cbs.oncdatastart != null) {
                  this.cbs.oncdatastart();
              }
              if (this.cbs.ontext != null) {
                  this.cbs.ontext(value);
              }
              if (this.cbs.oncdataend != null) {
                  this.cbs.oncdataend();
              }
          }
          else {
              // Emit the comment callbacks directly. Delegating to
              // this.oncomment() would run updatePosition() a second time for
              // the same token and leave startIndex one past endIndex.
              if (this.cbs.oncomment != null) {
                  this.cbs.oncomment("[CDATA[" + value + "]]");
              }
              if (this.cbs.oncommentend != null) {
                  this.cbs.oncommentend();
              }
          }
      };
      /** Forwards an error to `cbs.onerror`. */
      Parser.prototype.onerror = function (err) {
          if (this.cbs.onerror != null) {
              this.cbs.onerror(err);
          }
      };
      /**
       * Reports `onclosetag` for every element still open (innermost first)
       * and then emits `onend`.
       */
      Parser.prototype.onend = function () {
          if (this.cbs.onclosetag) {
              for (var i = this.stack.length - 1; i >= 0; i--) {
                  this.cbs.onclosetag(this.stack[i]);
              }
          }
          if (this.cbs.onend != null) {
              this.cbs.onend();
          }
      };
      /**
       * Resets the parser to a blank state, ready to parse a new HTML document
       */
      Parser.prototype.reset = function () {
          if (this.cbs.onreset != null) {
              this.cbs.onreset();
          }
          this.tokenizer.reset();
          // Restore *all* per-document state, including the event indices,
          // any half-read attribute value and the foreign-context stack, so
          // nothing from the previous document leaks into the next one.
          initState(this);
          if (this.cbs.onparserinit != null) {
              this.cbs.onparserinit(this);
          }
      };
      /**
       * Resets the parser, then parses a complete document and
       * pushes it to the handler.
       *
       * @param data Document to parse.
       */
      Parser.prototype.parseComplete = function (data) {
          this.reset();
          this.end(data);
      };
      /**
       * Parses a chunk of data and calls the corresponding callbacks.
       *
       * @param chunk Chunk to parse.
       */
      Parser.prototype.write = function (chunk) {
          this.tokenizer.write(chunk);
      };
      /**
       * Parses the end of the buffer and clears the stack, calls onend.
       *
       * @param chunk Optional final chunk to parse.
       */
      Parser.prototype.end = function (chunk) {
          this.tokenizer.end(chunk);
      };
      /**
       * Pauses parsing. The parser won't emit events until `resume` is called.
       */
      Parser.prototype.pause = function () {
          this.tokenizer.pause();
      };
      /**
       * Resumes parsing after `pause` was called.
       */
      Parser.prototype.resume = function () {
          this.tokenizer.resume();
      };
      /**
       * Alias of `write`, for backwards compatibility.
       *
       * @param chunk Chunk to parse.
       * @deprecated
       */
      Parser.prototype.parseChunk = function (chunk) {
          this.write(chunk);
      };
      /**
       * Alias of `end`, for backwards compatibility.
       *
       * @param chunk Optional final chunk to parse.
       * @deprecated
       */
      Parser.prototype.done = function (chunk) {
          this.end(chunk);
      };
      return Parser;
  }());
  381. exports.Parser = Parser;