123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512 |
- /* Copyright 2018 Mozilla Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- // The code for XMLParserBase copied from
- // https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
- import { encodeToXmlString } from "./core_utils.js";
/**
 * Error codes passed to `XMLParserBase.prototype.onError`.
 *
 * The object is frozen so that this shared, exported constant cannot be
 * modified by accident.
 */
const XMLParserErrorCode = Object.freeze({
  // Initial/"no failure" state used by `SimpleXMLParser`.
  NoError: 0,
  // Not reported by the parser code in this file.
  EndOfDocument: -1,
  // A `<![CDATA[` section without a closing `]]>`.
  UnterminatedCdat: -2,
  // A `<?...` processing instruction without a closing `?>`.
  UnterminatedXmlDeclaration: -3,
  // A `<!DOCTYPE` declaration without its closing `>` (or `]>`).
  UnterminatedDoctypeDeclaration: -4,
  // A `<!--` comment without a closing `-->`.
  UnterminatedComment: -5,
  // An unknown `<!...` construct, or a start tag with invalid attributes.
  MalformedElement: -6,
  // Not reported by the parser code in this file.
  OutOfMemory: -7,
  // Not reported by the parser code in this file.
  UnterminatedAttributeValue: -8,
  // A start tag or end tag without its closing `>`.
  UnterminatedElement: -9,
  // Not reported by the parser code in this file.
  ElementNeverBegun: -10,
});
/**
 * Tell whether the character at `index` in `s` is one of the four
 * whitespace characters recognised by this parser: space, line feed,
 * carriage return or tab.
 *
 * @param {string} s - The string to inspect.
 * @param {number} index - Position of the character to test.
 * @returns {boolean} `true` for whitespace, `false` otherwise (including
 *   when `index` is outside of the string).
 */
function isWhitespace(s, index) {
  switch (s[index]) {
    case " ":
    case "\n":
    case "\r":
    case "\t":
      return true;
    default:
      return false;
  }
}
/**
 * Tell whether `s` consists solely of characters accepted by
 * `isWhitespace`. An empty string counts as whitespace-only.
 *
 * @param {string} s - The string to inspect.
 * @returns {boolean} `false` as soon as a non-whitespace character is
 *   found, `true` otherwise.
 */
function isWhitespaceString(s) {
  const total = s.length;
  let cursor = 0;
  while (cursor < total) {
    if (!isWhitespace(s, cursor)) {
      return false;
    }
    cursor += 1;
  }
  return true;
}
/**
 * Minimal, event-driven XML tokenizer.
 *
 * `parseXml` walks the input string once and reports what it finds through
 * the `on*` callbacks, which are no-ops here and are meant to be overridden
 * by subclasses. Parsing stops at the first problem, which is reported
 * through `onError` with one of the `XMLParserErrorCode` values.
 */
class XMLParserBase {
  /**
   * Replace `&...;` references in `s` by the text they stand for.
   *
   * Numeric references (`&#NN;`, `&#xHH;`) are converted to the matching
   * character, the five predefined XML entities are expanded, and any other
   * name is delegated to `onResolveEntity`.
   *
   * @param {string} s - Raw text or attribute value.
   * @returns {string} The text with the references resolved.
   */
  _resolveEntities(s) {
    return s.replace(/&([^;]+);/g, (all, entity) => {
      if (entity.startsWith("#")) {
        const codePoint = entity.startsWith("#x")
          ? parseInt(entity.substring(2), 16)
          : parseInt(entity.substring(1), 10);
        // `String.fromCodePoint` throws a RangeError for NaN, negative
        // values and values above 0x10FFFF; keep such malformed references
        // verbatim rather than letting bad input abort the whole parse.
        if (!(codePoint >= 0 && codePoint <= 0x10ffff)) {
          return all;
        }
        return String.fromCodePoint(codePoint);
      }
      switch (entity) {
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "amp":
          return "&";
        case "quot":
          return '"';
        case "apos":
          return "'";
      }
      return this.onResolveEntity(entity);
    });
  }

  /**
   * Parse the inside of a start tag: the element name followed by its
   * attributes.
   *
   * @param {string} s - The complete XML string.
   * @param {number} start - Index of the first character of the tag name.
   * @returns {{name: string, attributes: Array<{name: string, value: string}>,
   *   parsed: number} | null} The tag name, its attributes (values have their
   *   entities resolved) and the number of characters consumed, or `null`
   *   when an attribute is not of the form `name="value"` / `name='value'`.
   */
  _parseContent(s, start) {
    const attributes = [];
    let pos = start;

    const skipWs = () => {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    };

    // The tag name runs up to the first whitespace, ">" or "/".
    while (
      pos < s.length &&
      !isWhitespace(s, pos) &&
      s[pos] !== ">" &&
      s[pos] !== "/"
    ) {
      ++pos;
    }
    const name = s.substring(start, pos);
    skipWs();

    // One iteration per attribute, until the end of the tag is reached.
    while (
      pos < s.length &&
      s[pos] !== ">" &&
      s[pos] !== "/" &&
      s[pos] !== "?"
    ) {
      skipWs();
      const attrNameStart = pos;
      while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== "=") {
        ++pos;
      }
      const attrName = s.substring(attrNameStart, pos);
      skipWs();
      if (s[pos] !== "=") {
        return null;
      }
      ++pos;
      skipWs();
      // The value must be quoted; it ends at the next identical quote.
      const attrEndChar = s[pos];
      if (attrEndChar !== '"' && attrEndChar !== "'") {
        return null;
      }
      const attrEndIndex = s.indexOf(attrEndChar, ++pos);
      if (attrEndIndex < 0) {
        return null;
      }
      const attrValue = s.substring(pos, attrEndIndex);
      attributes.push({
        name: attrName,
        value: this._resolveEntities(attrValue),
      });
      pos = attrEndIndex + 1;
      skipWs();
    }
    return {
      name,
      attributes,
      parsed: pos - start,
    };
  }

  /**
   * Parse the inside of a processing instruction (`<?name value?>`).
   *
   * @param {string} s - The complete XML string.
   * @param {number} start - Index of the first character after `<?`.
   * @returns {{name: string, value: string, parsed: number}} The target
   *   name, the raw text up to (not including) `?>`, and the number of
   *   characters consumed. When `?>` is missing, everything up to the end
   *   of the string is consumed.
   */
  _parseProcessingInstruction(s, start) {
    let pos = start;

    const skipWs = () => {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    };

    while (
      pos < s.length &&
      !isWhitespace(s, pos) &&
      s[pos] !== ">" &&
      s[pos] !== "?" &&
      s[pos] !== "/"
    ) {
      ++pos;
    }
    const name = s.substring(start, pos);
    skipWs();
    const attrStart = pos;
    while (pos < s.length && (s[pos] !== "?" || s[pos + 1] !== ">")) {
      ++pos;
    }
    const value = s.substring(attrStart, pos);
    return {
      name,
      value,
      parsed: pos - start,
    };
  }

  /**
   * Tokenize `s` and invoke the matching `on*` callback for every end tag,
   * processing instruction, comment, CDATA section, DOCTYPE declaration,
   * start tag and text run, in document order.
   *
   * On the first error `onError` is called and parsing stops.
   *
   * @param {string} s - The XML source.
   */
  parseXml(s) {
    let i = 0;
    while (i < s.length) {
      const ch = s[i];
      let j = i;
      if (ch === "<") {
        ++j;
        const ch2 = s[j];
        let q;
        switch (ch2) {
          case "/": {
            // End tag: `</name>`.
            ++j;
            q = s.indexOf(">", j);
            if (q < 0) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onEndElement(s.substring(j, q));
            j = q + 1;
            break;
          }
          case "?": {
            // Processing instruction: `<?name value?>`.
            ++j;
            const pi = this._parseProcessingInstruction(s, j);
            if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== "?>") {
              this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
              return;
            }
            this.onPi(pi.name, pi.value);
            j += pi.parsed + 2;
            break;
          }
          case "!": {
            // Here `j` still points at the "!".
            if (s.substring(j + 1, j + 3) === "--") {
              q = s.indexOf("-->", j + 3);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedComment);
                return;
              }
              this.onComment(s.substring(j + 3, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === "[CDATA[") {
              q = s.indexOf("]]>", j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedCdat);
                return;
              }
              this.onCdata(s.substring(j + 8, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === "DOCTYPE") {
              const q2 = s.indexOf("[", j + 8);
              let complexDoctype = false;
              q = s.indexOf(">", j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                return;
              }
              // A "[" before the first ">" means the declaration has a
              // bracketed section, so it only ends at "]>".
              if (q2 > 0 && q > q2) {
                q = s.indexOf("]>", j + 8);
                if (q < 0) {
                  this.onError(
                    XMLParserErrorCode.UnterminatedDoctypeDeclaration
                  );
                  return;
                }
                complexDoctype = true;
              }
              const doctypeContent = s.substring(
                j + 8,
                q + (complexDoctype ? 1 : 0)
              );
              this.onDoctype(doctypeContent);
              j = q + (complexDoctype ? 2 : 1);
            } else {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            break;
          }
          default: {
            // Start tag, possibly self-closing: `<name attr="v">` or
            // `<name attr="v"/>`.
            const content = this._parseContent(s, j);
            if (content === null) {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            let isClosed = false;
            if (
              s.substring(j + content.parsed, j + content.parsed + 2) === "/>"
            ) {
              isClosed = true;
            } else if (
              s.substring(j + content.parsed, j + content.parsed + 1) !== ">"
            ) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onBeginElement(content.name, content.attributes, isClosed);
            j += content.parsed + (isClosed ? 2 : 1);
            break;
          }
        }
      } else {
        // Text run: everything up to the next "<" (or the end of input).
        while (j < s.length && s[j] !== "<") {
          j++;
        }
        const text = s.substring(i, j);
        this.onText(this._resolveEntities(text));
      }
      i = j;
    }
  }

  /**
   * Called for entity names that are neither numeric nor predefined.
   * The default implementation leaves the reference untouched.
   *
   * @param {string} name - The entity name, without `&` and `;`.
   * @returns {string} The replacement text.
   */
  onResolveEntity(name) {
    return `&${name};`;
  }

  /** Called with the target name and raw content of a `<?...?>`. */
  onPi(name, value) {}

  /** Called with the text between `<!--` and `-->`. */
  onComment(text) {}

  /** Called with the raw text between `<![CDATA[` and `]]>`. */
  onCdata(text) {}

  /** Called with the text following `<!DOCTYPE`. */
  onDoctype(doctypeContent) {}

  /** Called with an entity-resolved run of text between tags. */
  onText(text) {}

  /**
   * Called for a start tag; `isEmpty` is `true` for self-closing tags
   * (`<name/>`), for which no `onEndElement` call follows.
   */
  onBeginElement(name, attributes, isEmpty) {}

  /** Called with the text between `</` and `>` of an end tag. */
  onEndElement(name) {}

  /** Called with an `XMLParserErrorCode` value when parsing is aborted. */
  onError(code) {}
}
/**
 * Lightweight DOM-like node.
 *
 * Element nodes are expected to have a `childNodes` array (and optionally
 * an `attributes` array of `{ name, value }` objects) attached by whoever
 * builds the tree; text nodes use the name "#text" and carry their content
 * in `nodeValue`.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Element name, or "#text" for text nodes.
   * @param {string} [nodeValue] - Text content of the node, if any.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;
    // Defined through `defineProperty` so that the back-reference to the
    // parent is writable but not enumerable.
    Object.defineProperty(this, "parentNode", { value: null, writable: true });
  }

  /**
   * @returns {SimpleDOMNode|undefined} The first child, or `undefined` when
   *   the node has no children.
   */
  get firstChild() {
    return this.childNodes?.[0];
  }

  /**
   * @returns {SimpleDOMNode|undefined} The node following this one in its
   *   parent's `childNodes`, or `undefined` when there is none (including
   *   when the node has no parent).
   */
  get nextSibling() {
    // `parentNode` is `null` for root/detached nodes; without the optional
    // chaining this getter would throw a TypeError on them.
    const childNodes = this.parentNode?.childNodes;
    if (!childNodes) {
      return undefined;
    }
    const index = childNodes.indexOf(this);
    if (index === -1) {
      return undefined;
    }
    return childNodes[index + 1];
  }

  /**
   * @returns {string} For a node without `childNodes`, its own `nodeValue`
   *   (or ""); otherwise the concatenated `textContent` of all children.
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || "";
    }
    return this.childNodes.map(child => child.textContent).join("");
  }

  /**
   * @returns {Array<SimpleDOMNode>} The child nodes, or an empty array.
   */
  get children() {
    return this.childNodes || [];
  }

  /**
   * @returns {boolean} `true` when the node has at least one child.
   */
  hasChildNodes() {
    // `undefined > 0` is `false`, so this always yields a real boolean.
    return this.childNodes?.length > 0;
  }

  /**
   * Search a node in the tree with the given path
   * foo.bar[nnn], i.e. find the nnn-th node named
   * bar under a node named foo.
   *
   * @param {Array} paths - an array of objects as
   *   returned by {parseXFAPath}.
   * @param {number} pos - the current position in
   *   the paths array.
   * @returns {SimpleDOMNode} The node corresponding
   *   to the path or null if not found.
   */
  searchNode(paths, pos) {
    if (pos >= paths.length) {
      // Every path component has been matched.
      return this;
    }

    const component = paths[pos];
    // Explicit stack of [parent, index of the child being visited], used to
    // walk the subtree depth-first without recursion.
    const stack = [];
    let node = this;

    while (true) {
      if (component.name === node.nodeName) {
        if (component.pos === 0) {
          const res = node.searchNode(paths, pos + 1);
          if (res !== null) {
            return res;
          }
        } else if (stack.length === 0) {
          // The starting node has no siblings to index into.
          return null;
        } else {
          // Pick the `component.pos`-th child with that name among the
          // children of the current node's parent.
          const [parent] = stack.pop();
          let siblingPos = 0;
          for (const child of parent.childNodes) {
            if (component.name === child.nodeName) {
              if (siblingPos === component.pos) {
                return child.searchNode(paths, pos + 1);
              }
              siblingPos++;
            }
          }
          // We didn't find the correct sibling
          // so just return the first found node
          return node.searchNode(paths, pos + 1);
        }
      }

      if (node.childNodes && node.childNodes.length !== 0) {
        // Descend into the first child.
        stack.push([node, 0]);
        node = node.childNodes[0];
      } else if (stack.length === 0) {
        return null;
      } else {
        // Leaf reached: move to the next sibling, climbing up through the
        // ancestors until one of them has an unvisited child.
        while (stack.length !== 0) {
          const [parent, currentPos] = stack.pop();
          const newPos = currentPos + 1;
          if (newPos < parent.childNodes.length) {
            stack.push([parent, newPos]);
            node = parent.childNodes[newPos];
            break;
          }
        }
        if (stack.length === 0) {
          // The whole subtree has been visited without a match.
          return null;
        }
      }
    }
  }

  /**
   * Serialize this node and its descendants as XML.
   *
   * @param {Array<string>} buffer - Receives the pieces of the output, in
   *   order; text and attribute values go through `encodeToXmlString`.
   */
  dump(buffer) {
    if (this.nodeName === "#text") {
      buffer.push(encodeToXmlString(this.nodeValue));
      return;
    }

    buffer.push(`<${this.nodeName}`);
    if (this.attributes) {
      for (const attribute of this.attributes) {
        buffer.push(
          ` ${attribute.name}="${encodeToXmlString(attribute.value)}"`
        );
      }
    }
    if (this.hasChildNodes()) {
      buffer.push(">");
      for (const child of this.childNodes) {
        child.dump(buffer);
      }
      buffer.push(`</${this.nodeName}>`);
    } else if (this.nodeValue) {
      buffer.push(`>${encodeToXmlString(this.nodeValue)}</${this.nodeName}>`);
    } else {
      buffer.push("/>");
    }
  }
}
/**
 * XML parser that turns the events emitted by `XMLParserBase` into a tree
 * of `SimpleDOMNode` objects.
 */
class SimpleXMLParser extends XMLParserBase {
  /**
   * @param {Object} [options]
   * @param {boolean} [options.hasAttributes] - Keep the attributes of each
   *   element on the created node. The default value is `false`.
   * @param {boolean} [options.lowerCaseName] - Convert element names to
   *   lower case. The default value is `false`.
   */
  // The `= {}` default lets `new SimpleXMLParser()` work; destructuring an
  // omitted argument would otherwise throw a TypeError.
  constructor({ hasAttributes = false, lowerCaseName = false } = {}) {
    super();
    this._currentFragment = null;
    this._stack = null;
    this._errorCode = XMLParserErrorCode.NoError;
    this._hasAttributes = hasAttributes;
    this._lowerCaseName = lowerCaseName;
  }

  /**
   * Parse an XML string.
   *
   * @param {string} data - The XML source.
   * @returns {{documentElement: SimpleDOMNode}|undefined} The first
   *   top-level node wrapped in an object, or `undefined` when an error was
   *   reported or when no top-level node was produced.
   */
  parseFromString(data) {
    // Reset the state so that the same instance can parse several strings.
    this._currentFragment = [];
    this._stack = [];
    this._errorCode = XMLParserErrorCode.NoError;

    this.parseXml(data);

    if (this._errorCode !== XMLParserErrorCode.NoError) {
      return undefined; // return undefined on error
    }

    // We should only have one root.
    const [documentElement] = this._currentFragment;
    if (!documentElement) {
      return undefined; // Return undefined if no root was found.
    }
    return { documentElement };
  }

  /**
   * Append a "#text" node to the current fragment, unless the text is
   * whitespace-only.
   */
  onText(text) {
    if (isWhitespaceString(text)) {
      return;
    }
    const node = new SimpleDOMNode("#text", text);
    this._currentFragment.push(node);
  }

  /**
   * Append the CDATA content as a "#text" node; unlike `onText`,
   * whitespace-only content is kept.
   */
  onCdata(text) {
    const node = new SimpleDOMNode("#text", text);
    this._currentFragment.push(node);
  }

  /**
   * Create an element node, append it to the current fragment and, unless
   * the element is self-closing, make its children the current fragment.
   */
  onBeginElement(name, attributes, isEmpty) {
    if (this._lowerCaseName) {
      name = name.toLowerCase();
    }
    const node = new SimpleDOMNode(name);
    node.childNodes = [];
    if (this._hasAttributes) {
      node.attributes = attributes;
    }
    this._currentFragment.push(node);
    if (isEmpty) {
      return;
    }
    // Remember where to continue once the matching end tag is seen.
    this._stack.push(this._currentFragment);
    this._currentFragment = node.childNodes;
  }

  /**
   * Close the most recently opened element and set the `parentNode` of its
   * children. The tag `name` is not checked against the open element.
   *
   * @returns {SimpleDOMNode|null} The closed element, or `null` when no
   *   element was open.
   */
  onEndElement(name) {
    this._currentFragment = this._stack.pop() || [];
    // The element being closed is the last node of its parent's fragment.
    const lastElement = this._currentFragment.at(-1);
    if (!lastElement) {
      return null;
    }
    for (const childNode of lastElement.childNodes) {
      childNode.parentNode = lastElement;
    }
    return lastElement;
  }

  /** Record the error so that `parseFromString` can report the failure. */
  onError(code) {
    this._errorCode = code;
  }
}
- export { SimpleDOMNode, SimpleXMLParser, XMLParserBase, XMLParserErrorCode };
|