123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- "use strict";
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
- var xml_json_1 = __importDefault(require("./maps/xml.json"));
- var encode_trie_1 = require("./encode-trie");
- var entities_json_1 = __importDefault(require("./maps/entities.json"));
// Matches the ASCII start characters of HTML entities plus every non-ASCII code unit.
var htmlReplacer = getCharRegExp(entities_json_1.default, true);
// Matches the characters that have an XML entity plus every non-ASCII code unit.
var xmlReplacer = getCharRegExp(xml_json_1.default, true);
// Matches only the characters that have an XML entity.
var xmlInvalidChars = getCharRegExp(xml_json_1.default, false);
// Lookup from the char code of an XML special character to its `&name;` entity.
var xmlCodeMap = new Map();
Object.keys(xml_json_1.default).forEach(function (name) {
    var code = xml_json_1.default[name].charCodeAt(0);
    xmlCodeMap.set(code, "&" + name + ";");
});
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using XML entities.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
 *
 * A well-formed surrogate pair is written as a single reference to its code
 * point. An unpaired surrogate is written as a reference to its own code
 * unit, and the character following it is processed normally.
 *
 * @param str String to encode.
 * @returns The encoded string.
 */
function encodeXML(str) {
    var ret = "";
    var lastIdx = 0;
    var match;
    while ((match = xmlReplacer.exec(str)) !== null) {
        var i = match.index;
        var code = str.charCodeAt(i);
        var entity = xmlCodeMap.get(code);
        // Copy the untouched text between the previous match and this one.
        ret += str.substring(lastIdx, i);
        if (entity !== undefined) {
            ret += entity;
            lastIdx = i + 1;
            continue;
        }
        // NOTE(review): `getCodePoint` is assumed to return the combined code
        // point when `i` is the start of a surrogate pair — confirm in encode-trie.
        ret += "&#x" + encode_trie_1.getCodePoint(str, i).toString(16) + ";";
        // Only consume two code units when a lead surrogate (0xD800-0xDBFF) is
        // really followed by a trail surrogate (0xDC00-0xDFFF). `charCodeAt`
        // yields NaN past the end of the string, which fails the trail check.
        var isSurrogatePair = (code & 0xfc00) === 0xd800 &&
            (str.charCodeAt(i + 1) & 0xfc00) === 0xdc00;
        // Keep the regex cursor and the copy cursor in sync.
        lastIdx = xmlReplacer.lastIndex = i + (isSurrogatePair ? 2 : 1);
    }
    return ret + str.substring(lastIdx);
}
- exports.encodeXML = encodeXML;
/**
 * Encodes every character that has an HTML entity, plus all non-ASCII
 * characters, in the input.
 *
 * ASCII characters that are perfectly valid in HTML documents are encoded
 * too; for example `#` becomes `&num;`. Use `encodeNonAsciiHTML` when a more
 * compact result is preferred.
 *
 * Characters without a named entity are written as a numeric hexadecimal
 * reference (eg. `&#xfc;`).
 *
 * @param data String to encode.
 * @returns The encoded string.
 */
function encodeHTML(data) {
    var encoded = encode_trie_1.encodeHTMLTrieRe(htmlReplacer, data);
    return encoded;
}
- exports.encodeHTML = encodeHTML;
/**
 * Encodes all non-ASCII characters, together with the characters matched by
 * the XML matcher (those that are not valid in HTML documents), using HTML
 * entities.
 *
 * Characters without a named entity are written as a numeric hexadecimal
 * reference (eg. `&#xfc;`).
 *
 * @param data String to encode.
 * @returns The encoded string.
 */
function encodeNonAsciiHTML(data) {
    // The XML matcher is reused on purpose: it selects the markup-significant
    // characters plus everything outside of ASCII.
    var encoded = encode_trie_1.encodeHTMLTrieRe(xmlReplacer, data);
    return encoded;
}
- exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
/**
 * Builds a global character-class RegExp from the first character of every
 * value in `map`.
 *
 * Each character is backslash-escaped, duplicates are dropped, and runs of
 * three or more consecutive char codes are collapsed into a range.
 *
 * @param map Object whose values are strings; only their first character is used.
 * @param nonAscii When truthy, characters with a code of 128 or above are left
 *   out of the explicit list and the blanket range `\x80-\uFFFF` is appended.
 * @returns A RegExp with the `g` flag matching a single character.
 */
function getCharRegExp(map, nonAscii) {
    // Escaped start character of every entity, optionally limited to ASCII.
    var names = Object.keys(map);
    var escaped = [];
    for (var n = 0; n < names.length; n++) {
        var candidate = "\\" + map[names[n]].charAt(0);
        if (!nonAscii || candidate.charCodeAt(1) < 128) {
            escaped.push(candidate);
        }
    }
    escaped.sort(function (a, b) { return a.charCodeAt(1) - b.charCodeAt(1); });
    // Equal entries are adjacent after sorting; keep the last one of each group.
    var unique = [];
    for (var u = 0; u < escaped.length; u++) {
        if (escaped[u] !== escaped[u + 1]) {
            unique.push(escaped[u]);
        }
    }
    // Walk over maximal runs of consecutive char codes.
    var pieces = [];
    var pos = 0;
    while (pos < unique.length) {
        var last = pos;
        while (last + 1 < unique.length &&
            unique[last].charCodeAt(1) + 1 === unique[last + 1].charCodeAt(1)) {
            last += 1;
        }
        if (last - pos >= 2) {
            // Three or more characters: a range is shorter than listing them.
            pieces.push(unique[pos] + "-" + unique[last]);
            pos = last + 1;
        }
        else {
            pieces.push(unique[pos]);
            pos += 1;
        }
    }
    var tail = nonAscii ? "\\x80-\\uFFFF" : "";
    return new RegExp("[" + pieces.join("") + tail + "]", "g");
}
- /**
- * Encodes all non-ASCII characters, as well as characters not valid in XML
- * documents using numeric hexadecimal reference (eg. `&#xfc;`).
- *
- * Have a look at `escapeUTF8` if you want a more concise output at the expense
- * of reduced transportability.
- *
- * @param data String to escape.
- */
- exports.escape = encodeXML;
/**
 * Replaces every character matched by the XML-special matcher with its named
 * XML entity and leaves all other characters, including non-ASCII ones, as
 * they are.
 *
 * Because non-ASCII characters are passed through unchanged, the output
 * depends on the character set used to transport it.
 *
 * @param data String to escape.
 * @returns The escaped string.
 */
function escapeUTF8(data) {
    var escaped = "";
    var cursor = 0;
    for (var hit = xmlInvalidChars.exec(data); hit; hit = xmlInvalidChars.exec(data)) {
        var at = hit.index;
        // Copy the text between the previous match and this one, if any.
        if (cursor !== at) {
            escaped += data.substring(cursor, at);
        }
        // The matcher and `xmlCodeMap` are derived from the same table, so the
        // matched character is expected to have an entry.
        escaped += xmlCodeMap.get(hit[0].charCodeAt(0));
        // The matcher is a character class, so each match is one code unit long.
        cursor = at + 1;
    }
    return escaped + data.substring(cursor);
}
- exports.escapeUTF8 = escapeUTF8;
|