123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661 |
- /* Copyright 2019 Mozilla Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- import {
- AnnotationEditorPrefix,
- assert,
- BaseException,
- FontType,
- objectSize,
- StreamType,
- stringToPDFString,
- warn,
- } from "../shared/util.js";
- import { Dict, isName, Ref, RefSet } from "./primitives.js";
- import { BaseStream } from "./base_stream.js";
// Matches a PDF version string of the form "<major>.<minor>": one non-zero
// digit, a literal dot and one digit (e.g. "1.7" or "2.0").
const PDF_VERSION_REGEXP = /^[1-9]\.\d$/;
/**
 * Creates a getter for a lazily built lookup table.
 *
 * The first call of the returned function creates a prototype-less object,
 * hands it to `initializer` to be filled in, and caches it. Every later call
 * returns that same object without invoking `initializer` again.
 *
 * @param {function(Object): void} initializer - Callback that populates the
 *   table it receives. If it is falsy, the getter returns `undefined`.
 * @returns {function(): Object} Getter for the cached lookup table.
 */
function getLookupTableFactory(initializer) {
  let lookup;
  return function () {
    if (initializer) {
      lookup = Object.create(null);
      initializer(lookup);
      // Drop the callback so that the table is only ever built once.
      initializer = null;
    }
    return lookup;
  };
}
/**
 * Creates a getter for a lazily built lookup table whose contents come from a
 * flat `[key0, value0, key1, value1, ...]` array.
 *
 * On the first call of the returned function `initializer` is invoked once,
 * its result is unpacked pairwise into a prototype-less object, and that
 * object is cached and returned by this and all later calls.
 *
 * @param {function(): Array} initializer - Callback returning the flat
 *   key/value array. If it is falsy, the getter returns `undefined`.
 * @returns {function(): Object} Getter for the cached lookup table.
 */
function getArrayLookupTableFactory(initializer) {
  let lookup;
  return function () {
    if (initializer) {
      // Block-scoped, so the (possibly large) source array becomes
      // unreachable as soon as the table has been built.
      const pairs = initializer();
      initializer = null;
      lookup = Object.create(null);
      for (let i = 0, ii = pairs.length; i < ii; i += 2) {
        lookup[pairs[i]] = pairs[i + 1];
      }
    }
    return lookup;
  };
}
/**
 * Exception that records the `[begin, end)` range named in its message.
 */
class MissingDataException extends BaseException {
  /**
   * @param {number} begin - Start of the range; stored as `this.begin`.
   * @param {number} end - End of the range; stored as `this.end`.
   */
  constructor(begin, end) {
    super(`Missing data [${begin}, ${end})`, "MissingDataException");
    this.begin = begin;
    this.end = end;
  }
}
/**
 * `BaseException` subclass registered under the name "ParserEOFException".
 */
class ParserEOFException extends BaseException {
  /**
   * @param {string} msg - The exception message.
   */
  constructor(msg) {
    super(msg, "ParserEOFException");
  }
}
/**
 * `BaseException` subclass registered under the name "XRefEntryException".
 */
class XRefEntryException extends BaseException {
  /**
   * @param {string} msg - The exception message.
   */
  constructor(msg) {
    super(msg, "XRefEntryException");
  }
}
/**
 * `BaseException` subclass registered under the name "XRefParseException".
 */
class XRefParseException extends BaseException {
  /**
   * @param {string} msg - The exception message.
   */
  constructor(msg) {
    super(msg, "XRefParseException");
  }
}
/**
 * Collects the distinct stream and font types reported for a document and
 * forwards both complete sets through `handler.send("DocStats", ...)` every
 * time a previously unseen type is added.
 */
class DocStats {
  /**
   * @param {Object} handler - Object exposing a `send(name, data)` method.
   */
  constructor(handler) {
    this._handler = handler;
    this._streamTypes = new Set();
    this._fontTypes = new Set();
  }

  /**
   * Sends the current state as two prototype-less objects that map every
   * recorded type to `true`.
   */
  _send() {
    const streamTypes = Object.create(null);
    const fontTypes = Object.create(null);
    for (const type of this._streamTypes) {
      streamTypes[type] = true;
    }
    for (const type of this._fontTypes) {
      fontTypes[type] = true;
    }
    this._handler.send("DocStats", { streamTypes, fontTypes });
  }

  /**
   * Records a stream type; the handler is only notified for new types.
   * @param {string} type - Expected to be a value of `StreamType`.
   */
  addStreamType(type) {
    // NOTE(review): `PDFJSDev` looks like a build-time global; the check is
    // skipped only when it exists and its `test` call returns false.
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(StreamType[type] === type, 'addStreamType: Invalid "type" value.');
    }
    if (this._streamTypes.has(type)) {
      return;
    }
    this._streamTypes.add(type);
    this._send();
  }

  /**
   * Records a font type; the handler is only notified for new types.
   * @param {string} type - Expected to be a value of `FontType`.
   */
  addFontType(type) {
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(FontType[type] === type, 'addFontType: Invalid "type" value.');
    }
    if (this._fontTypes.has(type)) {
      return;
    }
    this._fontTypes.add(type);
    this._send();
  }
}
/**
 * Get the value of an inheritable property.
 *
 * If the PDF specification explicitly lists a property in a dictionary as
 * inheritable, then the value of the property may be present in the dictionary
 * itself or in one or more parents of the dictionary.
 *
 * If the key is not found in the tree, `undefined` is returned. Otherwise,
 * the value for the key is returned or, if `stopWhenFound` is `false`, a list
 * of values is returned.
 *
 * @param {Dict} dict - Dictionary from where to start the traversal.
 * @param {string} key - The key of the property to find the value for.
 * @param {boolean} getArray - Whether or not the value should be fetched as an
 *   array. The default value is `false`.
 * @param {boolean} stopWhenFound - Whether or not to stop the traversal when
 *   the key is found. If set to `false`, we always walk up the entire parent
 *   chain, for example to be able to find `/Resources` placed on multiple
 *   levels of the tree. The default value is `true`.
 * @returns {*} The first value found, an array of all values found (ordered
 *   from the starting dictionary upwards), or `undefined`.
 */
function getInheritableProperty({
  dict,
  key,
  getArray = false,
  stopWhenFound = true,
}) {
  let values;
  // Dictionaries that carry an `objId` are remembered, so that a circular
  // `Parent` chain terminates the walk instead of looping forever.
  const visited = new RefSet();

  while (dict instanceof Dict && !(dict.objId && visited.has(dict.objId))) {
    if (dict.objId) {
      visited.put(dict.objId);
    }
    const value = getArray ? dict.getArray(key) : dict.get(key);
    if (value !== undefined) {
      if (stopWhenFound) {
        return value;
      }
      if (!values) {
        values = [];
      }
      values.push(value);
    }
    dict = dict.get("Parent");
  }
  return values;
}
// Three consecutive rows of ten entries each: hundreds (offset 0), tens
// (offset 10) and ones (offset 20), indexed by the respective decimal digit.
// prettier-ignore
const ROMAN_NUMBER_MAP = [
  "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
  "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
  "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"
];

/**
 * Converts positive integers to (upper case) Roman numerals.
 * @param {number} number - The number that should be converted.
 * @param {boolean} lowerCase - Indicates if the result should be converted
 *   to lower case letters. The default value is `false`.
 * @returns {string} The resulting Roman number.
 */
function toRomanNumerals(number, lowerCase = false) {
  assert(
    Number.isInteger(number) && number > 0,
    "The number should be a positive integer."
  );

  // Thousands: one "M" for every full thousand.
  const romanBuf = ["M".repeat(Math.floor(number / 1000))];
  let remaining = number % 1000;

  // Hundreds
  const hundreds = Math.floor(remaining / 100);
  remaining %= 100;
  // Tens
  const tens = Math.floor(remaining / 10);
  // Ones
  const ones = remaining % 10;

  romanBuf.push(
    ROMAN_NUMBER_MAP[hundreds],
    ROMAN_NUMBER_MAP[10 + tens],
    ROMAN_NUMBER_MAP[20 + ones]
  );

  const romanStr = romanBuf.join("");
  return lowerCase ? romanStr.toLowerCase() : romanStr;
}
/**
 * Calculates the base 2 logarithm of `x`, rounded up to the next integer.
 *
 * Unlike the native `Math.log2`, this returns 0 (rather than
 * `-Infinity`/`NaN`) for values of `x` that are smaller than or equal to 0.
 *
 * @param {number} x
 * @returns {number}
 */
function log2(x) {
  if (x <= 0) {
    return 0;
  }
  return Math.ceil(Math.log2(x));
}
/**
 * Reads the element at `offset` as a signed 8-bit integer.
 * @param {ArrayLike<number>} data - Indexable sequence of byte values.
 * @param {number} offset - Index of the byte to read.
 * @returns {number} A value in the range -128..127.
 */
function readInt8(data, offset) {
  // Shifting the byte to the top of a 32-bit integer and back again with the
  // sign-propagating `>>` performs the sign extension.
  return (data[offset] << 24) >> 24;
}
/**
 * Reads two consecutive elements, most significant byte first, as an
 * unsigned 16-bit integer.
 * @param {ArrayLike<number>} data - Indexable sequence of byte values.
 * @param {number} offset - Index of the first (high) byte.
 * @returns {number}
 */
function readUint16(data, offset) {
  return (data[offset] << 8) | data[offset + 1];
}
/**
 * Reads four consecutive elements, most significant byte first, as an
 * unsigned 32-bit integer.
 * @param {ArrayLike<number>} data - Indexable sequence of byte values.
 * @param {number} offset - Index of the first (highest) byte.
 * @returns {number}
 */
function readUint32(data, offset) {
  return (
    ((data[offset] << 24) |
      (data[offset + 1] << 16) |
      (data[offset + 2] << 8) |
      data[offset + 3]) >>>
    0 // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  );
}
/**
 * Checks if `ch` is one of the following characters: SPACE, TAB, CR or LF.
 * @param {number} ch - Character code to test.
 * @returns {boolean}
 */
function isWhiteSpace(ch) {
  return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a;
}
/**
 * AcroForm field names use an array like notation to refer to
 * repeated XFA elements e.g. foo.bar[nnn].
 * see: XFA Spec Chapter 3 - Repeated Elements
 *
 * @param {string} path - XFA path name.
 * @returns {Array} - Array of Objects with the name and pos of
 *   each part of the path. Components without a trailing `[nnn]` get
 *   position 0.
 */
function parseXFAPath(path) {
  const positionPattern = /(.+)\[(\d+)\]$/;
  return path.split(".").map(component => {
    const m = component.match(positionPattern);
    if (m) {
      return { name: m[1], pos: parseInt(m[2], 10) };
    }
    return { name: component, pos: 0 };
  });
}
/**
 * Escapes a string so that it can be written as a PDF name.
 *
 * Every character that is not a regular character (anything outside
 * 0x21..0x7e, the PDF delimiters and `#` itself) is replaced by `#` followed
 * by its hexadecimal character code, zero-padded to two digits.
 *
 * @param {string} str - The unescaped name.
 * @returns {string} The escaped name; `str` itself if nothing needed escaping.
 */
function escapePDFName(str) {
  const buffer = [];
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.charCodeAt(i);
    // Whitespace or delimiters aren't regular chars, so escape them.
    if (
      char < 0x21 ||
      char > 0x7e ||
      char === 0x23 /* # */ ||
      char === 0x28 /* ( */ ||
      char === 0x29 /* ) */ ||
      char === 0x3c /* < */ ||
      char === 0x3e /* > */ ||
      char === 0x5b /* [ */ ||
      char === 0x5d /* ] */ ||
      char === 0x7b /* { */ ||
      char === 0x7d /* } */ ||
      char === 0x2f /* / */ ||
      char === 0x25 /* % */
    ) {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      // The escape must have exactly two hex digits, hence the padding for
      // codes below 0x10 (e.g. TAB -> "#09").
      // NOTE(review): codes above 0xff still produce more than two digits;
      // callers presumably only pass byte strings - confirm.
      buffer.push(`#${char.toString(16).padStart(2, "0")}`);
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }
  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }
  return buffer.join("");
}
/**
 * Escapes a string so that it can be written as a literal string in a PDF
 * file: "(", ")" and "\" get a backslash prepended, while line feed and
 * carriage return are replaced by the two-character sequences "\n" and "\r".
 *
 * @param {string} str
 * @returns {string}
 */
function escapeString(str) {
  return str.replace(/([()\\\n\r])/g, match => {
    if (match === "\n") {
      return "\\n";
    } else if (match === "\r") {
      return "\\r";
    }
    return `\\${match}`;
  });
}
/**
 * Recursively gathers the JavaScript code reachable from an action entry,
 * descending into arrays and following the `Next` entry of dictionaries.
 *
 * @param {*} entry - A reference, array or dictionary to inspect; falsy
 *   values are ignored.
 * @param {Object} xref - Used to resolve references through `xref.fetch`.
 * @param {Array<string>} list - Output array the found code is pushed onto.
 * @param {RefSet} parents - References on the path currently being walked;
 *   used to break reference cycles.
 */
function _collectJS(entry, xref, list, parents) {
  if (!entry) {
    return;
  }

  let parent = null;
  if (entry instanceof Ref) {
    if (parents.has(entry)) {
      // If we've already found entry then we've a cycle.
      return;
    }
    parent = entry;
    parents.put(parent);
    entry = xref.fetch(entry);
  }

  if (Array.isArray(entry)) {
    for (const element of entry) {
      _collectJS(element, xref, list, parents);
    }
  } else if (entry instanceof Dict) {
    if (isName(entry.get("S"), "JavaScript")) {
      // The code is either held in a stream or stored inline as a string;
      // any other kind of value is ignored.
      const js = entry.get("JS");
      let code;
      if (js instanceof BaseStream) {
        code = js.getString();
      } else if (typeof js === "string") {
        code = js;
      }
      // Convert with `stringToPDFString` and strip NUL characters; code that
      // ends up empty is not recorded.
      code = code && stringToPDFString(code).replace(/\u0000/g, "");
      if (code) {
        list.push(code);
      }
    }
    _collectJS(entry.getRaw("Next"), xref, list, parents);
  }

  if (parent) {
    // Only references on the current path count as a cycle, so the same
    // reference may still be visited again through a different branch.
    parents.remove(parent);
  }
}
/**
 * Collects the JavaScript actions attached to a dictionary: those found in
 * its own and its ancestors' `AA` entries, plus the one in its `A` entry.
 *
 * @param {Object} xref - Passed on to `_collectJS` to resolve references.
 * @param {Dict} dict - The dictionary whose actions are collected.
 * @param {Object} eventType - Maps `AA` keys to the action names used as keys
 *   of the result; `AA` keys without a (truthy) mapping are skipped.
 * @returns {Object|null} Prototype-less object mapping action names to arrays
 *   of JavaScript strings, or `null` when nothing was found.
 */
function collectActions(xref, dict, eventType) {
  const actions = Object.create(null);
  const additionalActionsDicts = getInheritableProperty({
    dict,
    key: "AA",
    stopWhenFound: false,
  });
  if (additionalActionsDicts) {
    // additionalActionsDicts contains dicts from ancestors
    // as they're found in the tree from bottom to top.
    // So the dicts are visited in reverse order to guarantee
    // that actions from elder ancestors will be overwritten
    // by ones from younger ancestors.
    for (let i = additionalActionsDicts.length - 1; i >= 0; i--) {
      const additionalActions = additionalActionsDicts[i];
      if (!(additionalActions instanceof Dict)) {
        continue;
      }
      for (const key of additionalActions.getKeys()) {
        const action = eventType[key];
        if (!action) {
          continue;
        }
        const actionDict = additionalActions.getRaw(key);
        const parents = new RefSet();
        const list = [];
        _collectJS(actionDict, xref, list, parents);
        if (list.length > 0) {
          actions[action] = list;
        }
      }
    }
  }
  // Collect the Action if any (we may have one on pushbutton).
  if (dict.has("A")) {
    const actionDict = dict.get("A");
    const parents = new RefSet();
    const list = [];
    _collectJS(actionDict, xref, list, parents);
    if (list.length > 0) {
      actions.Action = list;
    }
  }
  return objectSize(actions) > 0 ? actions : null;
}
// The five predefined XML entities, keyed by the code point they replace.
const XMLEntities = {
  /* < */ 0x3c: "&lt;",
  /* > */ 0x3e: "&gt;",
  /* & */ 0x26: "&amp;",
  /* " */ 0x22: "&quot;",
  /* ' */ 0x27: "&apos;",
};

/**
 * Encodes a string so that it can be embedded in XML.
 *
 * Printable ASCII (0x20..0x7e) is kept as-is, except for the characters in
 * `XMLEntities`, which are replaced by their entity. Everything else is
 * written as an upper-case hexadecimal character reference (`&#xNNNN;`) of
 * its code point.
 *
 * @param {string} str
 * @returns {string} The encoded string; `str` itself if nothing was replaced.
 */
function encodeToXmlString(str) {
  const buffer = [];
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (0x20 <= char && char <= 0x7e) {
      // ascii
      const entity = XMLEntities[char];
      if (entity) {
        if (start < i) {
          buffer.push(str.substring(start, i));
        }
        buffer.push(entity);
        start = i + 1;
      }
    } else {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
      if (char > 0xffff) {
        // Only code points beyond the BMP occupy two UTF-16 units. A lone
        // surrogate or U+FFFE/U+FFFF is a single unit, so nothing that
        // follows it may be skipped.
        i++;
      }
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }
  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }
  return buffer.join("");
}
/**
 * Validates and normalizes the CSS font information taken from a document.
 *
 * `fontFamily` must be a well-formed quoted CSS string or a whitespace
 * separated list of `<custom-ident>`s; otherwise a warning is emitted and
 * `false` is returned. On success `cssFontInfo.fontWeight` and
 * `cssFontInfo.italicAngle` are overwritten in place with validated string
 * values, falling back to "400" and "14" respectively.
 *
 * @param {Object} cssFontInfo - Object with `fontFamily`, `fontWeight` and
 *   `italicAngle` properties; modified in place.
 * @returns {boolean} Whether `fontFamily` is valid.
 */
function validateCSSFont(cssFontInfo) {
  // See https://developer.mozilla.org/en-US/docs/Web/CSS/font-style.
  const DEFAULT_CSS_FONT_OBLIQUE = "14";
  // See https://developer.mozilla.org/en-US/docs/Web/CSS/font-weight.
  const DEFAULT_CSS_FONT_WEIGHT = "400";
  const CSS_FONT_WEIGHT_VALUES = new Set([
    "100",
    "200",
    "300",
    "400",
    "500",
    "600",
    "700",
    "800",
    "900",
    "1000",
    "normal",
    "bold",
    "bolder",
    "lighter",
  ]);

  const { fontFamily, fontWeight, italicAngle } = cssFontInfo;

  // See https://developer.mozilla.org/en-US/docs/Web/CSS/string.
  // The `(^|...)` alternative also catches an unescaped quote that is the
  // very first character inside the surrounding quotes.
  if (/^".*"$/.test(fontFamily)) {
    if (/(^|[^\\])"/.test(fontFamily.slice(1, fontFamily.length - 1))) {
      warn(`XFA - FontFamily contains some unescaped ": ${fontFamily}.`);
      return false;
    }
  } else if (/^'.*'$/.test(fontFamily)) {
    if (/(^|[^\\])'/.test(fontFamily.slice(1, fontFamily.length - 1))) {
      warn(`XFA - FontFamily contains some unescaped ': ${fontFamily}.`);
      return false;
    }
  } else {
    // See https://developer.mozilla.org/en-US/docs/Web/CSS/custom-ident.
    for (const ident of fontFamily.split(/[ \t]+/)) {
      if (/^(\d|(-(\d|-)))/.test(ident) || !/^[\w-\\]+$/.test(ident)) {
        warn(
          `XFA - FontFamily contains some invalid <custom-ident>: ${fontFamily}.`
        );
        return false;
      }
    }
  }

  const weight = fontWeight ? fontWeight.toString() : "";
  cssFontInfo.fontWeight = CSS_FONT_WEIGHT_VALUES.has(weight)
    ? weight
    : DEFAULT_CSS_FONT_WEIGHT;

  // Angles outside of [-90, 90] (or unparsable ones) get the default value.
  const angle = parseFloat(italicAngle);
  cssFontInfo.italicAngle =
    Number.isNaN(angle) || angle < -90 || angle > 90
      ? DEFAULT_CSS_FONT_OBLIQUE
      : italicAngle.toString();

  return true;
}
/**
 * Attempt to recover valid URLs from `JS` entries with certain
 * white-listed formats:
 *  - window.open('http://example.com')
 *  - app.launchURL('http://example.com', true)
 *  - xfa.host.gotoURL('http://example.com')
 *
 * @param {string} str - The JavaScript code to inspect.
 * @returns {Object|null} `{ url, newWindow }` when a non-empty URL was found,
 *   `null` otherwise. `newWindow` is only `true` for
 *   `app.launchURL(..., true)`.
 */
function recoverJsURL(str) {
  const URL_OPEN_METHODS = ["app.launchURL", "window.open", "xfa.host.gotoURL"];
  // Alternation of the method names, with their dots escaped for the regex.
  const methods = URL_OPEN_METHODS.join("|").split(".").join("\\.");
  const regex = new RegExp(
    `^\\s*(${methods})\\((?:'|")([^'"]*)(?:'|")(?:,\\s*(\\w+)\\)|\\))`,
    "i"
  );

  const jsUrl = regex.exec(str);
  if (jsUrl && jsUrl[2]) {
    const url = jsUrl[2];
    let newWindow = false;

    if (jsUrl[3] === "true" && jsUrl[1] === "app.launchURL") {
      newWindow = true;
    }
    return { url, newWindow };
  }
  return null;
}
/**
 * Converts a number to a string with at most two decimals, omitting
 * trailing zeros in the fractional part.
 * @param {number} value
 * @returns {string}
 */
function numberToString(value) {
  if (Number.isInteger(value)) {
    return value.toString();
  }

  // The value in hundredths, rounded to the nearest integer.
  const roundedValue = Math.round(value * 100);
  if (roundedValue % 100 === 0) {
    // Rounds to a whole number.
    return (roundedValue / 100).toString();
  }
  if (roundedValue % 10 === 0) {
    // The second decimal would be a zero.
    return value.toFixed(1);
  }
  return value.toFixed(2);
}
/**
 * Groups the entries of the annotation storage whose key starts with
 * `AnnotationEditorPrefix` by the `pageIndex` of their value.
 *
 * @param {Iterable|null|undefined} annotationStorage - Iterable of
 *   `[key, value]` pairs (e.g. a `Map`).
 * @returns {Map<number, Array<Object>>|null} Map from page index to the
 *   values found for that page, or `null` if there is no storage or no
 *   matching entry.
 */
function getNewAnnotationsMap(annotationStorage) {
  if (!annotationStorage) {
    return null;
  }
  const newAnnotationsByPage = new Map();
  // The concept of page in a XFA is very different, so
  // editing is just not implemented.
  for (const [key, value] of annotationStorage) {
    if (!key.startsWith(AnnotationEditorPrefix)) {
      continue;
    }
    let annotations = newAnnotationsByPage.get(value.pageIndex);
    if (!annotations) {
      annotations = [];
      newAnnotationsByPage.set(value.pageIndex, annotations);
    }
    annotations.push(value);
  }
  return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
}
/**
 * Checks whether every UTF-16 code unit of `str` lies in the range
 * 0x00..0x7f. The empty string is considered ASCII.
 * @param {string} str
 * @returns {boolean}
 */
function isAscii(str) {
  return /^[\x00-\x7F]*$/.test(str);
}
/**
 * Converts a string to the lower-case hexadecimal representation of its
 * UTF-16 code units: four hex digits per unit, high byte first.
 * @param {string} str
 * @returns {string}
 */
function stringToUTF16HexString(str) {
  const buf = [];
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.charCodeAt(i);
    buf.push(
      ((char >> 8) & 0xff).toString(16).padStart(2, "0"),
      (char & 0xff).toString(16).padStart(2, "0")
    );
  }
  return buf.join("");
}
/**
 * Converts a string to a "byte string" holding its UTF-16 code units, i.e.
 * two characters in the range 0x00..0xff per code unit, high byte first.
 *
 * @param {string} str
 * @param {boolean} bigEndian - When `true`, the result is prefixed with the
 *   byte order mark "\xFE\xFF". The byte order of the code units themselves
 *   is the same in both cases. The default value is `false`.
 * @returns {string}
 */
function stringToUTF16String(str, bigEndian = false) {
  const buf = [];
  if (bigEndian) {
    buf.push("\xFE\xFF");
  }
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.charCodeAt(i);
    buf.push(
      String.fromCharCode((char >> 8) & 0xff),
      String.fromCharCode(char & 0xff)
    );
  }
  return buf.join("");
}
/**
 * Returns the six-element transformation matrix for one of the supported
 * rotations (90, 180 or 270).
 *
 * @param {number} rotation - The rotation; must be 90, 180 or 270.
 * @param {number} width - Used as horizontal translation for 90 and 180.
 * @param {number} height - Used as vertical translation for 180 and 270.
 * @returns {Array<number>}
 * @throws {Error} For any other `rotation`, including 0.
 */
function getRotationMatrix(rotation, width, height) {
  switch (rotation) {
    case 90:
      return [0, 1, -1, 0, width, 0];
    case 180:
      return [-1, 0, 0, -1, width, height];
    case 270:
      return [0, -1, 1, 0, 0, height];
    default:
      throw new Error("Invalid rotation");
  }
}
- export {
- collectActions,
- DocStats,
- encodeToXmlString,
- escapePDFName,
- escapeString,
- getArrayLookupTableFactory,
- getInheritableProperty,
- getLookupTableFactory,
- getNewAnnotationsMap,
- getRotationMatrix,
- isAscii,
- isWhiteSpace,
- log2,
- MissingDataException,
- numberToString,
- ParserEOFException,
- parseXFAPath,
- PDF_VERSION_REGEXP,
- readInt8,
- readUint16,
- readUint32,
- recoverJsURL,
- stringToUTF16HexString,
- stringToUTF16String,
- toRomanNumerals,
- validateCSSFont,
- XRefEntryException,
- XRefParseException,
- };
|