core_utils.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. /* Copyright 2019 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import {
  16. AnnotationEditorPrefix,
  17. assert,
  18. BaseException,
  19. FontType,
  20. objectSize,
  21. StreamType,
  22. stringToPDFString,
  23. warn,
  24. } from "../shared/util.js";
  25. import { Dict, isName, Ref, RefSet } from "./primitives.js";
  26. import { BaseStream } from "./base_stream.js";
// Matches a version string of the exact form "<digit 1-9>.<digit>", e.g. "1.7".
const PDF_VERSION_REGEXP = /^[1-9]\.\d$/;
  28. function getLookupTableFactory(initializer) {
  29. let lookup;
  30. return function () {
  31. if (initializer) {
  32. lookup = Object.create(null);
  33. initializer(lookup);
  34. initializer = null;
  35. }
  36. return lookup;
  37. };
  38. }
  39. function getArrayLookupTableFactory(initializer) {
  40. let lookup;
  41. return function () {
  42. if (initializer) {
  43. let arr = initializer();
  44. initializer = null;
  45. lookup = Object.create(null);
  46. for (let i = 0, ii = arr.length; i < ii; i += 2) {
  47. lookup[arr[i]] = arr[i + 1];
  48. }
  49. arr = null;
  50. }
  51. return lookup;
  52. };
  53. }
  54. class MissingDataException extends BaseException {
  55. constructor(begin, end) {
  56. super(`Missing data [${begin}, ${end})`, "MissingDataException");
  57. this.begin = begin;
  58. this.end = end;
  59. }
  60. }
  61. class ParserEOFException extends BaseException {
  62. constructor(msg) {
  63. super(msg, "ParserEOFException");
  64. }
  65. }
  66. class XRefEntryException extends BaseException {
  67. constructor(msg) {
  68. super(msg, "XRefEntryException");
  69. }
  70. }
  71. class XRefParseException extends BaseException {
  72. constructor(msg) {
  73. super(msg, "XRefParseException");
  74. }
  75. }
  76. class DocStats {
  77. constructor(handler) {
  78. this._handler = handler;
  79. this._streamTypes = new Set();
  80. this._fontTypes = new Set();
  81. }
  82. _send() {
  83. const streamTypes = Object.create(null),
  84. fontTypes = Object.create(null);
  85. for (const type of this._streamTypes) {
  86. streamTypes[type] = true;
  87. }
  88. for (const type of this._fontTypes) {
  89. fontTypes[type] = true;
  90. }
  91. this._handler.send("DocStats", { streamTypes, fontTypes });
  92. }
  93. addStreamType(type) {
  94. if (
  95. typeof PDFJSDev === "undefined" ||
  96. PDFJSDev.test("!PRODUCTION || TESTING")
  97. ) {
  98. assert(StreamType[type] === type, 'addStreamType: Invalid "type" value.');
  99. }
  100. if (this._streamTypes.has(type)) {
  101. return;
  102. }
  103. this._streamTypes.add(type);
  104. this._send();
  105. }
  106. addFontType(type) {
  107. if (
  108. typeof PDFJSDev === "undefined" ||
  109. PDFJSDev.test("!PRODUCTION || TESTING")
  110. ) {
  111. assert(FontType[type] === type, 'addFontType: Invalid "type" value.');
  112. }
  113. if (this._fontTypes.has(type)) {
  114. return;
  115. }
  116. this._fontTypes.add(type);
  117. this._send();
  118. }
  119. }
  120. /**
  121. * Get the value of an inheritable property.
  122. *
  123. * If the PDF specification explicitly lists a property in a dictionary as
  124. * inheritable, then the value of the property may be present in the dictionary
  125. * itself or in one or more parents of the dictionary.
  126. *
  127. * If the key is not found in the tree, `undefined` is returned. Otherwise,
  128. * the value for the key is returned or, if `stopWhenFound` is `false`, a list
  129. * of values is returned.
  130. *
  131. * @param {Dict} dict - Dictionary from where to start the traversal.
  132. * @param {string} key - The key of the property to find the value for.
  133. * @param {boolean} getArray - Whether or not the value should be fetched as an
  134. * array. The default value is `false`.
  135. * @param {boolean} stopWhenFound - Whether or not to stop the traversal when
  136. * the key is found. If set to `false`, we always walk up the entire parent
  137. * chain, for example to be able to find `\Resources` placed on multiple
  138. * levels of the tree. The default value is `true`.
  139. */
  140. function getInheritableProperty({
  141. dict,
  142. key,
  143. getArray = false,
  144. stopWhenFound = true,
  145. }) {
  146. let values;
  147. const visited = new RefSet();
  148. while (dict instanceof Dict && !(dict.objId && visited.has(dict.objId))) {
  149. if (dict.objId) {
  150. visited.put(dict.objId);
  151. }
  152. const value = getArray ? dict.getArray(key) : dict.get(key);
  153. if (value !== undefined) {
  154. if (stopWhenFound) {
  155. return value;
  156. }
  157. if (!values) {
  158. values = [];
  159. }
  160. values.push(value);
  161. }
  162. dict = dict.get("Parent");
  163. }
  164. return values;
  165. }
  166. // prettier-ignore
  167. const ROMAN_NUMBER_MAP = [
  168. "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
  169. "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
  170. "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"
  171. ];
  172. /**
  173. * Converts positive integers to (upper case) Roman numerals.
  174. * @param {number} number - The number that should be converted.
  175. * @param {boolean} lowerCase - Indicates if the result should be converted
  176. * to lower case letters. The default value is `false`.
  177. * @returns {string} The resulting Roman number.
  178. */
  179. function toRomanNumerals(number, lowerCase = false) {
  180. assert(
  181. Number.isInteger(number) && number > 0,
  182. "The number should be a positive integer."
  183. );
  184. const romanBuf = [];
  185. let pos;
  186. // Thousands
  187. while (number >= 1000) {
  188. number -= 1000;
  189. romanBuf.push("M");
  190. }
  191. // Hundreds
  192. pos = (number / 100) | 0;
  193. number %= 100;
  194. romanBuf.push(ROMAN_NUMBER_MAP[pos]);
  195. // Tens
  196. pos = (number / 10) | 0;
  197. number %= 10;
  198. romanBuf.push(ROMAN_NUMBER_MAP[10 + pos]);
  199. // Ones
  200. romanBuf.push(ROMAN_NUMBER_MAP[20 + number]); // eslint-disable-line unicorn/no-array-push-push
  201. const romanStr = romanBuf.join("");
  202. return lowerCase ? romanStr.toLowerCase() : romanStr;
  203. }
  204. // Calculate the base 2 logarithm of the number `x`. This differs from the
  205. // native function in the sense that it returns the ceiling value and that it
  206. // returns 0 instead of `Infinity`/`NaN` for `x` values smaller than/equal to 0.
  207. function log2(x) {
  208. if (x <= 0) {
  209. return 0;
  210. }
  211. return Math.ceil(Math.log2(x));
  212. }
  213. function readInt8(data, offset) {
  214. return (data[offset] << 24) >> 24;
  215. }
  216. function readUint16(data, offset) {
  217. return (data[offset] << 8) | data[offset + 1];
  218. }
  219. function readUint32(data, offset) {
  220. return (
  221. ((data[offset] << 24) |
  222. (data[offset + 1] << 16) |
  223. (data[offset + 2] << 8) |
  224. data[offset + 3]) >>>
  225. 0
  226. );
  227. }
  228. // Checks if ch is one of the following characters: SPACE, TAB, CR or LF.
  229. function isWhiteSpace(ch) {
  230. return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a;
  231. }
  232. /**
  233. * AcroForm field names use an array like notation to refer to
  234. * repeated XFA elements e.g. foo.bar[nnn].
  235. * see: XFA Spec Chapter 3 - Repeated Elements
  236. *
  237. * @param {string} path - XFA path name.
  238. * @returns {Array} - Array of Objects with the name and pos of
  239. * each part of the path.
  240. */
  241. function parseXFAPath(path) {
  242. const positionPattern = /(.+)\[(\d+)\]$/;
  243. return path.split(".").map(component => {
  244. const m = component.match(positionPattern);
  245. if (m) {
  246. return { name: m[1], pos: parseInt(m[2], 10) };
  247. }
  248. return { name: component, pos: 0 };
  249. });
  250. }
  251. function escapePDFName(str) {
  252. const buffer = [];
  253. let start = 0;
  254. for (let i = 0, ii = str.length; i < ii; i++) {
  255. const char = str.charCodeAt(i);
  256. // Whitespace or delimiters aren't regular chars, so escape them.
  257. if (
  258. char < 0x21 ||
  259. char > 0x7e ||
  260. char === 0x23 /* # */ ||
  261. char === 0x28 /* ( */ ||
  262. char === 0x29 /* ) */ ||
  263. char === 0x3c /* < */ ||
  264. char === 0x3e /* > */ ||
  265. char === 0x5b /* [ */ ||
  266. char === 0x5d /* ] */ ||
  267. char === 0x7b /* { */ ||
  268. char === 0x7d /* } */ ||
  269. char === 0x2f /* / */ ||
  270. char === 0x25 /* % */
  271. ) {
  272. if (start < i) {
  273. buffer.push(str.substring(start, i));
  274. }
  275. buffer.push(`#${char.toString(16)}`);
  276. start = i + 1;
  277. }
  278. }
  279. if (buffer.length === 0) {
  280. return str;
  281. }
  282. if (start < str.length) {
  283. buffer.push(str.substring(start, str.length));
  284. }
  285. return buffer.join("");
  286. }
  287. // Replace "(", ")", "\n", "\r" and "\" by "\(", "\)", "\\n", "\\r" and "\\"
  288. // in order to write it in a PDF file.
  289. function escapeString(str) {
  290. return str.replace(/([()\\\n\r])/g, match => {
  291. if (match === "\n") {
  292. return "\\n";
  293. } else if (match === "\r") {
  294. return "\\r";
  295. }
  296. return `\\${match}`;
  297. });
  298. }
  299. function _collectJS(entry, xref, list, parents) {
  300. if (!entry) {
  301. return;
  302. }
  303. let parent = null;
  304. if (entry instanceof Ref) {
  305. if (parents.has(entry)) {
  306. // If we've already found entry then we've a cycle.
  307. return;
  308. }
  309. parent = entry;
  310. parents.put(parent);
  311. entry = xref.fetch(entry);
  312. }
  313. if (Array.isArray(entry)) {
  314. for (const element of entry) {
  315. _collectJS(element, xref, list, parents);
  316. }
  317. } else if (entry instanceof Dict) {
  318. if (isName(entry.get("S"), "JavaScript")) {
  319. const js = entry.get("JS");
  320. let code;
  321. if (js instanceof BaseStream) {
  322. code = js.getString();
  323. } else if (typeof js === "string") {
  324. code = js;
  325. }
  326. code = code && stringToPDFString(code).replace(/\u0000/g, "");
  327. if (code) {
  328. list.push(code);
  329. }
  330. }
  331. _collectJS(entry.getRaw("Next"), xref, list, parents);
  332. }
  333. if (parent) {
  334. parents.remove(parent);
  335. }
  336. }
  337. function collectActions(xref, dict, eventType) {
  338. const actions = Object.create(null);
  339. const additionalActionsDicts = getInheritableProperty({
  340. dict,
  341. key: "AA",
  342. stopWhenFound: false,
  343. });
  344. if (additionalActionsDicts) {
  345. // additionalActionsDicts contains dicts from ancestors
  346. // as they're found in the tree from bottom to top.
  347. // So the dicts are visited in reverse order to guarantee
  348. // that actions from elder ancestors will be overwritten
  349. // by ones from younger ancestors.
  350. for (let i = additionalActionsDicts.length - 1; i >= 0; i--) {
  351. const additionalActions = additionalActionsDicts[i];
  352. if (!(additionalActions instanceof Dict)) {
  353. continue;
  354. }
  355. for (const key of additionalActions.getKeys()) {
  356. const action = eventType[key];
  357. if (!action) {
  358. continue;
  359. }
  360. const actionDict = additionalActions.getRaw(key);
  361. const parents = new RefSet();
  362. const list = [];
  363. _collectJS(actionDict, xref, list, parents);
  364. if (list.length > 0) {
  365. actions[action] = list;
  366. }
  367. }
  368. }
  369. }
  370. // Collect the Action if any (we may have one on pushbutton).
  371. if (dict.has("A")) {
  372. const actionDict = dict.get("A");
  373. const parents = new RefSet();
  374. const list = [];
  375. _collectJS(actionDict, xref, list, parents);
  376. if (list.length > 0) {
  377. actions.Action = list;
  378. }
  379. }
  380. return objectSize(actions) > 0 ? actions : null;
  381. }
  382. const XMLEntities = {
  383. /* < */ 0x3c: "&lt;",
  384. /* > */ 0x3e: "&gt;",
  385. /* & */ 0x26: "&amp;",
  386. /* " */ 0x22: "&quot;",
  387. /* ' */ 0x27: "&apos;",
  388. };
  389. function encodeToXmlString(str) {
  390. const buffer = [];
  391. let start = 0;
  392. for (let i = 0, ii = str.length; i < ii; i++) {
  393. const char = str.codePointAt(i);
  394. if (0x20 <= char && char <= 0x7e) {
  395. // ascii
  396. const entity = XMLEntities[char];
  397. if (entity) {
  398. if (start < i) {
  399. buffer.push(str.substring(start, i));
  400. }
  401. buffer.push(entity);
  402. start = i + 1;
  403. }
  404. } else {
  405. if (start < i) {
  406. buffer.push(str.substring(start, i));
  407. }
  408. buffer.push(`&#x${char.toString(16).toUpperCase()};`);
  409. if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) {
  410. // char is represented by two u16
  411. i++;
  412. }
  413. start = i + 1;
  414. }
  415. }
  416. if (buffer.length === 0) {
  417. return str;
  418. }
  419. if (start < str.length) {
  420. buffer.push(str.substring(start, str.length));
  421. }
  422. return buffer.join("");
  423. }
  424. function validateCSSFont(cssFontInfo) {
  425. // See https://developer.mozilla.org/en-US/docs/Web/CSS/font-style.
  426. const DEFAULT_CSS_FONT_OBLIQUE = "14";
  427. // See https://developer.mozilla.org/en-US/docs/Web/CSS/font-weight.
  428. const DEFAULT_CSS_FONT_WEIGHT = "400";
  429. const CSS_FONT_WEIGHT_VALUES = new Set([
  430. "100",
  431. "200",
  432. "300",
  433. "400",
  434. "500",
  435. "600",
  436. "700",
  437. "800",
  438. "900",
  439. "1000",
  440. "normal",
  441. "bold",
  442. "bolder",
  443. "lighter",
  444. ]);
  445. const { fontFamily, fontWeight, italicAngle } = cssFontInfo;
  446. // See https://developer.mozilla.org/en-US/docs/Web/CSS/string.
  447. if (/^".*"$/.test(fontFamily)) {
  448. if (/[^\\]"/.test(fontFamily.slice(1, fontFamily.length - 1))) {
  449. warn(`XFA - FontFamily contains some unescaped ": ${fontFamily}.`);
  450. return false;
  451. }
  452. } else if (/^'.*'$/.test(fontFamily)) {
  453. if (/[^\\]'/.test(fontFamily.slice(1, fontFamily.length - 1))) {
  454. warn(`XFA - FontFamily contains some unescaped ': ${fontFamily}.`);
  455. return false;
  456. }
  457. } else {
  458. // See https://developer.mozilla.org/en-US/docs/Web/CSS/custom-ident.
  459. for (const ident of fontFamily.split(/[ \t]+/)) {
  460. if (/^(\d|(-(\d|-)))/.test(ident) || !/^[\w-\\]+$/.test(ident)) {
  461. warn(
  462. `XFA - FontFamily contains some invalid <custom-ident>: ${fontFamily}.`
  463. );
  464. return false;
  465. }
  466. }
  467. }
  468. const weight = fontWeight ? fontWeight.toString() : "";
  469. cssFontInfo.fontWeight = CSS_FONT_WEIGHT_VALUES.has(weight)
  470. ? weight
  471. : DEFAULT_CSS_FONT_WEIGHT;
  472. const angle = parseFloat(italicAngle);
  473. cssFontInfo.italicAngle =
  474. isNaN(angle) || angle < -90 || angle > 90
  475. ? DEFAULT_CSS_FONT_OBLIQUE
  476. : italicAngle.toString();
  477. return true;
  478. }
  479. function recoverJsURL(str) {
  480. // Attempt to recover valid URLs from `JS` entries with certain
  481. // white-listed formats:
  482. // - window.open('http://example.com')
  483. // - app.launchURL('http://example.com', true)
  484. // - xfa.host.gotoURL('http://example.com')
  485. const URL_OPEN_METHODS = ["app.launchURL", "window.open", "xfa.host.gotoURL"];
  486. const regex = new RegExp(
  487. "^\\s*(" +
  488. URL_OPEN_METHODS.join("|").split(".").join("\\.") +
  489. ")\\((?:'|\")([^'\"]*)(?:'|\")(?:,\\s*(\\w+)\\)|\\))",
  490. "i"
  491. );
  492. const jsUrl = regex.exec(str);
  493. if (jsUrl && jsUrl[2]) {
  494. const url = jsUrl[2];
  495. let newWindow = false;
  496. if (jsUrl[3] === "true" && jsUrl[1] === "app.launchURL") {
  497. newWindow = true;
  498. }
  499. return { url, newWindow };
  500. }
  501. return null;
  502. }
  503. function numberToString(value) {
  504. if (Number.isInteger(value)) {
  505. return value.toString();
  506. }
  507. const roundedValue = Math.round(value * 100);
  508. if (roundedValue % 100 === 0) {
  509. return (roundedValue / 100).toString();
  510. }
  511. if (roundedValue % 10 === 0) {
  512. return value.toFixed(1);
  513. }
  514. return value.toFixed(2);
  515. }
  516. function getNewAnnotationsMap(annotationStorage) {
  517. if (!annotationStorage) {
  518. return null;
  519. }
  520. const newAnnotationsByPage = new Map();
  521. // The concept of page in a XFA is very different, so
  522. // editing is just not implemented.
  523. for (const [key, value] of annotationStorage) {
  524. if (!key.startsWith(AnnotationEditorPrefix)) {
  525. continue;
  526. }
  527. let annotations = newAnnotationsByPage.get(value.pageIndex);
  528. if (!annotations) {
  529. annotations = [];
  530. newAnnotationsByPage.set(value.pageIndex, annotations);
  531. }
  532. annotations.push(value);
  533. }
  534. return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
  535. }
  536. function isAscii(str) {
  537. return /^[\x00-\x7F]*$/.test(str);
  538. }
  539. function stringToUTF16HexString(str) {
  540. const buf = [];
  541. for (let i = 0, ii = str.length; i < ii; i++) {
  542. const char = str.charCodeAt(i);
  543. buf.push(
  544. ((char >> 8) & 0xff).toString(16).padStart(2, "0"),
  545. (char & 0xff).toString(16).padStart(2, "0")
  546. );
  547. }
  548. return buf.join("");
  549. }
  550. function stringToUTF16String(str, bigEndian = false) {
  551. const buf = [];
  552. if (bigEndian) {
  553. buf.push("\xFE\xFF");
  554. }
  555. for (let i = 0, ii = str.length; i < ii; i++) {
  556. const char = str.charCodeAt(i);
  557. buf.push(
  558. String.fromCharCode((char >> 8) & 0xff),
  559. String.fromCharCode(char & 0xff)
  560. );
  561. }
  562. return buf.join("");
  563. }
  564. function getRotationMatrix(rotation, width, height) {
  565. switch (rotation) {
  566. case 90:
  567. return [0, 1, -1, 0, width, 0];
  568. case 180:
  569. return [-1, 0, 0, -1, width, height];
  570. case 270:
  571. return [0, -1, 1, 0, 0, height];
  572. default:
  573. throw new Error("Invalid rotation");
  574. }
  575. }
  576. export {
  577. collectActions,
  578. DocStats,
  579. encodeToXmlString,
  580. escapePDFName,
  581. escapeString,
  582. getArrayLookupTableFactory,
  583. getInheritableProperty,
  584. getLookupTableFactory,
  585. getNewAnnotationsMap,
  586. getRotationMatrix,
  587. isAscii,
  588. isWhiteSpace,
  589. log2,
  590. MissingDataException,
  591. numberToString,
  592. ParserEOFException,
  593. parseXFAPath,
  594. PDF_VERSION_REGEXP,
  595. readInt8,
  596. readUint16,
  597. readUint32,
  598. recoverJsURL,
  599. stringToUTF16HexString,
  600. stringToUTF16String,
  601. toRomanNumerals,
  602. validateCSSFont,
  603. XRefEntryException,
  604. XRefParseException,
  605. };