// document.js (48 KB)
  1. /* Copyright 2012 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import { AnnotationFactory, PopupAnnotation } from "./annotation.js";
  16. import {
  17. assert,
  18. FormatError,
  19. info,
  20. InvalidPDFException,
  21. isArrayEqual,
  22. PageActionEventType,
  23. RenderingIntentFlag,
  24. shadow,
  25. stringToBytes,
  26. stringToPDFString,
  27. stringToUTF8String,
  28. unreachable,
  29. UNSUPPORTED_FEATURES,
  30. Util,
  31. warn,
  32. } from "../shared/util.js";
  33. import {
  34. collectActions,
  35. getInheritableProperty,
  36. getNewAnnotationsMap,
  37. isWhiteSpace,
  38. MissingDataException,
  39. PDF_VERSION_REGEXP,
  40. validateCSSFont,
  41. XRefEntryException,
  42. XRefParseException,
  43. } from "./core_utils.js";
  44. import { Dict, isName, Name, Ref } from "./primitives.js";
  45. import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
  46. import { BaseStream } from "./base_stream.js";
  47. import { calculateMD5 } from "./crypto.js";
  48. import { Catalog } from "./catalog.js";
  49. import { clearGlobalCaches } from "./cleanup_helper.js";
  50. import { DatasetReader } from "./dataset_reader.js";
  51. import { Linearization } from "./parser.js";
  52. import { NullStream } from "./stream.js";
  53. import { ObjectLoader } from "./object_loader.js";
  54. import { OperatorList } from "./operator_list.js";
  55. import { PartialEvaluator } from "./evaluator.js";
  56. import { StreamsSequenceStream } from "./decode_stream.js";
  57. import { StructTreePage } from "./struct_tree.js";
  58. import { writeObject } from "./writer.js";
  59. import { XFAFactory } from "./xfa/factory.js";
  60. import { XRef } from "./xref.js";
  61. const DEFAULT_USER_UNIT = 1.0;
  62. const LETTER_SIZE_MEDIABOX = [0, 0, 612, 792];
  63. class Page {
  64. constructor({
  65. pdfManager,
  66. xref,
  67. pageIndex,
  68. pageDict,
  69. ref,
  70. globalIdFactory,
  71. fontCache,
  72. builtInCMapCache,
  73. standardFontDataCache,
  74. globalImageCache,
  75. nonBlendModesSet,
  76. xfaFactory,
  77. }) {
  78. this.pdfManager = pdfManager;
  79. this.pageIndex = pageIndex;
  80. this.pageDict = pageDict;
  81. this.xref = xref;
  82. this.ref = ref;
  83. this.fontCache = fontCache;
  84. this.builtInCMapCache = builtInCMapCache;
  85. this.standardFontDataCache = standardFontDataCache;
  86. this.globalImageCache = globalImageCache;
  87. this.nonBlendModesSet = nonBlendModesSet;
  88. this.evaluatorOptions = pdfManager.evaluatorOptions;
  89. this.resourcesPromise = null;
  90. this.xfaFactory = xfaFactory;
  91. const idCounters = {
  92. obj: 0,
  93. };
  94. this._localIdFactory = class extends globalIdFactory {
  95. static createObjId() {
  96. return `p${pageIndex}_${++idCounters.obj}`;
  97. }
  98. static getPageObjId() {
  99. return `page${ref.toString()}`;
  100. }
  101. };
  102. }
  103. /**
  104. * @private
  105. */
  106. _getInheritableProperty(key, getArray = false) {
  107. const value = getInheritableProperty({
  108. dict: this.pageDict,
  109. key,
  110. getArray,
  111. stopWhenFound: false,
  112. });
  113. if (!Array.isArray(value)) {
  114. return value;
  115. }
  116. if (value.length === 1 || !(value[0] instanceof Dict)) {
  117. return value[0];
  118. }
  119. return Dict.merge({ xref: this.xref, dictArray: value });
  120. }
  121. get content() {
  122. return this.pageDict.getArray("Contents");
  123. }
  124. get resources() {
  125. // For robustness: The spec states that a \Resources entry has to be
  126. // present, but can be empty. Some documents still omit it; in this case
  127. // we return an empty dictionary.
  128. const resources = this._getInheritableProperty("Resources");
  129. return shadow(
  130. this,
  131. "resources",
  132. resources instanceof Dict ? resources : Dict.empty
  133. );
  134. }
  135. _getBoundingBox(name) {
  136. if (this.xfaData) {
  137. return this.xfaData.bbox;
  138. }
  139. let box = this._getInheritableProperty(name, /* getArray = */ true);
  140. if (Array.isArray(box) && box.length === 4) {
  141. box = Util.normalizeRect(box);
  142. if (box[2] - box[0] > 0 && box[3] - box[1] > 0) {
  143. return box;
  144. }
  145. warn(`Empty, or invalid, /${name} entry.`);
  146. }
  147. return null;
  148. }
  149. get mediaBox() {
  150. // Reset invalid media box to letter size.
  151. return shadow(
  152. this,
  153. "mediaBox",
  154. this._getBoundingBox("MediaBox") || LETTER_SIZE_MEDIABOX
  155. );
  156. }
  157. get cropBox() {
  158. // Reset invalid crop box to media box.
  159. return shadow(
  160. this,
  161. "cropBox",
  162. this._getBoundingBox("CropBox") || this.mediaBox
  163. );
  164. }
  165. get userUnit() {
  166. let obj = this.pageDict.get("UserUnit");
  167. if (typeof obj !== "number" || obj <= 0) {
  168. obj = DEFAULT_USER_UNIT;
  169. }
  170. return shadow(this, "userUnit", obj);
  171. }
  172. get view() {
  173. // From the spec, 6th ed., p.963:
  174. // "The crop, bleed, trim, and art boxes should not ordinarily
  175. // extend beyond the boundaries of the media box. If they do, they are
  176. // effectively reduced to their intersection with the media box."
  177. const { cropBox, mediaBox } = this;
  178. if (cropBox !== mediaBox && !isArrayEqual(cropBox, mediaBox)) {
  179. const box = Util.intersect(cropBox, mediaBox);
  180. if (box && box[2] - box[0] > 0 && box[3] - box[1] > 0) {
  181. return shadow(this, "view", box);
  182. }
  183. warn("Empty /CropBox and /MediaBox intersection.");
  184. }
  185. return shadow(this, "view", mediaBox);
  186. }
  187. get rotate() {
  188. let rotate = this._getInheritableProperty("Rotate") || 0;
  189. // Normalize rotation so it's a multiple of 90 and between 0 and 270.
  190. if (rotate % 90 !== 0) {
  191. rotate = 0;
  192. } else if (rotate >= 360) {
  193. rotate %= 360;
  194. } else if (rotate < 0) {
  195. // The spec doesn't cover negatives. Assume it's counterclockwise
  196. // rotation. The following is the other implementation of modulo.
  197. rotate = ((rotate % 360) + 360) % 360;
  198. }
  199. return shadow(this, "rotate", rotate);
  200. }
  201. /**
  202. * @private
  203. */
  204. _onSubStreamError(handler, reason, objId) {
  205. if (this.evaluatorOptions.ignoreErrors) {
  206. // Error(s) when reading one of the /Contents sub-streams -- sending
  207. // unsupported feature notification and allow parsing to continue.
  208. handler.send("UnsupportedFeature", {
  209. featureId: UNSUPPORTED_FEATURES.errorContentSubStream,
  210. });
  211. warn(`getContentStream - ignoring sub-stream (${objId}): "${reason}".`);
  212. return;
  213. }
  214. throw reason;
  215. }
  216. /**
  217. * @returns {Promise<BaseStream>}
  218. */
  219. getContentStream(handler) {
  220. return this.pdfManager.ensure(this, "content").then(content => {
  221. if (content instanceof BaseStream) {
  222. return content;
  223. }
  224. if (Array.isArray(content)) {
  225. return new StreamsSequenceStream(
  226. content,
  227. this._onSubStreamError.bind(this, handler)
  228. );
  229. }
  230. // Replace non-existent page content with empty content.
  231. return new NullStream();
  232. });
  233. }
  234. get xfaData() {
  235. return shadow(
  236. this,
  237. "xfaData",
  238. this.xfaFactory
  239. ? { bbox: this.xfaFactory.getBoundingBox(this.pageIndex) }
  240. : null
  241. );
  242. }
  243. async saveNewAnnotations(handler, task, annotations) {
  244. if (this.xfaFactory) {
  245. throw new Error("XFA: Cannot save new annotations.");
  246. }
  247. const partialEvaluator = new PartialEvaluator({
  248. xref: this.xref,
  249. handler,
  250. pageIndex: this.pageIndex,
  251. idFactory: this._localIdFactory,
  252. fontCache: this.fontCache,
  253. builtInCMapCache: this.builtInCMapCache,
  254. standardFontDataCache: this.standardFontDataCache,
  255. globalImageCache: this.globalImageCache,
  256. options: this.evaluatorOptions,
  257. });
  258. const pageDict = this.pageDict;
  259. const annotationsArray = this.annotations.slice();
  260. const newData = await AnnotationFactory.saveNewAnnotations(
  261. partialEvaluator,
  262. task,
  263. annotations
  264. );
  265. for (const { ref } of newData.annotations) {
  266. annotationsArray.push(ref);
  267. }
  268. const savedDict = pageDict.get("Annots");
  269. pageDict.set("Annots", annotationsArray);
  270. const buffer = [];
  271. let transform = null;
  272. if (this.xref.encrypt) {
  273. transform = this.xref.encrypt.createCipherTransform(
  274. this.ref.num,
  275. this.ref.gen
  276. );
  277. }
  278. writeObject(this.ref, pageDict, buffer, transform);
  279. if (savedDict) {
  280. pageDict.set("Annots", savedDict);
  281. }
  282. const objects = newData.dependencies;
  283. objects.push(
  284. { ref: this.ref, data: buffer.join("") },
  285. ...newData.annotations
  286. );
  287. return objects;
  288. }
  289. save(handler, task, annotationStorage) {
  290. const partialEvaluator = new PartialEvaluator({
  291. xref: this.xref,
  292. handler,
  293. pageIndex: this.pageIndex,
  294. idFactory: this._localIdFactory,
  295. fontCache: this.fontCache,
  296. builtInCMapCache: this.builtInCMapCache,
  297. standardFontDataCache: this.standardFontDataCache,
  298. globalImageCache: this.globalImageCache,
  299. options: this.evaluatorOptions,
  300. });
  301. // Fetch the page's annotations and save the content
  302. // in case of interactive form fields.
  303. return this._parsedAnnotations.then(function (annotations) {
  304. const newRefsPromises = [];
  305. for (const annotation of annotations) {
  306. if (!annotation.mustBePrinted(annotationStorage)) {
  307. continue;
  308. }
  309. newRefsPromises.push(
  310. annotation
  311. .save(partialEvaluator, task, annotationStorage)
  312. .catch(function (reason) {
  313. warn(
  314. "save - ignoring annotation data during " +
  315. `"${task.name}" task: "${reason}".`
  316. );
  317. return null;
  318. })
  319. );
  320. }
  321. return Promise.all(newRefsPromises).then(function (newRefs) {
  322. return newRefs.filter(newRef => !!newRef);
  323. });
  324. });
  325. }
  326. loadResources(keys) {
  327. if (!this.resourcesPromise) {
  328. // TODO: add async `_getInheritableProperty` and remove this.
  329. this.resourcesPromise = this.pdfManager.ensure(this, "resources");
  330. }
  331. return this.resourcesPromise.then(() => {
  332. const objectLoader = new ObjectLoader(this.resources, keys, this.xref);
  333. return objectLoader.load();
  334. });
  335. }
  336. getOperatorList({
  337. handler,
  338. sink,
  339. task,
  340. intent,
  341. cacheKey,
  342. annotationStorage = null,
  343. }) {
  344. const contentStreamPromise = this.getContentStream(handler);
  345. const resourcesPromise = this.loadResources([
  346. "ColorSpace",
  347. "ExtGState",
  348. "Font",
  349. "Pattern",
  350. "Properties",
  351. "Shading",
  352. "XObject",
  353. ]);
  354. const partialEvaluator = new PartialEvaluator({
  355. xref: this.xref,
  356. handler,
  357. pageIndex: this.pageIndex,
  358. idFactory: this._localIdFactory,
  359. fontCache: this.fontCache,
  360. builtInCMapCache: this.builtInCMapCache,
  361. standardFontDataCache: this.standardFontDataCache,
  362. globalImageCache: this.globalImageCache,
  363. options: this.evaluatorOptions,
  364. });
  365. const newAnnotationsByPage = !this.xfaFactory
  366. ? getNewAnnotationsMap(annotationStorage)
  367. : null;
  368. let newAnnotationsPromise = Promise.resolve(null);
  369. if (newAnnotationsByPage) {
  370. const newAnnotations = newAnnotationsByPage.get(this.pageIndex);
  371. if (newAnnotations) {
  372. newAnnotationsPromise = AnnotationFactory.printNewAnnotations(
  373. partialEvaluator,
  374. task,
  375. newAnnotations
  376. );
  377. }
  378. }
  379. const dataPromises = Promise.all([contentStreamPromise, resourcesPromise]);
  380. const pageListPromise = dataPromises.then(([contentStream]) => {
  381. const opList = new OperatorList(intent, sink);
  382. handler.send("StartRenderPage", {
  383. transparency: partialEvaluator.hasBlendModes(
  384. this.resources,
  385. this.nonBlendModesSet
  386. ),
  387. pageIndex: this.pageIndex,
  388. cacheKey,
  389. });
  390. return partialEvaluator
  391. .getOperatorList({
  392. stream: contentStream,
  393. task,
  394. resources: this.resources,
  395. operatorList: opList,
  396. })
  397. .then(function () {
  398. return opList;
  399. });
  400. });
  401. // Fetch the page's annotations and add their operator lists to the
  402. // page's operator list to render them.
  403. return Promise.all([
  404. pageListPromise,
  405. this._parsedAnnotations,
  406. newAnnotationsPromise,
  407. ]).then(function ([pageOpList, annotations, newAnnotations]) {
  408. if (newAnnotations) {
  409. annotations = annotations.concat(newAnnotations);
  410. }
  411. if (
  412. annotations.length === 0 ||
  413. intent & RenderingIntentFlag.ANNOTATIONS_DISABLE
  414. ) {
  415. pageOpList.flush(/* lastChunk = */ true);
  416. return { length: pageOpList.totalLength };
  417. }
  418. const renderForms = !!(intent & RenderingIntentFlag.ANNOTATIONS_FORMS),
  419. intentAny = !!(intent & RenderingIntentFlag.ANY),
  420. intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
  421. intentPrint = !!(intent & RenderingIntentFlag.PRINT);
  422. // Collect the operator list promises for the annotations. Each promise
  423. // is resolved with the complete operator list for a single annotation.
  424. const opListPromises = [];
  425. for (const annotation of annotations) {
  426. if (
  427. intentAny ||
  428. (intentDisplay && annotation.mustBeViewed(annotationStorage)) ||
  429. (intentPrint && annotation.mustBePrinted(annotationStorage))
  430. ) {
  431. opListPromises.push(
  432. annotation
  433. .getOperatorList(
  434. partialEvaluator,
  435. task,
  436. intent,
  437. renderForms,
  438. annotationStorage
  439. )
  440. .catch(function (reason) {
  441. warn(
  442. "getOperatorList - ignoring annotation data during " +
  443. `"${task.name}" task: "${reason}".`
  444. );
  445. return {
  446. opList: null,
  447. separateForm: false,
  448. separateCanvas: false,
  449. };
  450. })
  451. );
  452. }
  453. }
  454. return Promise.all(opListPromises).then(function (opLists) {
  455. let form = false,
  456. canvas = false;
  457. for (const { opList, separateForm, separateCanvas } of opLists) {
  458. pageOpList.addOpList(opList);
  459. if (separateForm) {
  460. form = separateForm;
  461. }
  462. if (separateCanvas) {
  463. canvas = separateCanvas;
  464. }
  465. }
  466. pageOpList.flush(
  467. /* lastChunk = */ true,
  468. /* separateAnnots = */ { form, canvas }
  469. );
  470. return { length: pageOpList.totalLength };
  471. });
  472. });
  473. }
  474. extractTextContent({
  475. handler,
  476. task,
  477. includeMarkedContent,
  478. sink,
  479. combineTextItems,
  480. }) {
  481. const contentStreamPromise = this.getContentStream(handler);
  482. const resourcesPromise = this.loadResources([
  483. "ExtGState",
  484. "Font",
  485. "Properties",
  486. "XObject",
  487. ]);
  488. const dataPromises = Promise.all([contentStreamPromise, resourcesPromise]);
  489. return dataPromises.then(([contentStream]) => {
  490. const partialEvaluator = new PartialEvaluator({
  491. xref: this.xref,
  492. handler,
  493. pageIndex: this.pageIndex,
  494. idFactory: this._localIdFactory,
  495. fontCache: this.fontCache,
  496. builtInCMapCache: this.builtInCMapCache,
  497. standardFontDataCache: this.standardFontDataCache,
  498. globalImageCache: this.globalImageCache,
  499. options: this.evaluatorOptions,
  500. });
  501. return partialEvaluator.getTextContent({
  502. stream: contentStream,
  503. task,
  504. resources: this.resources,
  505. includeMarkedContent,
  506. combineTextItems,
  507. sink,
  508. viewBox: this.view,
  509. });
  510. });
  511. }
  512. async getStructTree() {
  513. const structTreeRoot = await this.pdfManager.ensureCatalog(
  514. "structTreeRoot"
  515. );
  516. if (!structTreeRoot) {
  517. return null;
  518. }
  519. const structTree = await this.pdfManager.ensure(this, "_parseStructTree", [
  520. structTreeRoot,
  521. ]);
  522. return structTree.serializable;
  523. }
  524. /**
  525. * @private
  526. */
  527. _parseStructTree(structTreeRoot) {
  528. const tree = new StructTreePage(structTreeRoot, this.pageDict);
  529. tree.parse();
  530. return tree;
  531. }
  532. async getAnnotationsData(handler, task, intent) {
  533. const annotations = await this._parsedAnnotations;
  534. if (annotations.length === 0) {
  535. return [];
  536. }
  537. const textContentPromises = [];
  538. const annotationsData = [];
  539. let partialEvaluator;
  540. const intentAny = !!(intent & RenderingIntentFlag.ANY),
  541. intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
  542. intentPrint = !!(intent & RenderingIntentFlag.PRINT);
  543. for (const annotation of annotations) {
  544. // Get the annotation even if it's hidden because
  545. // JS can change its display.
  546. const isVisible = intentAny || (intentDisplay && annotation.viewable);
  547. if (isVisible || (intentPrint && annotation.printable)) {
  548. annotationsData.push(annotation.data);
  549. }
  550. if (annotation.hasTextContent && isVisible) {
  551. if (!partialEvaluator) {
  552. partialEvaluator = new PartialEvaluator({
  553. xref: this.xref,
  554. handler,
  555. pageIndex: this.pageIndex,
  556. idFactory: this._localIdFactory,
  557. fontCache: this.fontCache,
  558. builtInCMapCache: this.builtInCMapCache,
  559. standardFontDataCache: this.standardFontDataCache,
  560. globalImageCache: this.globalImageCache,
  561. options: this.evaluatorOptions,
  562. });
  563. }
  564. textContentPromises.push(
  565. annotation
  566. .extractTextContent(partialEvaluator, task, this.view)
  567. .catch(function (reason) {
  568. warn(
  569. `getAnnotationsData - ignoring textContent during "${task.name}" task: "${reason}".`
  570. );
  571. })
  572. );
  573. }
  574. }
  575. await Promise.all(textContentPromises);
  576. return annotationsData;
  577. }
  578. get annotations() {
  579. const annots = this._getInheritableProperty("Annots");
  580. return shadow(this, "annotations", Array.isArray(annots) ? annots : []);
  581. }
  582. get _parsedAnnotations() {
  583. const parsedAnnotations = this.pdfManager
  584. .ensure(this, "annotations")
  585. .then(() => {
  586. const annotationPromises = [];
  587. for (const annotationRef of this.annotations) {
  588. annotationPromises.push(
  589. AnnotationFactory.create(
  590. this.xref,
  591. annotationRef,
  592. this.pdfManager,
  593. this._localIdFactory,
  594. /* collectFields */ false
  595. ).catch(function (reason) {
  596. warn(`_parsedAnnotations: "${reason}".`);
  597. return null;
  598. })
  599. );
  600. }
  601. return Promise.all(annotationPromises).then(function (annotations) {
  602. if (annotations.length === 0) {
  603. return annotations;
  604. }
  605. const sortedAnnotations = [];
  606. let popupAnnotations;
  607. // Ensure that PopupAnnotations are handled last, since they depend on
  608. // their parent Annotation in the display layer; fixes issue 11362.
  609. for (const annotation of annotations) {
  610. if (!annotation) {
  611. continue;
  612. }
  613. if (annotation instanceof PopupAnnotation) {
  614. if (!popupAnnotations) {
  615. popupAnnotations = [];
  616. }
  617. popupAnnotations.push(annotation);
  618. continue;
  619. }
  620. sortedAnnotations.push(annotation);
  621. }
  622. if (popupAnnotations) {
  623. sortedAnnotations.push(...popupAnnotations);
  624. }
  625. return sortedAnnotations;
  626. });
  627. });
  628. return shadow(this, "_parsedAnnotations", parsedAnnotations);
  629. }
  630. get jsActions() {
  631. const actions = collectActions(
  632. this.xref,
  633. this.pageDict,
  634. PageActionEventType
  635. );
  636. return shadow(this, "jsActions", actions);
  637. }
  638. }
  639. const PDF_HEADER_SIGNATURE = new Uint8Array([0x25, 0x50, 0x44, 0x46, 0x2d]);
  640. const STARTXREF_SIGNATURE = new Uint8Array([
  641. 0x73, 0x74, 0x61, 0x72, 0x74, 0x78, 0x72, 0x65, 0x66,
  642. ]);
  643. const ENDOBJ_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64, 0x6f, 0x62, 0x6a]);
  644. const FINGERPRINT_FIRST_BYTES = 1024;
  645. const EMPTY_FINGERPRINT =
  646. "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
  647. function find(stream, signature, limit = 1024, backwards = false) {
  648. if (
  649. typeof PDFJSDev === "undefined" ||
  650. PDFJSDev.test("!PRODUCTION || TESTING")
  651. ) {
  652. assert(limit > 0, 'The "limit" must be a positive integer.');
  653. }
  654. const signatureLength = signature.length;
  655. const scanBytes = stream.peekBytes(limit);
  656. const scanLength = scanBytes.length - signatureLength;
  657. if (scanLength <= 0) {
  658. return false;
  659. }
  660. if (backwards) {
  661. const signatureEnd = signatureLength - 1;
  662. let pos = scanBytes.length - 1;
  663. while (pos >= signatureEnd) {
  664. let j = 0;
  665. while (
  666. j < signatureLength &&
  667. scanBytes[pos - j] === signature[signatureEnd - j]
  668. ) {
  669. j++;
  670. }
  671. if (j >= signatureLength) {
  672. // `signature` found.
  673. stream.pos += pos - signatureEnd;
  674. return true;
  675. }
  676. pos--;
  677. }
  678. } else {
  679. // forwards
  680. let pos = 0;
  681. while (pos <= scanLength) {
  682. let j = 0;
  683. while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
  684. j++;
  685. }
  686. if (j >= signatureLength) {
  687. // `signature` found.
  688. stream.pos += pos;
  689. return true;
  690. }
  691. pos++;
  692. }
  693. }
  694. return false;
  695. }
  696. /**
  697. * The `PDFDocument` class holds all the (worker-thread) data of the PDF file.
  698. */
  699. class PDFDocument {
  700. constructor(pdfManager, stream) {
  701. if (
  702. typeof PDFJSDev === "undefined" ||
  703. PDFJSDev.test("!PRODUCTION || TESTING")
  704. ) {
  705. assert(
  706. stream instanceof BaseStream,
  707. 'PDFDocument: Invalid "stream" argument.'
  708. );
  709. }
  710. if (stream.length <= 0) {
  711. throw new InvalidPDFException(
  712. "The PDF file is empty, i.e. its size is zero bytes."
  713. );
  714. }
  715. this.pdfManager = pdfManager;
  716. this.stream = stream;
  717. this.xref = new XRef(stream, pdfManager);
  718. this._pagePromises = new Map();
  719. this._version = null;
  720. const idCounters = {
  721. font: 0,
  722. };
  723. this._globalIdFactory = class {
  724. static getDocId() {
  725. return `g_${pdfManager.docId}`;
  726. }
  727. static createFontId() {
  728. return `f${++idCounters.font}`;
  729. }
  730. static createObjId() {
  731. unreachable("Abstract method `createObjId` called.");
  732. }
  733. static getPageObjId() {
  734. unreachable("Abstract method `getPageObjId` called.");
  735. }
  736. };
  737. }
  738. parse(recoveryMode) {
  739. this.xref.parse(recoveryMode);
  740. this.catalog = new Catalog(this.pdfManager, this.xref);
  741. }
  742. get linearization() {
  743. let linearization = null;
  744. try {
  745. linearization = Linearization.create(this.stream);
  746. } catch (err) {
  747. if (err instanceof MissingDataException) {
  748. throw err;
  749. }
  750. info(err);
  751. }
  752. return shadow(this, "linearization", linearization);
  753. }
  754. get startXRef() {
  755. const stream = this.stream;
  756. let startXRef = 0;
  757. if (this.linearization) {
  758. // Find the end of the first object.
  759. stream.reset();
  760. if (find(stream, ENDOBJ_SIGNATURE)) {
  761. startXRef = stream.pos + 6 - stream.start;
  762. }
  763. } else {
  764. // Find `startxref` by checking backwards from the end of the file.
  765. const step = 1024;
  766. const startXRefLength = STARTXREF_SIGNATURE.length;
  767. let found = false,
  768. pos = stream.end;
  769. while (!found && pos > 0) {
  770. pos -= step - startXRefLength;
  771. if (pos < 0) {
  772. pos = 0;
  773. }
  774. stream.pos = pos;
  775. found = find(stream, STARTXREF_SIGNATURE, step, true);
  776. }
  777. if (found) {
  778. stream.skip(9);
  779. let ch;
  780. do {
  781. ch = stream.getByte();
  782. } while (isWhiteSpace(ch));
  783. let str = "";
  784. while (ch >= /* Space = */ 0x20 && ch <= /* '9' = */ 0x39) {
  785. str += String.fromCharCode(ch);
  786. ch = stream.getByte();
  787. }
  788. startXRef = parseInt(str, 10);
  789. if (isNaN(startXRef)) {
  790. startXRef = 0;
  791. }
  792. }
  793. }
  794. return shadow(this, "startXRef", startXRef);
  795. }
  796. // Find the header, get the PDF format version and setup the
  797. // stream to start from the header.
  798. checkHeader() {
  799. const stream = this.stream;
  800. stream.reset();
  801. if (!find(stream, PDF_HEADER_SIGNATURE)) {
  802. // May not be a PDF file, but don't throw an error and let
  803. // parsing continue.
  804. return;
  805. }
  806. stream.moveStart();
  807. // Skip over the "%PDF-" prefix, since it was found above.
  808. stream.skip(PDF_HEADER_SIGNATURE.length);
  809. // Read the PDF format version.
  810. let version = "",
  811. ch;
  812. while (
  813. (ch = stream.getByte()) > /* Space = */ 0x20 &&
  814. version.length < /* MAX_PDF_VERSION_LENGTH = */ 7
  815. ) {
  816. version += String.fromCharCode(ch);
  817. }
  818. if (PDF_VERSION_REGEXP.test(version)) {
  819. this._version = version;
  820. } else {
  821. warn(`Invalid PDF header version: ${version}`);
  822. }
  823. }
  824. parseStartXRef() {
  825. this.xref.setStartXRef(this.startXRef);
  826. }
  827. get numPages() {
  828. let num = 0;
  829. if (this.catalog.hasActualNumPages) {
  830. num = this.catalog.numPages;
  831. } else if (this.xfaFactory) {
  832. // num is a Promise.
  833. num = this.xfaFactory.getNumPages();
  834. } else if (this.linearization) {
  835. num = this.linearization.numPages;
  836. } else {
  837. num = this.catalog.numPages;
  838. }
  839. return shadow(this, "numPages", num);
  840. }
  841. /**
  842. * @private
  843. */
  844. _hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
  845. const RECURSION_LIMIT = 10;
  846. if (!Array.isArray(fields)) {
  847. return false;
  848. }
  849. return fields.every(field => {
  850. field = this.xref.fetchIfRef(field);
  851. if (!(field instanceof Dict)) {
  852. return false;
  853. }
  854. if (field.has("Kids")) {
  855. if (++recursionDepth > RECURSION_LIMIT) {
  856. warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
  857. return false;
  858. }
  859. return this._hasOnlyDocumentSignatures(
  860. field.get("Kids"),
  861. recursionDepth
  862. );
  863. }
  864. const isSignature = isName(field.get("FT"), "Sig");
  865. const rectangle = field.get("Rect");
  866. const isInvisible =
  867. Array.isArray(rectangle) && rectangle.every(value => value === 0);
  868. return isSignature && isInvisible;
  869. });
  870. }
  871. get _xfaStreams() {
  872. const acroForm = this.catalog.acroForm;
  873. if (!acroForm) {
  874. return null;
  875. }
  876. const xfa = acroForm.get("XFA");
  877. const entries = {
  878. "xdp:xdp": "",
  879. template: "",
  880. datasets: "",
  881. config: "",
  882. connectionSet: "",
  883. localeSet: "",
  884. stylesheet: "",
  885. "/xdp:xdp": "",
  886. };
  887. if (xfa instanceof BaseStream && !xfa.isEmpty) {
  888. entries["xdp:xdp"] = xfa;
  889. return entries;
  890. }
  891. if (!Array.isArray(xfa) || xfa.length === 0) {
  892. return null;
  893. }
  894. for (let i = 0, ii = xfa.length; i < ii; i += 2) {
  895. let name;
  896. if (i === 0) {
  897. name = "xdp:xdp";
  898. } else if (i === ii - 2) {
  899. name = "/xdp:xdp";
  900. } else {
  901. name = xfa[i];
  902. }
  903. if (!entries.hasOwnProperty(name)) {
  904. continue;
  905. }
  906. const data = this.xref.fetchIfRef(xfa[i + 1]);
  907. if (!(data instanceof BaseStream) || data.isEmpty) {
  908. continue;
  909. }
  910. entries[name] = data;
  911. }
  912. return entries;
  913. }
  914. get xfaDatasets() {
  915. const streams = this._xfaStreams;
  916. if (!streams) {
  917. return shadow(this, "xfaDatasets", null);
  918. }
  919. for (const key of ["datasets", "xdp:xdp"]) {
  920. const stream = streams[key];
  921. if (!stream) {
  922. continue;
  923. }
  924. try {
  925. const str = stringToUTF8String(stream.getString());
  926. const data = { [key]: str };
  927. return shadow(this, "xfaDatasets", new DatasetReader(data));
  928. } catch (_) {
  929. warn("XFA - Invalid utf-8 string.");
  930. break;
  931. }
  932. }
  933. return shadow(this, "xfaDatasets", null);
  934. }
  935. get xfaData() {
  936. const streams = this._xfaStreams;
  937. if (!streams) {
  938. return null;
  939. }
  940. const data = Object.create(null);
  941. for (const [key, stream] of Object.entries(streams)) {
  942. if (!stream) {
  943. continue;
  944. }
  945. try {
  946. data[key] = stringToUTF8String(stream.getString());
  947. } catch (_) {
  948. warn("XFA - Invalid utf-8 string.");
  949. return null;
  950. }
  951. }
  952. return data;
  953. }
  954. get xfaFactory() {
  955. let data;
  956. if (
  957. this.pdfManager.enableXfa &&
  958. this.catalog.needsRendering &&
  959. this.formInfo.hasXfa &&
  960. !this.formInfo.hasAcroForm
  961. ) {
  962. data = this.xfaData;
  963. }
  964. return shadow(this, "xfaFactory", data ? new XFAFactory(data) : null);
  965. }
  966. get isPureXfa() {
  967. return this.xfaFactory ? this.xfaFactory.isValid() : false;
  968. }
  969. get htmlForXfa() {
  970. return this.xfaFactory ? this.xfaFactory.getPages() : null;
  971. }
  972. async loadXfaImages() {
  973. const xfaImagesDict = await this.pdfManager.ensureCatalog("xfaImages");
  974. if (!xfaImagesDict) {
  975. return;
  976. }
  977. const keys = xfaImagesDict.getKeys();
  978. const objectLoader = new ObjectLoader(xfaImagesDict, keys, this.xref);
  979. await objectLoader.load();
  980. const xfaImages = new Map();
  981. for (const key of keys) {
  982. const stream = xfaImagesDict.get(key);
  983. if (stream instanceof BaseStream) {
  984. xfaImages.set(key, stream.getBytes());
  985. }
  986. }
  987. this.xfaFactory.setImages(xfaImages);
  988. }
  989. async loadXfaFonts(handler, task) {
  990. const acroForm = await this.pdfManager.ensureCatalog("acroForm");
  991. if (!acroForm) {
  992. return;
  993. }
  994. const resources = await acroForm.getAsync("DR");
  995. if (!(resources instanceof Dict)) {
  996. return;
  997. }
  998. const objectLoader = new ObjectLoader(resources, ["Font"], this.xref);
  999. await objectLoader.load();
  1000. const fontRes = resources.get("Font");
  1001. if (!(fontRes instanceof Dict)) {
  1002. return;
  1003. }
  1004. const options = Object.assign(
  1005. Object.create(null),
  1006. this.pdfManager.evaluatorOptions
  1007. );
  1008. options.useSystemFonts = false;
  1009. const partialEvaluator = new PartialEvaluator({
  1010. xref: this.xref,
  1011. handler,
  1012. pageIndex: -1,
  1013. idFactory: this._globalIdFactory,
  1014. fontCache: this.catalog.fontCache,
  1015. builtInCMapCache: this.catalog.builtInCMapCache,
  1016. standardFontDataCache: this.catalog.standardFontDataCache,
  1017. options,
  1018. });
  1019. const operatorList = new OperatorList();
  1020. const pdfFonts = [];
  1021. const initialState = {
  1022. get font() {
  1023. return pdfFonts.at(-1);
  1024. },
  1025. set font(font) {
  1026. pdfFonts.push(font);
  1027. },
  1028. clone() {
  1029. return this;
  1030. },
  1031. };
  1032. const fonts = new Map();
  1033. fontRes.forEach((fontName, font) => {
  1034. fonts.set(fontName, font);
  1035. });
  1036. const promises = [];
  1037. for (const [fontName, font] of fonts) {
  1038. const descriptor = font.get("FontDescriptor");
  1039. if (!(descriptor instanceof Dict)) {
  1040. continue;
  1041. }
  1042. let fontFamily = descriptor.get("FontFamily");
  1043. // For example, "Wingdings 3" is not a valid font name in the css specs.
  1044. fontFamily = fontFamily.replace(/[ ]+(\d)/g, "$1");
  1045. const fontWeight = descriptor.get("FontWeight");
  1046. // Angle is expressed in degrees counterclockwise in PDF
  1047. // when it's clockwise in CSS
  1048. // (see https://drafts.csswg.org/css-fonts-4/#valdef-font-style-oblique-angle)
  1049. const italicAngle = -descriptor.get("ItalicAngle");
  1050. const cssFontInfo = { fontFamily, fontWeight, italicAngle };
  1051. if (!validateCSSFont(cssFontInfo)) {
  1052. continue;
  1053. }
  1054. promises.push(
  1055. partialEvaluator
  1056. .handleSetFont(
  1057. resources,
  1058. [Name.get(fontName), 1],
  1059. /* fontRef = */ null,
  1060. operatorList,
  1061. task,
  1062. initialState,
  1063. /* fallbackFontDict = */ null,
  1064. /* cssFontInfo = */ cssFontInfo
  1065. )
  1066. .catch(function (reason) {
  1067. warn(`loadXfaFonts: "${reason}".`);
  1068. return null;
  1069. })
  1070. );
  1071. }
  1072. await Promise.all(promises);
  1073. const missingFonts = this.xfaFactory.setFonts(pdfFonts);
  1074. if (!missingFonts) {
  1075. return;
  1076. }
  1077. options.ignoreErrors = true;
  1078. promises.length = 0;
  1079. pdfFonts.length = 0;
  1080. const reallyMissingFonts = new Set();
  1081. for (const missing of missingFonts) {
  1082. if (!getXfaFontName(`${missing}-Regular`)) {
  1083. // No substitution available: we'll fallback on Myriad.
  1084. reallyMissingFonts.add(missing);
  1085. }
  1086. }
  1087. if (reallyMissingFonts.size) {
  1088. missingFonts.push("PdfJS-Fallback");
  1089. }
  1090. for (const missing of missingFonts) {
  1091. if (reallyMissingFonts.has(missing)) {
  1092. continue;
  1093. }
  1094. for (const fontInfo of [
  1095. { name: "Regular", fontWeight: 400, italicAngle: 0 },
  1096. { name: "Bold", fontWeight: 700, italicAngle: 0 },
  1097. { name: "Italic", fontWeight: 400, italicAngle: 12 },
  1098. { name: "BoldItalic", fontWeight: 700, italicAngle: 12 },
  1099. ]) {
  1100. const name = `${missing}-${fontInfo.name}`;
  1101. const dict = getXfaFontDict(name);
  1102. promises.push(
  1103. partialEvaluator
  1104. .handleSetFont(
  1105. resources,
  1106. [Name.get(name), 1],
  1107. /* fontRef = */ null,
  1108. operatorList,
  1109. task,
  1110. initialState,
  1111. /* fallbackFontDict = */ dict,
  1112. /* cssFontInfo = */ {
  1113. fontFamily: missing,
  1114. fontWeight: fontInfo.fontWeight,
  1115. italicAngle: fontInfo.italicAngle,
  1116. }
  1117. )
  1118. .catch(function (reason) {
  1119. warn(`loadXfaFonts: "${reason}".`);
  1120. return null;
  1121. })
  1122. );
  1123. }
  1124. }
  1125. await Promise.all(promises);
  1126. this.xfaFactory.appendFonts(pdfFonts, reallyMissingFonts);
  1127. }
  1128. async serializeXfaData(annotationStorage) {
  1129. return this.xfaFactory
  1130. ? this.xfaFactory.serializeData(annotationStorage)
  1131. : null;
  1132. }
  1133. /**
  1134. * The specification states in section 7.5.2 that the version from
  1135. * the catalog, if present, should overwrite the version from the header.
  1136. */
  1137. get version() {
  1138. return this.catalog.version || this._version;
  1139. }
  1140. get formInfo() {
  1141. const formInfo = {
  1142. hasFields: false,
  1143. hasAcroForm: false,
  1144. hasXfa: false,
  1145. hasSignatures: false,
  1146. };
  1147. const acroForm = this.catalog.acroForm;
  1148. if (!acroForm) {
  1149. return shadow(this, "formInfo", formInfo);
  1150. }
  1151. try {
  1152. const fields = acroForm.get("Fields");
  1153. const hasFields = Array.isArray(fields) && fields.length > 0;
  1154. formInfo.hasFields = hasFields; // Used by the `fieldObjects` getter.
  1155. // The document contains XFA data if the `XFA` entry is a non-empty
  1156. // array or stream.
  1157. const xfa = acroForm.get("XFA");
  1158. formInfo.hasXfa =
  1159. (Array.isArray(xfa) && xfa.length > 0) ||
  1160. (xfa instanceof BaseStream && !xfa.isEmpty);
  1161. // The document contains AcroForm data if the `Fields` entry is a
  1162. // non-empty array and it doesn't consist of only document signatures.
  1163. // This second check is required for files that don't actually contain
  1164. // AcroForm data (only XFA data), but that use the `Fields` entry to
  1165. // store (invisible) document signatures. This can be detected using
  1166. // the first bit of the `SigFlags` integer (see Table 219 in the
  1167. // specification).
  1168. const sigFlags = acroForm.get("SigFlags");
  1169. const hasSignatures = !!(sigFlags & 0x1);
  1170. const hasOnlyDocumentSignatures =
  1171. hasSignatures && this._hasOnlyDocumentSignatures(fields);
  1172. formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
  1173. formInfo.hasSignatures = hasSignatures;
  1174. } catch (ex) {
  1175. if (ex instanceof MissingDataException) {
  1176. throw ex;
  1177. }
  1178. warn(`Cannot fetch form information: "${ex}".`);
  1179. }
  1180. return shadow(this, "formInfo", formInfo);
  1181. }
  1182. get documentInfo() {
  1183. const docInfo = {
  1184. PDFFormatVersion: this.version,
  1185. Language: this.catalog.lang,
  1186. EncryptFilterName: this.xref.encrypt
  1187. ? this.xref.encrypt.filterName
  1188. : null,
  1189. IsLinearized: !!this.linearization,
  1190. IsAcroFormPresent: this.formInfo.hasAcroForm,
  1191. IsXFAPresent: this.formInfo.hasXfa,
  1192. IsCollectionPresent: !!this.catalog.collection,
  1193. IsSignaturesPresent: this.formInfo.hasSignatures,
  1194. };
  1195. let infoDict;
  1196. try {
  1197. infoDict = this.xref.trailer.get("Info");
  1198. } catch (err) {
  1199. if (err instanceof MissingDataException) {
  1200. throw err;
  1201. }
  1202. info("The document information dictionary is invalid.");
  1203. }
  1204. if (!(infoDict instanceof Dict)) {
  1205. return shadow(this, "documentInfo", docInfo);
  1206. }
  1207. for (const key of infoDict.getKeys()) {
  1208. const value = infoDict.get(key);
  1209. switch (key) {
  1210. case "Title":
  1211. case "Author":
  1212. case "Subject":
  1213. case "Keywords":
  1214. case "Creator":
  1215. case "Producer":
  1216. case "CreationDate":
  1217. case "ModDate":
  1218. if (typeof value === "string") {
  1219. docInfo[key] = stringToPDFString(value);
  1220. continue;
  1221. }
  1222. break;
  1223. case "Trapped":
  1224. if (value instanceof Name) {
  1225. docInfo[key] = value;
  1226. continue;
  1227. }
  1228. break;
  1229. default:
  1230. // For custom values, only accept white-listed types to prevent
  1231. // errors that would occur when trying to send non-serializable
  1232. // objects to the main-thread (for example `Dict` or `Stream`).
  1233. let customValue;
  1234. switch (typeof value) {
  1235. case "string":
  1236. customValue = stringToPDFString(value);
  1237. break;
  1238. case "number":
  1239. case "boolean":
  1240. customValue = value;
  1241. break;
  1242. default:
  1243. if (value instanceof Name) {
  1244. customValue = value;
  1245. }
  1246. break;
  1247. }
  1248. if (customValue === undefined) {
  1249. warn(`Bad value, for custom key "${key}", in Info: ${value}.`);
  1250. continue;
  1251. }
  1252. if (!docInfo.Custom) {
  1253. docInfo.Custom = Object.create(null);
  1254. }
  1255. docInfo.Custom[key] = customValue;
  1256. continue;
  1257. }
  1258. warn(`Bad value, for key "${key}", in Info: ${value}.`);
  1259. }
  1260. return shadow(this, "documentInfo", docInfo);
  1261. }
  1262. get fingerprints() {
  1263. function validate(data) {
  1264. return (
  1265. typeof data === "string" &&
  1266. data.length > 0 &&
  1267. data !== EMPTY_FINGERPRINT
  1268. );
  1269. }
  1270. function hexString(hash) {
  1271. const buf = [];
  1272. for (const num of hash) {
  1273. const hex = num.toString(16);
  1274. buf.push(hex.padStart(2, "0"));
  1275. }
  1276. return buf.join("");
  1277. }
  1278. const idArray = this.xref.trailer.get("ID");
  1279. let hashOriginal, hashModified;
  1280. if (Array.isArray(idArray) && validate(idArray[0])) {
  1281. hashOriginal = stringToBytes(idArray[0]);
  1282. if (idArray[1] !== idArray[0] && validate(idArray[1])) {
  1283. hashModified = stringToBytes(idArray[1]);
  1284. }
  1285. } else {
  1286. hashOriginal = calculateMD5(
  1287. this.stream.getByteRange(0, FINGERPRINT_FIRST_BYTES),
  1288. 0,
  1289. FINGERPRINT_FIRST_BYTES
  1290. );
  1291. }
  1292. return shadow(this, "fingerprints", [
  1293. hexString(hashOriginal),
  1294. hashModified ? hexString(hashModified) : null,
  1295. ]);
  1296. }
  1297. async _getLinearizationPage(pageIndex) {
  1298. const { catalog, linearization, xref } = this;
  1299. if (
  1300. typeof PDFJSDev === "undefined" ||
  1301. PDFJSDev.test("!PRODUCTION || TESTING")
  1302. ) {
  1303. assert(
  1304. linearization && linearization.pageFirst === pageIndex,
  1305. "_getLinearizationPage - invalid pageIndex argument."
  1306. );
  1307. }
  1308. const ref = Ref.get(linearization.objectNumberFirst, 0);
  1309. try {
  1310. const obj = await xref.fetchAsync(ref);
  1311. // Ensure that the object that was found is actually a Page dictionary.
  1312. if (obj instanceof Dict) {
  1313. let type = obj.getRaw("Type");
  1314. if (type instanceof Ref) {
  1315. type = await xref.fetchAsync(type);
  1316. }
  1317. if (isName(type, "Page") || (!obj.has("Type") && !obj.has("Kids"))) {
  1318. if (!catalog.pageKidsCountCache.has(ref)) {
  1319. catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference.
  1320. }
  1321. // Help improve performance of the `Catalog.getPageIndex` method.
  1322. if (!catalog.pageIndexCache.has(ref)) {
  1323. catalog.pageIndexCache.put(ref, 0);
  1324. }
  1325. return [obj, ref];
  1326. }
  1327. }
  1328. throw new FormatError(
  1329. "The Linearization dictionary doesn't point to a valid Page dictionary."
  1330. );
  1331. } catch (reason) {
  1332. warn(`_getLinearizationPage: "${reason.message}".`);
  1333. return catalog.getPageDict(pageIndex);
  1334. }
  1335. }
  1336. getPage(pageIndex) {
  1337. const cachedPromise = this._pagePromises.get(pageIndex);
  1338. if (cachedPromise) {
  1339. return cachedPromise;
  1340. }
  1341. const { catalog, linearization, xfaFactory } = this;
  1342. let promise;
  1343. if (xfaFactory) {
  1344. promise = Promise.resolve([Dict.empty, null]);
  1345. } else if (linearization && linearization.pageFirst === pageIndex) {
  1346. promise = this._getLinearizationPage(pageIndex);
  1347. } else {
  1348. promise = catalog.getPageDict(pageIndex);
  1349. }
  1350. promise = promise.then(([pageDict, ref]) => {
  1351. return new Page({
  1352. pdfManager: this.pdfManager,
  1353. xref: this.xref,
  1354. pageIndex,
  1355. pageDict,
  1356. ref,
  1357. globalIdFactory: this._globalIdFactory,
  1358. fontCache: catalog.fontCache,
  1359. builtInCMapCache: catalog.builtInCMapCache,
  1360. standardFontDataCache: catalog.standardFontDataCache,
  1361. globalImageCache: catalog.globalImageCache,
  1362. nonBlendModesSet: catalog.nonBlendModesSet,
  1363. xfaFactory,
  1364. });
  1365. });
  1366. this._pagePromises.set(pageIndex, promise);
  1367. return promise;
  1368. }
  1369. async checkFirstPage(recoveryMode = false) {
  1370. if (recoveryMode) {
  1371. return;
  1372. }
  1373. try {
  1374. await this.getPage(0);
  1375. } catch (reason) {
  1376. if (reason instanceof XRefEntryException) {
  1377. // Clear out the various caches to ensure that we haven't stored any
  1378. // inconsistent and/or incorrect state, since that could easily break
  1379. // subsequent `this.getPage` calls.
  1380. this._pagePromises.delete(0);
  1381. await this.cleanup();
  1382. throw new XRefParseException();
  1383. }
  1384. }
  1385. }
  1386. async checkLastPage(recoveryMode = false) {
  1387. const { catalog, pdfManager } = this;
  1388. catalog.setActualNumPages(); // Ensure that it's always reset.
  1389. let numPages;
  1390. try {
  1391. await Promise.all([
  1392. pdfManager.ensureDoc("xfaFactory"),
  1393. pdfManager.ensureDoc("linearization"),
  1394. pdfManager.ensureCatalog("numPages"),
  1395. ]);
  1396. if (this.xfaFactory) {
  1397. return; // The Page count is always calculated for XFA-documents.
  1398. } else if (this.linearization) {
  1399. numPages = this.linearization.numPages;
  1400. } else {
  1401. numPages = catalog.numPages;
  1402. }
  1403. if (!Number.isInteger(numPages)) {
  1404. throw new FormatError("Page count is not an integer.");
  1405. } else if (numPages <= 1) {
  1406. return;
  1407. }
  1408. await this.getPage(numPages - 1);
  1409. } catch (reason) {
  1410. // Clear out the various caches to ensure that we haven't stored any
  1411. // inconsistent and/or incorrect state, since that could easily break
  1412. // subsequent `this.getPage` calls.
  1413. this._pagePromises.delete(numPages - 1);
  1414. await this.cleanup();
  1415. if (reason instanceof XRefEntryException && !recoveryMode) {
  1416. throw new XRefParseException();
  1417. }
  1418. warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
  1419. let pagesTree;
  1420. try {
  1421. pagesTree = await catalog.getAllPageDicts(recoveryMode);
  1422. } catch (reasonAll) {
  1423. if (reasonAll instanceof XRefEntryException && !recoveryMode) {
  1424. throw new XRefParseException();
  1425. }
  1426. catalog.setActualNumPages(1);
  1427. return;
  1428. }
  1429. for (const [pageIndex, [pageDict, ref]] of pagesTree) {
  1430. let promise;
  1431. if (pageDict instanceof Error) {
  1432. promise = Promise.reject(pageDict);
  1433. // Prevent "uncaught exception: Object"-messages in the console.
  1434. promise.catch(() => {});
  1435. } else {
  1436. promise = Promise.resolve(
  1437. new Page({
  1438. pdfManager,
  1439. xref: this.xref,
  1440. pageIndex,
  1441. pageDict,
  1442. ref,
  1443. globalIdFactory: this._globalIdFactory,
  1444. fontCache: catalog.fontCache,
  1445. builtInCMapCache: catalog.builtInCMapCache,
  1446. standardFontDataCache: catalog.standardFontDataCache,
  1447. globalImageCache: catalog.globalImageCache,
  1448. nonBlendModesSet: catalog.nonBlendModesSet,
  1449. xfaFactory: null,
  1450. })
  1451. );
  1452. }
  1453. this._pagePromises.set(pageIndex, promise);
  1454. }
  1455. catalog.setActualNumPages(pagesTree.size);
  1456. }
  1457. }
  1458. fontFallback(id, handler) {
  1459. return this.catalog.fontFallback(id, handler);
  1460. }
  1461. async cleanup(manuallyTriggered = false) {
  1462. return this.catalog
  1463. ? this.catalog.cleanup(manuallyTriggered)
  1464. : clearGlobalCaches();
  1465. }
  1466. /**
  1467. * @private
  1468. */
  1469. _collectFieldObjects(name, fieldRef, promises) {
  1470. const field = this.xref.fetchIfRef(fieldRef);
  1471. if (field.has("T")) {
  1472. const partName = stringToPDFString(field.get("T"));
  1473. if (name === "") {
  1474. name = partName;
  1475. } else {
  1476. name = `${name}.${partName}`;
  1477. }
  1478. }
  1479. if (!promises.has(name)) {
  1480. promises.set(name, []);
  1481. }
  1482. promises.get(name).push(
  1483. AnnotationFactory.create(
  1484. this.xref,
  1485. fieldRef,
  1486. this.pdfManager,
  1487. this._localIdFactory,
  1488. /* collectFields */ true
  1489. )
  1490. .then(annotation => annotation && annotation.getFieldObject())
  1491. .catch(function (reason) {
  1492. warn(`_collectFieldObjects: "${reason}".`);
  1493. return null;
  1494. })
  1495. );
  1496. if (field.has("Kids")) {
  1497. const kids = field.get("Kids");
  1498. for (const kid of kids) {
  1499. this._collectFieldObjects(name, kid, promises);
  1500. }
  1501. }
  1502. }
  1503. get fieldObjects() {
  1504. if (!this.formInfo.hasFields) {
  1505. return shadow(this, "fieldObjects", Promise.resolve(null));
  1506. }
  1507. const allFields = Object.create(null);
  1508. const fieldPromises = new Map();
  1509. for (const fieldRef of this.catalog.acroForm.get("Fields")) {
  1510. this._collectFieldObjects("", fieldRef, fieldPromises);
  1511. }
  1512. const allPromises = [];
  1513. for (const [name, promises] of fieldPromises) {
  1514. allPromises.push(
  1515. Promise.all(promises).then(fields => {
  1516. fields = fields.filter(field => !!field);
  1517. if (fields.length > 0) {
  1518. allFields[name] = fields;
  1519. }
  1520. })
  1521. );
  1522. }
  1523. return shadow(
  1524. this,
  1525. "fieldObjects",
  1526. Promise.all(allPromises).then(() => allFields)
  1527. );
  1528. }
  1529. get hasJSActions() {
  1530. const promise = this.pdfManager.ensureDoc("_parseHasJSActions");
  1531. return shadow(this, "hasJSActions", promise);
  1532. }
  1533. /**
  1534. * @private
  1535. */
  1536. async _parseHasJSActions() {
  1537. const [catalogJsActions, fieldObjects] = await Promise.all([
  1538. this.pdfManager.ensureCatalog("jsActions"),
  1539. this.pdfManager.ensureDoc("fieldObjects"),
  1540. ]);
  1541. if (catalogJsActions) {
  1542. return true;
  1543. }
  1544. if (fieldObjects) {
  1545. return Object.values(fieldObjects).some(fieldObject =>
  1546. fieldObject.some(object => object.actions !== null)
  1547. );
  1548. }
  1549. return false;
  1550. }
  1551. get calculationOrderIds() {
  1552. const acroForm = this.catalog.acroForm;
  1553. if (!acroForm || !acroForm.has("CO")) {
  1554. return shadow(this, "calculationOrderIds", null);
  1555. }
  1556. const calculationOrder = acroForm.get("CO");
  1557. if (!Array.isArray(calculationOrder) || calculationOrder.length === 0) {
  1558. return shadow(this, "calculationOrderIds", null);
  1559. }
  1560. const ids = [];
  1561. for (const id of calculationOrder) {
  1562. if (id instanceof Ref) {
  1563. ids.push(id.toString());
  1564. }
  1565. }
  1566. if (ids.length === 0) {
  1567. return shadow(this, "calculationOrderIds", null);
  1568. }
  1569. return shadow(this, "calculationOrderIds", ids);
  1570. }
  1571. }
  1572. export { Page, PDFDocument };