parser.js 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415
  1. /* Copyright 2012 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import {
  16. assert,
  17. bytesToString,
  18. FormatError,
  19. info,
  20. StreamType,
  21. warn,
  22. } from "../shared/util.js";
  23. import { Cmd, Dict, EOF, isCmd, Name, Ref } from "./primitives.js";
  24. import {
  25. isWhiteSpace,
  26. MissingDataException,
  27. ParserEOFException,
  28. } from "./core_utils.js";
  29. import { Ascii85Stream } from "./ascii_85_stream.js";
  30. import { AsciiHexStream } from "./ascii_hex_stream.js";
  31. import { CCITTFaxStream } from "./ccitt_stream.js";
  32. import { FlateStream } from "./flate_stream.js";
  33. import { Jbig2Stream } from "./jbig2_stream.js";
  34. import { JpegStream } from "./jpeg_stream.js";
  35. import { JpxStream } from "./jpx_stream.js";
  36. import { LZWStream } from "./lzw_stream.js";
  37. import { NullStream } from "./stream.js";
  38. import { PredictorStream } from "./predictor_stream.js";
  39. import { RunLengthStream } from "./run_length_stream.js";
  // Inline images whose data is shorter than this many bytes are eligible for
  // the image cache kept by `Parser.makeInlineImage`.
  const MAX_LENGTH_TO_CACHE = 1000;

  /**
   * Build the string key under which an inline image is cached.
   *
   * Consecutive bytes are packed pairwise (first byte in the high 8 bits) into
   * one UTF-16 code unit each; a trailing odd byte gets a code unit of its own.
   * The "raw" byte length is purposely included as a prefix, to prevent any
   * possible issues with hash collisions in the inline image cache.
   *
   * @param {Uint8Array|Array<number>} bytes - The bytes to derive the key from.
   * @returns {string} The cache key, in the form `<byteLength>_<packedBytes>`.
   */
  function getInlineImageCacheKey(bytes) {
    // `Function.prototype.apply` passes every array element as a separate call
    // argument, and engines throw a `RangeError` once that count gets too
    // large. Converting in bounded chunks keeps the function safe for inputs
    // of any size, without changing the key that is produced.
    const MAX_ARGUMENT_COUNT = 8192;

    const byteLength = bytes.length;
    const codeUnits = [];
    let i = 0;
    while (i < byteLength - 1) {
      codeUnits.push((bytes[i++] << 8) | bytes[i++]);
    }
    // Handle an odd number of elements.
    if (i < byteLength) {
      codeUnits.push(bytes[i]);
    }

    const parts = [`${byteLength}_`];
    for (let start = 0; start < codeUnits.length; start += MAX_ARGUMENT_COUNT) {
      const chunk = codeUnits.slice(start, start + MAX_ARGUMENT_COUNT);
      parts.push(String.fromCharCode.apply(null, chunk));
    }
    return parts.join("");
  }
  /**
   * Turns the tokens produced by a lexer into objects: arrays, dictionaries,
   * indirect references, inline images and (when allowed) streams.
   *
   * Two tokens of look-ahead are kept at all times, in `buf1` (the current
   * token) and `buf2` (the one following it).
   */
  class Parser {
    /**
     * @param {Object} params
     * @param {Object} params.lexer - The token source; its `getObj` method is
     *   called to fill the look-ahead buffers.
     * @param {Object} params.xref - Passed on to every `Dict` that is created,
     *   and used to resolve filter references.
     * @param {boolean} [params.allowStreams] - Whether a dictionary followed by
     *   the `stream` command is turned into a stream. Defaults to `false`.
     * @param {boolean} [params.recoveryMode] - Whether an unterminated array or
     *   dictionary is returned as-is rather than throwing. Defaults to `false`.
     */
    constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
      this.lexer = lexer;
      this.xref = xref;
      this.allowStreams = allowStreams;
      this.recoveryMode = recoveryMode;

      // Cache of small inline images, keyed by their stringified content.
      this.imageCache = Object.create(null);
      this._imageId = 0;

      this.refill();
    }

    /**
     * Load both look-ahead buffers with fresh tokens from the lexer.
     */
    refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Advance by one token: `buf2` becomes `buf1`, and a new token is read into
     * `buf2`. After the "ID" command nothing is read from the lexer, and `buf2`
     * is set to `null` instead.
     */
    shift() {
      const upcoming = this.buf2;
      const startsInlineImageData =
        upcoming instanceof Cmd && upcoming.cmd === "ID";

      this.buf1 = upcoming;
      this.buf2 = startsInlineImageData ? null : this.lexer.getObj();
    }

    /**
     * Like `shift`, but reports failure through the return value.
     *
     * Upon failure, the caller should reset `this.lexer.pos` to a known good
     * state and call `this.shift()` twice to reset the buffers.
     *
     * @returns {boolean} `true` if the shift succeeded, `false` otherwise.
     * @throws {MissingDataException} Always re-thrown, never swallowed.
     */
    tryShift() {
      try {
        this.shift();
      } catch (ex) {
        if (ex instanceof MissingDataException) {
          throw ex;
        }
        return false;
      }
      return true;
    }

    /**
     * Parse and return the next object.
     *
     * @param {Object} [cipherTransform] - When provided, used to decrypt
     *   strings, and passed along when creating inline images and streams.
     * @returns {*} The parsed object.
     * @throws {ParserEOFException} If the data ends inside an array or a
     *   dictionary, and `recoveryMode` is not set.
     */
    getObj(cipherTransform = null) {
      const token = this.buf1;
      this.shift();

      if (token instanceof Cmd) {
        switch (token.cmd) {
          case "BI": // inline image
            return this.makeInlineImage(cipherTransform);

          case "[": {
            // array
            const items = [];
            while (!isCmd(this.buf1, "]") && this.buf1 !== EOF) {
              items.push(this.getObj(cipherTransform));
            }
            if (this.buf1 !== EOF) {
              this.shift(); // Consume the closing "]".
              return items;
            }
            if (!this.recoveryMode) {
              throw new ParserEOFException("End of file inside array.");
            }
            return items;
          }

          case "<<": {
            // dictionary or stream
            const dict = new Dict(this.xref);
            while (!isCmd(this.buf1, ">>") && this.buf1 !== EOF) {
              if (this.buf1 instanceof Name) {
                const key = this.buf1.name;
                this.shift();
                if (this.buf1 === EOF) {
                  break;
                }
                dict.set(key, this.getObj(cipherTransform));
              } else {
                info("Malformed dictionary: key must be a name object");
                this.shift();
              }
            }
            if (this.buf1 === EOF) {
              if (!this.recoveryMode) {
                throw new ParserEOFException("End of file inside dictionary.");
              }
              return dict;
            }
            if (!isCmd(this.buf2, "stream")) {
              this.shift(); // Consume the closing ">>".
              return dict;
            }
            // Stream objects are not allowed inside content streams or
            // object streams.
            return this.allowStreams
              ? this.makeStream(dict, cipherTransform)
              : dict;
          }

          default:
            // simple object
            return token;
        }
      }

      if (Number.isInteger(token)) {
        // Two integers followed by the "R" command form an indirect reference.
        if (Number.isInteger(this.buf1) && isCmd(this.buf2, "R")) {
          const ref = Ref.get(token, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return token;
      }

      if (typeof token === "string" && cipherTransform) {
        return cipherTransform.decryptString(token);
      }
      // simple object
      return token;
    }

    /**
     * Find the end of the stream by searching for the /EI\s/.
     * @param {Object} stream - The stream to scan, from its current position.
     * @returns {number} The inline stream length.
     */
    findDefaultInlineStreamEnd(stream) {
      const CHAR_E = 0x45;
      const CHAR_I = 0x49;
      const SPACE = 0x20;
      const LF = 0xa;
      const CR = 0xd;
      const NUL = 0x0;
      const LOOKAHEAD_LENGTH = 10;

      const { lexer } = this;
      const startPos = stream.pos;
      // 0: nothing matched; 1: "E" seen; 2: "EI" seen.
      let state = 0;
      let byte;
      let lastEIPos;

      while ((byte = stream.getByte()) !== -1) {
        if (state === 0) {
          state = byte === CHAR_E ? 1 : 0;
          continue;
        }
        if (state === 1) {
          state = byte === CHAR_I ? 2 : 0;
          continue;
        }
        if (
          typeof PDFJSDev === "undefined" ||
          PDFJSDev.test("!PRODUCTION || TESTING")
        ) {
          assert(state === 2, "findDefaultInlineStreamEnd - invalid state.");
        }
        if (byte !== SPACE && byte !== LF && byte !== CR) {
          // "EI" wasn't followed by whitespace, hence it's not the marker.
          state = 0;
          continue;
        }
        lastEIPos = stream.pos;

        // Let's check that the next bytes are ASCII... just to be sure.
        const lookahead = stream.peekBytes(LOOKAHEAD_LENGTH);
        let looksLikeText = true;
        for (let i = 0; i < lookahead.length; i++) {
          const next = lookahead[i];
          if (next === NUL && lookahead[i + 1] !== NUL) {
            // NUL bytes are not supposed to occur *outside* of inline
            // images, but some PDF generators violate that assumption.
            // A *sequence* of NUL bytes is still treated as non-ASCII,
            // whereas a *single* NUL byte is skipped (fixes issue8823.pdf).
            continue;
          }
          if (next !== LF && next !== CR && (next < SPACE || next > 0x7f)) {
            // Not a LF, CR, SPACE or any visible ASCII character, i.e.
            // it's binary stuff.
            looksLikeText = false;
            break;
          }
        }
        if (!looksLikeText) {
          state = 0;
          continue;
        }

        // Check that the "EI" sequence isn't part of the image data, since
        // that would cause the image to be truncated (fixes issue11124.pdf).
        if (lexer.knownCommands) {
          const nextObj = lexer.peekObj();
          if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
            // Not a valid command, i.e. the inline image data *itself*
            // contains an "EI" sequence.
            state = 0;
            continue;
          }
        } else {
          warn(
            "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
          );
        }
        break; // Finished!
      }

      if (byte === -1) {
        warn(
          "findDefaultInlineStreamEnd: " +
            "Reached the end of the stream without finding a valid EI marker"
        );
        if (lastEIPos) {
          warn('... trying to recover by using the last "EI" occurrence.');
          stream.skip(-(stream.pos - lastEIPos)); // Reset the stream position.
        }
      }

      // Peek at the byte just before "EI", then restore the stream position.
      const MARKER_SPAN = 4;
      stream.skip(-MARKER_SPAN);
      const byteBeforeEI = stream.peekByte();
      stream.skip(MARKER_SPAN);

      // Ensure that we don't accidentally truncate the inline image, when the
      // data is immediately followed by the "EI" marker (fixes issue10388.pdf).
      const endOffset = isWhiteSpace(byteBeforeEI)
        ? MARKER_SPAN
        : MARKER_SPAN - 1;
      return stream.pos - endOffset - startPos;
    }

    /**
     * Find the EOI (end-of-image) marker 0xFFD9 of the stream.
     * @param {Object} stream - The stream to scan, from its current position.
     * @returns {number} The inline stream length.
     */
    findDCTDecodeInlineStreamEnd(stream) {
      const startPos = stream.pos;
      let byte;

      while ((byte = stream.getByte()) !== -1) {
        if (byte !== 0xff) {
          // Not a valid marker.
          continue;
        }
        const marker = stream.getByte();

        if (marker === 0x00) {
          // Byte stuffing; 0xFF00 appears to be a very common byte sequence
          // in JPEG images.
          continue;
        }
        if (marker === 0xff) {
          // Fill byte; avoid skipping a valid marker by resetting the stream
          // position.
          stream.skip(-1);
          continue;
        }
        if (marker === 0xd9) {
          // EOI
          break;
        }

        // Markers that are followed by a segment length:
        //  - 0xC0-0xCF, except 0xC8: SOF0-SOF15, DHT (0xC4) and DAC (0xCC).
        //  - 0xDA-0xDF: SOS, DQT, DNL, DRI, DHP and EXP.
        //  - 0xE0-0xEF: APP0-APP15.
        //  - 0xFE: COM.
        const hasSegmentLength =
          (marker >= 0xc0 && marker <= 0xcf && marker !== 0xc8) ||
          (marker >= 0xda && marker <= 0xef) ||
          marker === 0xfe;
        if (!hasSegmentLength) {
          continue;
        }
        const markerLength = stream.getUint16();
        if (markerLength > 2) {
          // |markerLength| contains the byte length of the marker segment,
          // including its own length (2 bytes) and excluding the marker.
          stream.skip(markerLength - 2); // Jump to the next marker.
        } else {
          // The marker length is invalid, resetting the stream position.
          stream.skip(-2);
        }
      }

      const length = stream.pos - startPos;
      if (byte === -1) {
        warn(
          "Inline DCTDecode image stream: " +
            "EOI marker not found, searching for /EI/ instead."
        );
        stream.skip(-length); // Reset the stream position.
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
     * @param {Object} stream - The stream to scan, from its current position.
     * @returns {number} The inline stream length.
     */
    findASCII85DecodeInlineStreamEnd(stream) {
      const TILDE = 0x7e;
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;

      while ((ch = stream.getByte()) !== -1) {
        if (ch !== TILDE) {
          continue;
        }
        const tildePos = stream.pos;

        // Handle corrupt PDF documents which contains whitespace "inside" of
        // the EOD marker (fixes issue10614.pdf).
        for (
          ch = stream.peekByte();
          isWhiteSpace(ch);
          ch = stream.peekByte()
        ) {
          stream.skip();
        }
        if (ch === GT) {
          stream.skip();
          break;
        }
        // Handle corrupt PDF documents which contains truncated EOD markers,
        // where the '>' character is missing (fixes issue11385.pdf).
        if (stream.pos > tildePos) {
          const [first, second] = stream.peekBytes(2);
          if (first === /* E = */ 0x45 && second === /* I = */ 0x49) {
            break;
          }
        }
      }

      const length = stream.pos - startPos;
      if (ch === -1) {
        warn(
          "Inline ASCII85Decode image stream: " +
            "EOD marker not found, searching for /EI/ instead."
        );
        stream.skip(-length); // Reset the stream position.
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
     * @param {Object} stream - The stream to scan, from its current position.
     * @returns {number} The inline stream length.
     */
    findASCIIHexDecodeInlineStreamEnd(stream) {
      const GT = 0x3e;
      const startPos = stream.pos;

      // Consume bytes up to, and including, the first '>' (or until the end).
      let ch;
      do {
        ch = stream.getByte();
      } while (ch !== -1 && ch !== GT);

      const length = stream.pos - startPos;
      if (ch === -1) {
        warn(
          "Inline ASCIIHexDecode image stream: " +
            "EOD marker not found, searching for /EI/ instead."
        );
        stream.skip(-length); // Reset the stream position.
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Skip over the /EI/ for streams where we search for an EOD marker.
     *
     * Bytes are consumed up to and including the one that follows "EI", or
     * until the stream is exhausted.
     * @param {Object} stream - The stream to advance.
     */
    inlineStreamSkipEI(stream) {
      const CHAR_E = 0x45;
      const CHAR_I = 0x49;
      // 0: nothing matched; 1: "E" seen; 2: "EI" seen.
      let state = 0;

      for (let ch = stream.getByte(); ch !== -1; ch = stream.getByte()) {
        switch (state) {
          case 0:
            state = ch === CHAR_E ? 1 : 0;
            break;
          case 1:
            state = ch === CHAR_I ? 2 : 0;
            break;
          default:
            return;
        }
      }
    }

    /**
     * Parse an inline image, i.e. its dictionary followed by the image data,
     * once the "BI" command has been consumed.
     *
     * @param {Object} [cipherTransform] - Used when parsing the dictionary
     *   values, and to wrap the image data stream when provided.
     * @returns {Object} The (possibly cached) image stream.
     * @throws {FormatError} If a dictionary key is not a name object.
     */
    makeInlineImage(cipherTransform) {
      const { lexer } = this;
      const { stream } = lexer;

      // Parse dictionary, but initialize it lazily to improve performance with
      // cached inline images (see issue 2618).
      const entries = Object.create(null);
      while (!isCmd(this.buf1, "ID") && this.buf1 !== EOF) {
        if (!(this.buf1 instanceof Name)) {
          throw new FormatError("Dictionary key must be a name object");
        }
        const key = this.buf1.name;
        this.shift();
        if (this.buf1 === EOF) {
          break;
        }
        entries[key] = this.getObj(cipherTransform);
      }
      const dictLength =
        lexer.beginInlineImagePos !== -1
          ? stream.pos - lexer.beginInlineImagePos
          : undefined;

      // Extract the name of the first (i.e. the current) image filter.
      const filter = this.xref.fetchIfRef(entries.F || entries.Filter);
      let filterName;
      if (filter instanceof Name) {
        filterName = filter.name;
      } else if (Array.isArray(filter)) {
        const firstFilter = this.xref.fetchIfRef(filter[0]);
        if (firstFilter instanceof Name) {
          filterName = firstFilter.name;
        }
      }

      // Parse image stream.
      const startPos = stream.pos;
      let length;
      if (filterName === "DCT" || filterName === "DCTDecode") {
        length = this.findDCTDecodeInlineStreamEnd(stream);
      } else if (filterName === "A85" || filterName === "ASCII85Decode") {
        length = this.findASCII85DecodeInlineStreamEnd(stream);
      } else if (filterName === "AHx" || filterName === "ASCIIHexDecode") {
        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
      } else {
        length = this.findDefaultInlineStreamEnd(stream);
      }

      // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
      // stringified content, to prevent possible hash collisions.
      let cacheKey;
      if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) {
        const savedPos = stream.pos;
        // Set the stream position to the beginning of the dictionary data...
        stream.pos = lexer.beginInlineImagePos;
        // ... and fetch the bytes of the dictionary *and* the inline image.
        cacheKey = getInlineImageCacheKey(stream.getBytes(dictLength + length));
        // Finally, don't forget to reset the stream position.
        stream.pos = savedPos;

        const cached = this.imageCache[cacheKey];
        if (cached !== undefined) {
          this.buf2 = Cmd.get("EI");
          this.shift();

          cached.reset();
          return cached;
        }
      }

      const dict = new Dict(this.xref);
      for (const [key, value] of Object.entries(entries)) {
        dict.set(key, value);
      }

      let imageStream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }
      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;

      if (cacheKey !== undefined) {
        imageStream.cacheKey = `inline_img_${++this._imageId}`;
        this.imageCache[cacheKey] = imageStream;
      }

      this.buf2 = Cmd.get("EI");
      this.shift();

      return imageStream;
    }

    /**
     * Search the lexer's stream for `signature`, starting at `startPos`. On
     * success the stream is left positioned at the start of the match.
     *
     * @param {number} startPos - The position to start searching from.
     * @param {Uint8Array} signature - The byte sequence to look for.
     * @returns {number} The distance from `startPos` to the match, or -1 if
     *   the signature wasn't found.
     */
    _findStreamLength(startPos, signature) {
      const SCAN_BLOCK_LENGTH = 2048;
      const { stream } = this.lexer;
      const signatureLength = signature.length;

      stream.pos = startPos;
      while (stream.pos < stream.end) {
        const block = stream.peekBytes(SCAN_BLOCK_LENGTH);
        const scanLength = block.length - signatureLength;
        if (scanLength <= 0) {
          break;
        }
        for (let offset = 0; offset < scanLength; offset++) {
          let matched = 0;
          while (
            matched < signatureLength &&
            block[offset + matched] === signature[matched]
          ) {
            matched++;
          }
          if (matched >= signatureLength) {
            // `signature` found.
            stream.pos += offset;
            return stream.pos - startPos;
          }
        }
        stream.pos += scanLength;
      }
      return -1;
    }

    /**
     * Create a stream from the dictionary that was just parsed and the data
     * which follows the "stream" command.
     *
     * The /Length entry is tried first; if the "endstream" command isn't found
     * where it is expected, the data is scanned for it instead.
     *
     * @param {Dict} dict - The stream dictionary.
     * @param {Object} [cipherTransform] - Used to wrap the stream, if provided.
     * @returns {Object} The stream, with all of its filters applied.
     * @throws {FormatError} If no "endstream" command can be found.
     */
    makeStream(dict, cipherTransform) {
      const { lexer } = this;
      let stream = lexer.stream;

      // Get the stream's start position.
      lexer.skipToNextLine();
      const startPos = stream.pos - 1;

      // Get the length.
      let length = dict.get("Length");
      if (!Number.isInteger(length)) {
        info(`Bad length "${length && length.toString()}" in stream.`);
        length = 0;
      }

      // Skip over the stream data.
      stream.pos = startPos + length;
      lexer.nextChar();

      // Shift '>>' and check whether the new object marks the end of the stream.
      const isLengthValid = this.tryShift() && isCmd(this.buf2, "endstream");
      if (isLengthValid) {
        this.shift(); // 'stream'
      } else {
        // Bad stream length, scanning for endstream command.
        const ENDSTREAM_SIGNATURE = new Uint8Array([
          0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d,
        ]);
        let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);

        if (actualLength < 0) {
          // Only allow limited truncation of the endstream signature,
          // to prevent false positives.
          const MAX_TRUNCATION = 1;
          // Check if the PDF generator included truncated endstream commands,
          // such as e.g. "endstrea" (fixes issue10004.pdf).
          for (let cut = 1; cut <= MAX_TRUNCATION; cut++) {
            const end = ENDSTREAM_SIGNATURE.length - cut;
            const truncatedSignature = ENDSTREAM_SIGNATURE.slice(0, end);

            const maybeLength = this._findStreamLength(
              startPos,
              truncatedSignature
            );
            if (maybeLength < 0) {
              continue;
            }
            // Ensure that the byte immediately following the truncated
            // endstream command is a space, to prevent false positives.
            const lastByte = stream.peekBytes(end + 1)[end];
            if (isWhiteSpace(lastByte)) {
              info(
                `Found "${bytesToString(truncatedSignature)}" when ` +
                  "searching for endstream command."
              );
              actualLength = maybeLength;
            }
            break;
          }

          if (actualLength < 0) {
            throw new FormatError("Missing endstream command.");
          }
        }
        length = actualLength;

        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift(); // 'endstream'

      stream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    }

    /**
     * Wrap `stream` in the decoder(s) named by the /F or /Filter entry of
     * `dict`, using the matching /DP or /DecodeParms entry for parameters.
     *
     * @param {Object} stream - The stream to decode.
     * @param {Dict} dict - The dictionary holding the filter entries.
     * @param {number} length - The length of the undecoded data.
     * @returns {Object} The decoded stream, or `stream` itself if the
     *   dictionary specifies neither a filter name nor an array of them.
     * @throws {FormatError} If an entry of a filter array isn't a name.
     */
    filter(stream, dict, length) {
      const filterEntry = dict.get("F", "Filter");
      const paramsEntry = dict.get("DP", "DecodeParms");

      if (filterEntry instanceof Name) {
        if (Array.isArray(paramsEntry)) {
          warn("/DecodeParms should not be an Array, when /Filter is a Name.");
        }
        return this.makeFilter(stream, filterEntry.name, length, paramsEntry);
      }
      if (!Array.isArray(filterEntry)) {
        return stream;
      }

      const hasParamsArray = Array.isArray(paramsEntry);
      let decoded = stream;
      let maybeLength = length;
      for (let i = 0, ii = filterEntry.length; i < ii; ++i) {
        const filterObj = this.xref.fetchIfRef(filterEntry[i]);
        if (!(filterObj instanceof Name)) {
          throw new FormatError(`Bad filter name "${filterObj}"`);
        }
        const params =
          hasParamsArray && i in paramsEntry
            ? this.xref.fetchIfRef(paramsEntry[i])
            : null;

        decoded = this.makeFilter(decoded, filterObj.name, maybeLength, params);
        // After the first stream the `length` variable is invalid.
        maybeLength = null;
      }
      return decoded;
    }

    /**
     * Create the decoding stream for a single filter.
     *
     * @param {Object} stream - The stream to decode.
     * @param {string} name - The filter name, in its long or abbreviated form.
     * @param {number|null} maybeLength - The length of the data, if known.
     * @param {Dict} [params] - The decode parameters of the filter.
     * @returns {Object} The decoding stream; `stream` itself when the filter
     *   isn't supported; or a `NullStream` when the data is known to be empty
     *   or when creating the decoder failed.
     * @throws {MissingDataException} Always re-thrown, never swallowed.
     */
    makeFilter(stream, name, maybeLength, params) {
      // Since the 'Length' entry in the stream dictionary can be completely
      // wrong, e.g. zero for non-empty streams, only skip parsing the stream
      // when we can be absolutely certain that it actually is empty.
      if (maybeLength === 0) {
        warn(`Empty "${name}" stream.`);
        return new NullStream();
      }

      const xrefStats = this.xref.stats;
      try {
        switch (name) {
          case "Fl":
          case "FlateDecode": {
            xrefStats.addStreamType(StreamType.FLATE);
            const flateStream = new FlateStream(stream, maybeLength);
            return params
              ? new PredictorStream(flateStream, maybeLength, params)
              : flateStream;
          }
          case "LZW":
          case "LZWDecode": {
            xrefStats.addStreamType(StreamType.LZW);
            const earlyChange =
              params && params.has("EarlyChange")
                ? params.get("EarlyChange")
                : 1;
            const lzwStream = new LZWStream(stream, maybeLength, earlyChange);
            return params
              ? new PredictorStream(lzwStream, maybeLength, params)
              : lzwStream;
          }
          case "DCT":
          case "DCTDecode":
            xrefStats.addStreamType(StreamType.DCT);
            return new JpegStream(stream, maybeLength, params);
          case "JPX":
          case "JPXDecode":
            xrefStats.addStreamType(StreamType.JPX);
            return new JpxStream(stream, maybeLength, params);
          case "A85":
          case "ASCII85Decode":
            xrefStats.addStreamType(StreamType.A85);
            return new Ascii85Stream(stream, maybeLength);
          case "AHx":
          case "ASCIIHexDecode":
            xrefStats.addStreamType(StreamType.AHX);
            return new AsciiHexStream(stream, maybeLength);
          case "CCF":
          case "CCITTFaxDecode":
            xrefStats.addStreamType(StreamType.CCF);
            return new CCITTFaxStream(stream, maybeLength, params);
          case "RL":
          case "RunLengthDecode":
            xrefStats.addStreamType(StreamType.RLX);
            return new RunLengthStream(stream, maybeLength);
          case "JBIG2Decode":
            xrefStats.addStreamType(StreamType.JBIG);
            return new Jbig2Stream(stream, maybeLength, params);
        }
        warn(`Filter "${name}" is not supported.`);
        return stream;
      } catch (ex) {
        if (ex instanceof MissingDataException) {
          throw ex;
        }
        warn(`Invalid stream: "${ex}"`);
        return new NullStream();
      }
    }
  }
  // Character classification table, indexed by byte value (0x00-0xff).
  // An entry of '1' means the character is white space. An entry of '1' or
  // '2' means the character ends a name or command.
  const specialChars = new Array(256).fill(0);
  // NUL, TAB, LF, FF, CR and SPACE.
  for (const ch of [0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20]) {
    specialChars[ch] = 1;
  }
  // '%', '(', ')', '/', '<', '>', '[', ']', '{' and '}'.
  for (const ch of [
    0x25, 0x28, 0x29, 0x2f, 0x3c, 0x3e, 0x5b, 0x5d, 0x7b, 0x7d,
  ]) {
    specialChars[ch] = 2;
  }
  /**
   * Convert the character code of a hexadecimal digit into its numeric value.
   *
   * @param {number} ch - The character code to convert.
   * @returns {number} A value in the range 0-15, or -1 if `ch` isn't the
   *   character code of one of '0'-'9', 'A'-'F' or 'a'-'f'.
   */
  function toHexDigit(ch) {
    const isDecimalDigit = ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39;
    if (isDecimalDigit) {
      // The low nibble of '0'-'9' is the digit's value.
      return ch & 0x0f;
    }
    const isUpperCaseLetter = ch >= /* 'A' = */ 0x41 && ch <= /* 'F' = */ 0x46;
    const isLowerCaseLetter = ch >= /* 'a' = */ 0x61 && ch <= /* 'f' = */ 0x66;
    if (!isUpperCaseLetter && !isLowerCaseLetter) {
      return -1;
    }
    // The low nibble of 'A'-'F' and 'a'-'f' is 1-6, hence add 9 to get 10-15.
    return (ch & 0x0f) + 9;
  }
  778. class Lexer {
  779. constructor(stream, knownCommands = null) {
  780. this.stream = stream;
  781. this.nextChar();
  782. // While lexing, we build up many strings one char at a time. Using += for
  783. // this can result in lots of garbage strings. It's better to build an
  784. // array of single-char strings and then join() them together at the end.
  785. // And reusing a single array (i.e. |this.strBuf|) over and over for this
  786. // purpose uses less memory than using a new array for each string.
  787. this.strBuf = [];
  788. // The PDFs might have "glued" commands with other commands, operands or
  789. // literals, e.g. "q1". The knownCommands is a dictionary of the valid
  790. // commands and their prefixes. The prefixes are built the following way:
  791. // if there a command that is a prefix of the other valid command or
  792. // literal (e.g. 'f' and 'false') the following prefixes must be included,
  793. // 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
  794. // other commands or literals as a prefix. The knowCommands is optional.
  795. this.knownCommands = knownCommands;
  796. this._hexStringNumWarn = 0;
  797. this.beginInlineImagePos = -1;
  798. }
  799. nextChar() {
  800. return (this.currentChar = this.stream.getByte());
  801. }
  802. peekChar() {
  803. return this.stream.peekByte();
  804. }
  805. getNumber() {
  806. let ch = this.currentChar;
  807. let eNotation = false;
  808. let divideBy = 0; // Different from 0 if it's a floating point value.
  809. let sign = 0;
  810. if (ch === /* '-' = */ 0x2d) {
  811. sign = -1;
  812. ch = this.nextChar();
  813. if (ch === /* '-' = */ 0x2d) {
  814. // Ignore double negative (this is consistent with Adobe Reader).
  815. ch = this.nextChar();
  816. }
  817. } else if (ch === /* '+' = */ 0x2b) {
  818. sign = 1;
  819. ch = this.nextChar();
  820. }
  821. if (ch === /* LF = */ 0x0a || ch === /* CR = */ 0x0d) {
  822. // Ignore line-breaks (this is consistent with Adobe Reader).
  823. do {
  824. ch = this.nextChar();
  825. } while (ch === 0x0a || ch === 0x0d);
  826. }
  827. if (ch === /* '.' = */ 0x2e) {
  828. divideBy = 10;
  829. ch = this.nextChar();
  830. }
  831. if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
  832. const msg = `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`;
  833. if (isWhiteSpace(ch) || ch === /* EOF = */ -1) {
  834. // This is consistent with Adobe Reader (fixes issue9252.pdf,
  835. // issue15604.pdf, bug1753983.pdf).
  836. info(`Lexer.getNumber - "${msg}".`);
  837. return 0;
  838. }
  839. throw new FormatError(msg);
  840. }
  841. sign = sign || 1;
  842. let baseValue = ch - 0x30; // '0'
  843. let powerValue = 0;
  844. let powerValueSign = 1;
  845. while ((ch = this.nextChar()) >= 0) {
  846. if (ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39) {
  847. const currentDigit = ch - 0x30; // '0'
  848. if (eNotation) {
  849. // We are after an 'e' or 'E'.
  850. powerValue = powerValue * 10 + currentDigit;
  851. } else {
  852. if (divideBy !== 0) {
  853. // We are after a point.
  854. divideBy *= 10;
  855. }
  856. baseValue = baseValue * 10 + currentDigit;
  857. }
  858. } else if (ch === /* '.' = */ 0x2e) {
  859. if (divideBy === 0) {
  860. divideBy = 1;
  861. } else {
  862. // A number can have only one dot.
  863. break;
  864. }
  865. } else if (ch === /* '-' = */ 0x2d) {
  866. // Ignore minus signs in the middle of numbers to match
  867. // Adobe's behavior.
  868. warn("Badly formatted number: minus sign in the middle");
  869. } else if (ch === /* 'E' = */ 0x45 || ch === /* 'e' = */ 0x65) {
  870. // 'E' can be either a scientific notation or the beginning of a new
  871. // operator.
  872. ch = this.peekChar();
  873. if (ch === /* '+' = */ 0x2b || ch === /* '-' = */ 0x2d) {
  874. powerValueSign = ch === 0x2d ? -1 : 1;
  875. this.nextChar(); // Consume the sign character.
  876. } else if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
  877. // The 'E' must be the beginning of a new operator.
  878. break;
  879. }
  880. eNotation = true;
  881. } else {
  882. // The last character doesn't belong to us.
  883. break;
  884. }
  885. }
  886. if (divideBy !== 0) {
  887. baseValue /= divideBy;
  888. }
  889. if (eNotation) {
  890. baseValue *= 10 ** (powerValueSign * powerValue);
  891. }
  892. return sign * baseValue;
  893. }
  894. getString() {
  895. let numParen = 1;
  896. let done = false;
  897. const strBuf = this.strBuf;
  898. strBuf.length = 0;
  899. let ch = this.nextChar();
  900. while (true) {
  901. let charBuffered = false;
  902. switch (ch | 0) {
  903. case -1:
  904. warn("Unterminated string");
  905. done = true;
  906. break;
  907. case 0x28: // '('
  908. ++numParen;
  909. strBuf.push("(");
  910. break;
  911. case 0x29: // ')'
  912. if (--numParen === 0) {
  913. this.nextChar(); // consume strings ')'
  914. done = true;
  915. } else {
  916. strBuf.push(")");
  917. }
  918. break;
  919. case 0x5c: // '\\'
  920. ch = this.nextChar();
  921. switch (ch) {
  922. case -1:
  923. warn("Unterminated string");
  924. done = true;
  925. break;
  926. case 0x6e: // 'n'
  927. strBuf.push("\n");
  928. break;
  929. case 0x72: // 'r'
  930. strBuf.push("\r");
  931. break;
  932. case 0x74: // 't'
  933. strBuf.push("\t");
  934. break;
  935. case 0x62: // 'b'
  936. strBuf.push("\b");
  937. break;
  938. case 0x66: // 'f'
  939. strBuf.push("\f");
  940. break;
  941. case 0x5c: // '\'
  942. case 0x28: // '('
  943. case 0x29: // ')'
  944. strBuf.push(String.fromCharCode(ch));
  945. break;
  946. case 0x30: // '0'
  947. case 0x31: // '1'
  948. case 0x32: // '2'
  949. case 0x33: // '3'
  950. case 0x34: // '4'
  951. case 0x35: // '5'
  952. case 0x36: // '6'
  953. case 0x37: // '7'
  954. let x = ch & 0x0f;
  955. ch = this.nextChar();
  956. charBuffered = true;
  957. if (ch >= /* '0' = */ 0x30 && ch <= /* '7' = */ 0x37) {
  958. x = (x << 3) + (ch & 0x0f);
  959. ch = this.nextChar();
  960. if (ch >= /* '0' = */ 0x30 && ch /* '7' = */ <= 0x37) {
  961. charBuffered = false;
  962. x = (x << 3) + (ch & 0x0f);
  963. }
  964. }
  965. strBuf.push(String.fromCharCode(x));
  966. break;
  967. case 0x0d: // CR
  968. if (this.peekChar() === /* LF = */ 0x0a) {
  969. this.nextChar();
  970. }
  971. break;
  972. case 0x0a: // LF
  973. break;
  974. default:
  975. strBuf.push(String.fromCharCode(ch));
  976. break;
  977. }
  978. break;
  979. default:
  980. strBuf.push(String.fromCharCode(ch));
  981. break;
  982. }
  983. if (done) {
  984. break;
  985. }
  986. if (!charBuffered) {
  987. ch = this.nextChar();
  988. }
  989. }
  990. return strBuf.join("");
  991. }
  992. getName() {
  993. let ch, previousCh;
  994. const strBuf = this.strBuf;
  995. strBuf.length = 0;
  996. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  997. if (ch === /* '#' = */ 0x23) {
  998. ch = this.nextChar();
  999. if (specialChars[ch]) {
  1000. warn(
  1001. "Lexer_getName: " +
  1002. "NUMBER SIGN (#) should be followed by a hexadecimal number."
  1003. );
  1004. strBuf.push("#");
  1005. break;
  1006. }
  1007. const x = toHexDigit(ch);
  1008. if (x !== -1) {
  1009. previousCh = ch;
  1010. ch = this.nextChar();
  1011. const x2 = toHexDigit(ch);
  1012. if (x2 === -1) {
  1013. warn(
  1014. `Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
  1015. "in hexadecimal number."
  1016. );
  1017. strBuf.push("#", String.fromCharCode(previousCh));
  1018. if (specialChars[ch]) {
  1019. break;
  1020. }
  1021. strBuf.push(String.fromCharCode(ch));
  1022. continue;
  1023. }
  1024. strBuf.push(String.fromCharCode((x << 4) | x2));
  1025. } else {
  1026. strBuf.push("#", String.fromCharCode(ch));
  1027. }
  1028. } else {
  1029. strBuf.push(String.fromCharCode(ch));
  1030. }
  1031. }
  1032. if (strBuf.length > 127) {
  1033. warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
  1034. }
  1035. return Name.get(strBuf.join(""));
  1036. }
  1037. /**
  1038. * @private
  1039. */
  1040. _hexStringWarn(ch) {
  1041. const MAX_HEX_STRING_NUM_WARN = 5;
  1042. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  1043. warn("getHexString - ignoring additional invalid characters.");
  1044. return;
  1045. }
  1046. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  1047. // Limit the number of warning messages printed for a `this.getHexString`
  1048. // invocation, since corrupt PDF documents may otherwise spam the console
  1049. // enough to affect general performance negatively.
  1050. return;
  1051. }
  1052. warn(`getHexString - ignoring invalid character: ${ch}`);
  1053. }
  1054. getHexString() {
  1055. const strBuf = this.strBuf;
  1056. strBuf.length = 0;
  1057. let ch = this.currentChar;
  1058. let isFirstHex = true;
  1059. let firstDigit, secondDigit;
  1060. this._hexStringNumWarn = 0;
  1061. while (true) {
  1062. if (ch < 0) {
  1063. warn("Unterminated hex string");
  1064. break;
  1065. } else if (ch === /* '>' = */ 0x3e) {
  1066. this.nextChar();
  1067. break;
  1068. } else if (specialChars[ch] === 1) {
  1069. ch = this.nextChar();
  1070. continue;
  1071. } else {
  1072. if (isFirstHex) {
  1073. firstDigit = toHexDigit(ch);
  1074. if (firstDigit === -1) {
  1075. this._hexStringWarn(ch);
  1076. ch = this.nextChar();
  1077. continue;
  1078. }
  1079. } else {
  1080. secondDigit = toHexDigit(ch);
  1081. if (secondDigit === -1) {
  1082. this._hexStringWarn(ch);
  1083. ch = this.nextChar();
  1084. continue;
  1085. }
  1086. strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
  1087. }
  1088. isFirstHex = !isFirstHex;
  1089. ch = this.nextChar();
  1090. }
  1091. }
  1092. return strBuf.join("");
  1093. }
  1094. getObj() {
  1095. // Skip whitespace and comments.
  1096. let comment = false;
  1097. let ch = this.currentChar;
  1098. while (true) {
  1099. if (ch < 0) {
  1100. return EOF;
  1101. }
  1102. if (comment) {
  1103. if (ch === /* LF = */ 0x0a || ch === /* CR = */ 0x0d) {
  1104. comment = false;
  1105. }
  1106. } else if (ch === /* '%' = */ 0x25) {
  1107. comment = true;
  1108. } else if (specialChars[ch] !== 1) {
  1109. break;
  1110. }
  1111. ch = this.nextChar();
  1112. }
  1113. // Start reading a token.
  1114. switch (ch | 0) {
  1115. case 0x30: // '0'
  1116. case 0x31: // '1'
  1117. case 0x32: // '2'
  1118. case 0x33: // '3'
  1119. case 0x34: // '4'
  1120. case 0x35: // '5'
  1121. case 0x36: // '6'
  1122. case 0x37: // '7'
  1123. case 0x38: // '8'
  1124. case 0x39: // '9'
  1125. case 0x2b: // '+'
  1126. case 0x2d: // '-'
  1127. case 0x2e: // '.'
  1128. return this.getNumber();
  1129. case 0x28: // '('
  1130. return this.getString();
  1131. case 0x2f: // '/'
  1132. return this.getName();
  1133. // array punctuation
  1134. case 0x5b: // '['
  1135. this.nextChar();
  1136. return Cmd.get("[");
  1137. case 0x5d: // ']'
  1138. this.nextChar();
  1139. return Cmd.get("]");
  1140. // hex string or dict punctuation
  1141. case 0x3c: // '<'
  1142. ch = this.nextChar();
  1143. if (ch === 0x3c) {
  1144. // dict punctuation
  1145. this.nextChar();
  1146. return Cmd.get("<<");
  1147. }
  1148. return this.getHexString();
  1149. // dict punctuation
  1150. case 0x3e: // '>'
  1151. ch = this.nextChar();
  1152. if (ch === 0x3e) {
  1153. this.nextChar();
  1154. return Cmd.get(">>");
  1155. }
  1156. return Cmd.get(">");
  1157. case 0x7b: // '{'
  1158. this.nextChar();
  1159. return Cmd.get("{");
  1160. case 0x7d: // '}'
  1161. this.nextChar();
  1162. return Cmd.get("}");
  1163. case 0x29: // ')'
  1164. // Consume the current character in order to avoid permanently hanging
  1165. // the worker thread if `Lexer.getObj` is called from within a loop
  1166. // containing try-catch statements, since we would otherwise attempt
  1167. // to parse the *same* character over and over (fixes issue8061.pdf).
  1168. this.nextChar();
  1169. throw new FormatError(`Illegal character: ${ch}`);
  1170. }
  1171. // Start reading a command.
  1172. let str = String.fromCharCode(ch);
  1173. // A valid command cannot start with a non-visible ASCII character,
  1174. // and the next character may be (the start of) a valid command.
  1175. if (ch < 0x20 || ch > 0x7f) {
  1176. const nextCh = this.peekChar();
  1177. if (nextCh >= 0x20 && nextCh <= 0x7f) {
  1178. this.nextChar();
  1179. return Cmd.get(str);
  1180. }
  1181. }
  1182. const knownCommands = this.knownCommands;
  1183. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  1184. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  1185. // Stop if a known command is found and next character does not make
  1186. // the string a command.
  1187. const possibleCommand = str + String.fromCharCode(ch);
  1188. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  1189. break;
  1190. }
  1191. if (str.length === 128) {
  1192. throw new FormatError(`Command token too long: ${str.length}`);
  1193. }
  1194. str = possibleCommand;
  1195. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  1196. }
  1197. if (str === "true") {
  1198. return true;
  1199. }
  1200. if (str === "false") {
  1201. return false;
  1202. }
  1203. if (str === "null") {
  1204. return null;
  1205. }
  1206. if (str === "BI") {
  1207. // Keep track of the current stream position, since it's needed in order
  1208. // to correctly cache inline images; see `Parser.makeInlineImage`.
  1209. this.beginInlineImagePos = this.stream.pos;
  1210. }
  1211. return Cmd.get(str);
  1212. }
  1213. peekObj() {
  1214. const streamPos = this.stream.pos,
  1215. currentChar = this.currentChar,
  1216. beginInlineImagePos = this.beginInlineImagePos;
  1217. let nextObj;
  1218. try {
  1219. nextObj = this.getObj();
  1220. } catch (ex) {
  1221. if (ex instanceof MissingDataException) {
  1222. throw ex;
  1223. }
  1224. warn(`peekObj: ${ex}`);
  1225. }
  1226. // Ensure that we reset *all* relevant `Lexer`-instance state.
  1227. this.stream.pos = streamPos;
  1228. this.currentChar = currentChar;
  1229. this.beginInlineImagePos = beginInlineImagePos;
  1230. return nextObj;
  1231. }
  1232. skipToNextLine() {
  1233. let ch = this.currentChar;
  1234. while (ch >= 0) {
  1235. if (ch === /* CR = */ 0x0d) {
  1236. ch = this.nextChar();
  1237. if (ch === /* LF = */ 0x0a) {
  1238. this.nextChar();
  1239. }
  1240. break;
  1241. } else if (ch === /* LF = */ 0x0a) {
  1242. this.nextChar();
  1243. break;
  1244. }
  1245. ch = this.nextChar();
  1246. }
  1247. }
  1248. }
  1249. class Linearization {
  1250. static create(stream) {
  1251. function getInt(linDict, name, allowZeroValue = false) {
  1252. const obj = linDict.get(name);
  1253. if (Number.isInteger(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) {
  1254. return obj;
  1255. }
  1256. throw new Error(
  1257. `The "${name}" parameter in the linearization ` +
  1258. "dictionary is invalid."
  1259. );
  1260. }
  1261. function getHints(linDict) {
  1262. const hints = linDict.get("H");
  1263. let hintsLength;
  1264. if (
  1265. Array.isArray(hints) &&
  1266. ((hintsLength = hints.length) === 2 || hintsLength === 4)
  1267. ) {
  1268. for (let index = 0; index < hintsLength; index++) {
  1269. const hint = hints[index];
  1270. if (!(Number.isInteger(hint) && hint > 0)) {
  1271. throw new Error(
  1272. `Hint (${index}) in the linearization dictionary is invalid.`
  1273. );
  1274. }
  1275. }
  1276. return hints;
  1277. }
  1278. throw new Error("Hint array in the linearization dictionary is invalid.");
  1279. }
  1280. const parser = new Parser({
  1281. lexer: new Lexer(stream),
  1282. xref: null,
  1283. });
  1284. const obj1 = parser.getObj();
  1285. const obj2 = parser.getObj();
  1286. const obj3 = parser.getObj();
  1287. const linDict = parser.getObj();
  1288. let obj, length;
  1289. if (
  1290. !(
  1291. Number.isInteger(obj1) &&
  1292. Number.isInteger(obj2) &&
  1293. isCmd(obj3, "obj") &&
  1294. linDict instanceof Dict &&
  1295. typeof (obj = linDict.get("Linearized")) === "number" &&
  1296. obj > 0
  1297. )
  1298. ) {
  1299. return null; // No valid linearization dictionary found.
  1300. } else if ((length = getInt(linDict, "L")) !== stream.length) {
  1301. throw new Error(
  1302. 'The "L" parameter in the linearization dictionary ' +
  1303. "does not equal the stream length."
  1304. );
  1305. }
  1306. return {
  1307. length,
  1308. hints: getHints(linDict),
  1309. objectNumberFirst: getInt(linDict, "O"),
  1310. endFirst: getInt(linDict, "E"),
  1311. numPages: getInt(linDict, "N"),
  1312. mainXRefEntriesOffset: getInt(linDict, "T"),
  1313. pageFirst: linDict.has("P")
  1314. ? getInt(linDict, "P", /* allowZeroValue = */ true)
  1315. : 0,
  1316. };
  1317. }
  1318. }
  1319. export { Lexer, Linearization, Parser };