chunked_stream.js

/* Copyright 2012 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import {
  arrayByteLength,
  arraysToBytes,
  createPromiseCapability,
} from "../shared/util.js";
import { MissingDataException } from "./core_utils.js";
import { Stream } from "./stream.js";

class ChunkedStream extends Stream {
  constructor(length, chunkSize, manager) {
    super(
      /* arrayBuffer = */ new Uint8Array(length),
      /* start = */ 0,
      /* length = */ length,
      /* dict = */ null
    );
    this.chunkSize = chunkSize;
    this._loadedChunks = new Set();
    this.numChunks = Math.ceil(length / chunkSize);
    this.manager = manager;
    this.progressiveDataLength = 0;
    this.lastSuccessfulEnsureByteChunk = -1; // Single-entry cache
  }

  // If a particular stream does not implement one or more of these methods,
  // an error should be thrown.
  getMissingChunks() {
    const chunks = [];
    for (let chunk = 0, n = this.numChunks; chunk < n; ++chunk) {
      if (!this._loadedChunks.has(chunk)) {
        chunks.push(chunk);
      }
    }
    return chunks;
  }

  get numChunksLoaded() {
    return this._loadedChunks.size;
  }

  get isDataLoaded() {
    return this.numChunksLoaded === this.numChunks;
  }

  onReceiveData(begin, chunk) {
    const chunkSize = this.chunkSize;
    if (begin % chunkSize !== 0) {
      throw new Error(`Bad begin offset: ${begin}`);
    }
    // Using `this.length` is inaccurate here since `this.start` can be moved
    // (see the `moveStart` method).
    const end = begin + chunk.byteLength;
    if (end % chunkSize !== 0 && end !== this.bytes.length) {
      throw new Error(`Bad end offset: ${end}`);
    }

    this.bytes.set(new Uint8Array(chunk), begin);
    const beginChunk = Math.floor(begin / chunkSize);
    const endChunk = Math.floor((end - 1) / chunkSize) + 1;

    for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
      // Since a value can only occur *once* in a `Set`, there's no need to
      // manually check `Set.prototype.has()` before adding the value here.
      this._loadedChunks.add(curChunk);
    }
  }
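
  // Illustrative example for `onReceiveData` above (not part of the upstream
  // file): with chunkSize = 512, the call
  //
  //   stream.onReceiveData(1024, new Uint8Array(512));
  //
  // fills bytes [1024, 1536) and marks chunk 2 as loaded, since
  // beginChunk = floor(1024 / 512) = 2 and
  // endChunk = floor((1536 - 1) / 512) + 1 = 3.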
  onReceiveProgressiveData(data) {
    let position = this.progressiveDataLength;
    const beginChunk = Math.floor(position / this.chunkSize);

    this.bytes.set(new Uint8Array(data), position);
    position += data.byteLength;
    this.progressiveDataLength = position;
    const endChunk =
      position >= this.end
        ? this.numChunks
        : Math.floor(position / this.chunkSize);

    for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
      // Since a value can only occur *once* in a `Set`, there's no need to
      // manually check `Set.prototype.has()` before adding the value here.
      this._loadedChunks.add(curChunk);
    }
  }

  ensureByte(pos) {
    if (pos < this.progressiveDataLength) {
      return;
    }

    const chunk = Math.floor(pos / this.chunkSize);
    if (chunk > this.numChunks) {
      return;
    }
    if (chunk === this.lastSuccessfulEnsureByteChunk) {
      return;
    }

    if (!this._loadedChunks.has(chunk)) {
      throw new MissingDataException(pos, pos + 1);
    }
    this.lastSuccessfulEnsureByteChunk = chunk;
  }

  ensureRange(begin, end) {
    if (begin >= end) {
      return;
    }
    if (end <= this.progressiveDataLength) {
      return;
    }

    const beginChunk = Math.floor(begin / this.chunkSize);
    if (beginChunk > this.numChunks) {
      return;
    }
    const endChunk = Math.min(
      Math.floor((end - 1) / this.chunkSize) + 1,
      this.numChunks
    );
    for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
      if (!this._loadedChunks.has(chunk)) {
        throw new MissingDataException(begin, end);
      }
    }
  }
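
  // Illustrative example for `ensureRange` above (not part of the upstream
  // file): with chunkSize = 512, calling
  //
  //   stream.ensureRange(1000, 1500);
  //
  // requires chunks 1 and 2 (covering bytes [512, 1536)); if either chunk is
  // still missing, a MissingDataException for the range [1000, 1500) is
  // thrown so that the caller can request the data and retry.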
  nextEmptyChunk(beginChunk) {
    const numChunks = this.numChunks;
    for (let i = 0; i < numChunks; ++i) {
      const chunk = (beginChunk + i) % numChunks; // Wrap around to beginning.
      if (!this._loadedChunks.has(chunk)) {
        return chunk;
      }
    }
    return null;
  }

  hasChunk(chunk) {
    return this._loadedChunks.has(chunk);
  }

  getByte() {
    const pos = this.pos;
    if (pos >= this.end) {
      return -1;
    }
    if (pos >= this.progressiveDataLength) {
      this.ensureByte(pos);
    }
    return this.bytes[this.pos++];
  }

  getBytes(length) {
    const bytes = this.bytes;
    const pos = this.pos;
    const strEnd = this.end;

    if (!length) {
      if (strEnd > this.progressiveDataLength) {
        this.ensureRange(pos, strEnd);
      }
      return bytes.subarray(pos, strEnd);
    }

    let end = pos + length;
    if (end > strEnd) {
      end = strEnd;
    }
    if (end > this.progressiveDataLength) {
      this.ensureRange(pos, end);
    }

    this.pos = end;
    return bytes.subarray(pos, end);
  }

  getByteRange(begin, end) {
    if (begin < 0) {
      begin = 0;
    }
    if (end > this.end) {
      end = this.end;
    }
    if (end > this.progressiveDataLength) {
      this.ensureRange(begin, end);
    }
    return this.bytes.subarray(begin, end);
  }

  makeSubStream(start, length, dict = null) {
    if (length) {
      if (start + length > this.progressiveDataLength) {
        this.ensureRange(start, start + length);
      }
    } else {
      // When the `length` is undefined you do *not*, under any circumstances,
      // want to fallback on calling `this.ensureRange(start, this.end)` since
      // that would force the *entire* PDF file to be loaded, thus completely
      // breaking the whole purpose of using streaming and/or range requests.
      //
      // However, not doing any checking here could very easily lead to wasted
      // time/resources during e.g. parsing, since `MissingDataException`s will
      // require data to be re-parsed, which we attempt to minimize by at least
      // checking that the *beginning* of the data is available here.
      if (start >= this.progressiveDataLength) {
        this.ensureByte(start);
      }
    }

    function ChunkedStreamSubstream() {}
    ChunkedStreamSubstream.prototype = Object.create(this);
    ChunkedStreamSubstream.prototype.getMissingChunks = function () {
      const chunkSize = this.chunkSize;
      const beginChunk = Math.floor(this.start / chunkSize);
      const endChunk = Math.floor((this.end - 1) / chunkSize) + 1;
      const missingChunks = [];
      for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!this._loadedChunks.has(chunk)) {
          missingChunks.push(chunk);
        }
      }
      return missingChunks;
    };
    Object.defineProperty(ChunkedStreamSubstream.prototype, "isDataLoaded", {
      get() {
        if (this.numChunksLoaded === this.numChunks) {
          return true;
        }
        return this.getMissingChunks().length === 0;
      },
      configurable: true,
    });

    const subStream = new ChunkedStreamSubstream();
    subStream.pos = subStream.start = start;
    subStream.end = start + length || this.end;
    subStream.dict = dict;
    return subStream;
  }

  getBaseStreams() {
    return [this];
  }
}
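
// Illustrative sketch (not part of this file): code that reads from a
// `ChunkedStream` is expected to catch `MissingDataException`, ask the
// manager to fetch the missing range, and then retry. Roughly:
//
//   try {
//     data = chunkedStream.getByteRange(begin, end);
//   } catch (ex) {
//     if (!(ex instanceof MissingDataException)) {
//       throw ex;
//     }
//     await manager.requestRange(ex.begin, ex.end);
//     data = chunkedStream.getByteRange(begin, end); // Retry once loaded.
//   }
//
// The names `chunkedStream`, `manager`, and `data` above are placeholders,
// not identifiers from this module.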
class ChunkedStreamManager {
  constructor(pdfNetworkStream, args) {
    this.length = args.length;
    this.chunkSize = args.rangeChunkSize;
    this.stream = new ChunkedStream(this.length, this.chunkSize, this);
    this.pdfNetworkStream = pdfNetworkStream;
    this.disableAutoFetch = args.disableAutoFetch;
    this.msgHandler = args.msgHandler;

    this.currRequestId = 0;

    this._chunksNeededByRequest = new Map();
    this._requestsByChunk = new Map();
    this._promisesByRequest = new Map();
    this.progressiveDataLength = 0;
    this.aborted = false;

    this._loadedStreamCapability = createPromiseCapability();
  }

  sendRequest(begin, end) {
    const rangeReader = this.pdfNetworkStream.getRangeReader(begin, end);
    if (!rangeReader.isStreamingSupported) {
      rangeReader.onProgress = this.onProgress.bind(this);
    }

    let chunks = [],
      loaded = 0;
    return new Promise((resolve, reject) => {
      const readChunk = chunk => {
        try {
          if (!chunk.done) {
            const data = chunk.value;
            chunks.push(data);
            loaded += arrayByteLength(data);
            if (rangeReader.isStreamingSupported) {
              this.onProgress({ loaded });
            }
            rangeReader.read().then(readChunk, reject);
            return;
          }
          const chunkData = arraysToBytes(chunks);
          chunks = null;
          resolve(chunkData);
        } catch (e) {
          reject(e);
        }
      };
      rangeReader.read().then(readChunk, reject);
    }).then(data => {
      if (this.aborted) {
        return; // Ignoring any data after abort.
      }
      this.onReceiveData({ chunk: data, begin });
    });
  }

  /**
   * Get all the chunks that are not yet loaded and group them into
   * contiguous ranges to load in as few requests as possible.
   */
  requestAllChunks(noFetch = false) {
    if (!noFetch) {
      const missingChunks = this.stream.getMissingChunks();
      this._requestChunks(missingChunks);
    }
    return this._loadedStreamCapability.promise;
  }

  _requestChunks(chunks) {
    const requestId = this.currRequestId++;

    const chunksNeeded = new Set();
    this._chunksNeededByRequest.set(requestId, chunksNeeded);
    for (const chunk of chunks) {
      if (!this.stream.hasChunk(chunk)) {
        chunksNeeded.add(chunk);
      }
    }

    if (chunksNeeded.size === 0) {
      return Promise.resolve();
    }

    const capability = createPromiseCapability();
    this._promisesByRequest.set(requestId, capability);

    const chunksToRequest = [];
    for (const chunk of chunksNeeded) {
      let requestIds = this._requestsByChunk.get(chunk);
      if (!requestIds) {
        requestIds = [];
        this._requestsByChunk.set(chunk, requestIds);

        chunksToRequest.push(chunk);
      }
      requestIds.push(requestId);
    }

    if (chunksToRequest.length > 0) {
      const groupedChunksToRequest = this.groupChunks(chunksToRequest);
      for (const groupedChunk of groupedChunksToRequest) {
        const begin = groupedChunk.beginChunk * this.chunkSize;
        const end = Math.min(
          groupedChunk.endChunk * this.chunkSize,
          this.length
        );
        this.sendRequest(begin, end).catch(capability.reject);
      }
    }

    return capability.promise.catch(reason => {
      if (this.aborted) {
        return; // Ignoring any pending requests after abort.
      }
      throw reason;
    });
  }

  getStream() {
    return this.stream;
  }

  /**
   * Loads any chunks in the requested range that are not yet loaded.
   */
  requestRange(begin, end) {
    end = Math.min(end, this.length);

    const beginChunk = this.getBeginChunk(begin);
    const endChunk = this.getEndChunk(end);

    const chunks = [];
    for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
      chunks.push(chunk);
    }
    return this._requestChunks(chunks);
  }

  requestRanges(ranges = []) {
    const chunksToRequest = [];
    for (const range of ranges) {
      const beginChunk = this.getBeginChunk(range.begin);
      const endChunk = this.getEndChunk(range.end);
      for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!chunksToRequest.includes(chunk)) {
          chunksToRequest.push(chunk);
        }
      }
    }

    chunksToRequest.sort(function (a, b) {
      return a - b;
    });
    return this._requestChunks(chunksToRequest);
  }

  /**
   * Groups a sorted array of chunks into as few contiguous larger
   * chunks as possible.
   */
  groupChunks(chunks) {
    const groupedChunks = [];
    let beginChunk = -1;
    let prevChunk = -1;

    for (let i = 0, ii = chunks.length; i < ii; ++i) {
      const chunk = chunks[i];
      if (beginChunk < 0) {
        beginChunk = chunk;
      }

      if (prevChunk >= 0 && prevChunk + 1 !== chunk) {
        groupedChunks.push({ beginChunk, endChunk: prevChunk + 1 });
        beginChunk = chunk;
      }
      if (i + 1 === chunks.length) {
        groupedChunks.push({ beginChunk, endChunk: chunk + 1 });
      }

      prevChunk = chunk;
    }
    return groupedChunks;
  }
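
  // Worked example for `groupChunks` above (illustrative):
  //
  //   groupChunks([1, 2, 3, 7, 8, 10]);
  //   // -> [{ beginChunk: 1, endChunk: 4 },
  //   //     { beginChunk: 7, endChunk: 9 },
  //   //     { beginChunk: 10, endChunk: 11 }]
  //
  // i.e. each group covers the half-open chunk range [beginChunk, endChunk).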
  onProgress(args) {
    this.msgHandler.send("DocProgress", {
      loaded: this.stream.numChunksLoaded * this.chunkSize + args.loaded,
      total: this.length,
    });
  }

  onReceiveData(args) {
    const chunk = args.chunk;
    const isProgressive = args.begin === undefined;
    const begin = isProgressive ? this.progressiveDataLength : args.begin;
    const end = begin + chunk.byteLength;

    const beginChunk = Math.floor(begin / this.chunkSize);
    const endChunk =
      end < this.length
        ? Math.floor(end / this.chunkSize)
        : Math.ceil(end / this.chunkSize);

    if (isProgressive) {
      this.stream.onReceiveProgressiveData(chunk);
      this.progressiveDataLength = end;
    } else {
      this.stream.onReceiveData(begin, chunk);
    }

    if (this.stream.isDataLoaded) {
      this._loadedStreamCapability.resolve(this.stream);
    }

    const loadedRequests = [];
    for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
      // The server might return more chunks than requested.
      const requestIds = this._requestsByChunk.get(curChunk);
      if (!requestIds) {
        continue;
      }
      this._requestsByChunk.delete(curChunk);

      for (const requestId of requestIds) {
        const chunksNeeded = this._chunksNeededByRequest.get(requestId);
        if (chunksNeeded.has(curChunk)) {
          chunksNeeded.delete(curChunk);
        }
        if (chunksNeeded.size > 0) {
          continue;
        }
        loadedRequests.push(requestId);
      }
    }

    // If there are no pending requests, automatically fetch the next
    // unfetched chunk of the PDF file.
    if (!this.disableAutoFetch && this._requestsByChunk.size === 0) {
      let nextEmptyChunk;
      if (this.stream.numChunksLoaded === 1) {
        // This is a special optimization so that after fetching the first
        // chunk, rather than fetching the second chunk, we fetch the last
        // chunk.
        const lastChunk = this.stream.numChunks - 1;
        if (!this.stream.hasChunk(lastChunk)) {
          nextEmptyChunk = lastChunk;
        }
      } else {
        nextEmptyChunk = this.stream.nextEmptyChunk(endChunk);
      }
      if (Number.isInteger(nextEmptyChunk)) {
        this._requestChunks([nextEmptyChunk]);
      }
    }

    for (const requestId of loadedRequests) {
      const capability = this._promisesByRequest.get(requestId);
      this._promisesByRequest.delete(requestId);
      capability.resolve();
    }

    this.msgHandler.send("DocProgress", {
      loaded: this.stream.numChunksLoaded * this.chunkSize,
      total: this.length,
    });
  }

  onError(err) {
    this._loadedStreamCapability.reject(err);
  }

  getBeginChunk(begin) {
    return Math.floor(begin / this.chunkSize);
  }

  getEndChunk(end) {
    return Math.floor((end - 1) / this.chunkSize) + 1;
  }

  abort(reason) {
    this.aborted = true;
    if (this.pdfNetworkStream) {
      this.pdfNetworkStream.cancelAllRequests(reason);
    }
    for (const capability of this._promisesByRequest.values()) {
      capability.reject(reason);
    }
  }
}
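
// Illustrative usage sketch (not part of this file; `networkStream` and
// `msgHandler` are assumed to be a pdf.js network stream exposing
// `getRangeReader` and a message handler exposing `send`):
//
//   const manager = new ChunkedStreamManager(networkStream, {
//     length: totalLength,   // Total PDF size in bytes.
//     rangeChunkSize: 65536, // Chunk granularity for range requests.
//     disableAutoFetch: false,
//     msgHandler,
//   });
//   manager.requestAllChunks().then(stream => {
//     // `stream` is the fully loaded `ChunkedStream` instance.
//   });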

export { ChunkedStream, ChunkedStreamManager };