node_stream.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. /* Copyright 2012 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. /* globals __non_webpack_require__ */
  16. import {
  17. AbortException,
  18. assert,
  19. createPromiseCapability,
  20. MissingPDFException,
  21. } from "../shared/util.js";
  22. import {
  23. extractFilenameFromHeader,
  24. validateRangeRequestCapabilities,
  25. } from "./network_utils.js";
  // Load-time guard: when the `PDFJSDev` build helper is defined and reports a
  // MOZCENTRAL build, importing this module throws immediately.
  26. if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
  27. throw new Error(
  28. 'Module "./node_stream.js" shall not be used with MOZCENTRAL builds.'
  29. );
  30. }
  // Node.js modules used by the readers below, loaded through
  // `__non_webpack_require__` (declared as a global at the top of the file;
  // presumably the bundler-safe alias of `require` -- TODO confirm against the
  // build configuration).
  31. const fs = __non_webpack_require__("fs");
  32. const http = __non_webpack_require__("http");
  33. const https = __non_webpack_require__("https");
  34. const url = __non_webpack_require__("url");
  // Matches hrefs that start with `file:///<drive letter>:/`. The file-system
  // readers test the parsed URL's href against it and, on a match, strip the
  // leading "/" from the URL path.
  35. const fileUriRegex = /^file:\/\/\/[a-zA-Z]:\//;
  /**
   * Converts a PDF location (URL or file-system path) into a legacy Node.js
   * URL object, as produced by `url.parse`.
   *
   * - Anything that already parses as a `file:` URL, or that has a host, is
   *   returned as parsed.
   * - A Windows absolute path (drive letter followed by `/` or `\`) is
   *   re-parsed with a `file:///` prefix.
   * - Everything else is treated as a local path: the parsed object is
   *   returned with its `protocol` overwritten to `"file:"`.
   *
   * @param {string} sourceUrl - URL or path of the document.
   * @returns {Object} The parsed (legacy) URL object.
   */
  function parseUrl(sourceUrl) {
    const candidate = url.parse(sourceUrl);
    const isFileUrl = candidate.protocol === "file:";
    if (isFileUrl || candidate.host) {
      return candidate;
    }

    // Windows absolute path, e.g. `C:\docs\a.pdf`: turn it into a file URL.
    const windowsDrivePath = /^[a-z]:[/\\]/i;
    if (windowsDrivePath.test(sourceUrl)) {
      return url.parse(`file:///${sourceUrl}`);
    }

    // No host at this point, so the location refers to the file system.
    candidate.protocol = "file:";
    return candidate;
  }
  /**
   * Entry point of the Node.js network/file backend. Depending on the parsed
   * URL it hands out HTTP(S) readers or file-system readers.
   */
  class PDFNodeStream {
    /**
     * @param {Object} source - Loading parameters; `url` is parsed here and
     *   `httpHeaders` is kept only for HTTP(S) locations.
     */
    constructor(source) {
      this.source = source;
      this.url = parseUrl(source.url);

      const { protocol } = this.url;
      this.isHttp = protocol === "http:" || protocol === "https:";
      // `file:` locations are served straight from the file system.
      this.isFsUrl = protocol === "file:";
      this.httpHeaders = (this.isHttp && source.httpHeaders) || {};

      this._fullRequestReader = null;
      this._rangeRequestReaders = [];
    }

    /** Number of bytes the full reader has delivered so far (0 if none). */
    get _progressiveDataLength() {
      return this._fullRequestReader?._loaded ?? 0;
    }

    /**
     * Creates the single full-document reader for this stream.
     * Asserts that it has not been created before.
     */
    getFullReader() {
      assert(
        !this._fullRequestReader,
        "PDFNodeStream.getFullReader can only be called once."
      );
      const FullReader = this.isFsUrl
        ? PDFNodeStreamFsFullReader
        : PDFNodeStreamFullReader;
      this._fullRequestReader = new FullReader(this);
      return this._fullRequestReader;
    }

    /**
     * Creates a reader for the byte range [start, end).
     * @returns {Object|null} `null` when the full reader has already loaded
     *   at least `end` bytes, otherwise the new range reader.
     */
    getRangeReader(start, end) {
      if (end <= this._progressiveDataLength) {
        return null;
      }
      const RangeReader = this.isFsUrl
        ? PDFNodeStreamFsRangeReader
        : PDFNodeStreamRangeReader;
      const rangeReader = new RangeReader(this, start, end);
      this._rangeRequestReaders.push(rangeReader);
      return rangeReader;
    }

    /** Cancels the full reader (if any) and every range reader created. */
    cancelAllRequests(reason) {
      this._fullRequestReader?.cancel(reason);
      // Iterate over a copy so the list may change while cancelling.
      for (const reader of [...this._rangeRequestReaders]) {
        reader.cancel(reason);
      }
    }
  }
  /**
   * Shared implementation for readers that deliver a whole document from a
   * Node.js readable stream through promise-based `read()` calls.
   * Subclasses obtain the stream and pass it to `_setReadableStream`.
   */
  class BaseFullReader {
    /**
     * @param {Object} stream - Owning stream; its `url` and `source` are read.
     */
    constructor(stream) {
      const { source } = stream;

      this._url = stream.url;
      this._done = false;
      this._storedError = null;
      this.onProgress = null;
      this._contentLength = source.length; // May be undefined.
      this._loaded = 0;
      this._filename = null;
      this._rangeChunkSize = source.rangeChunkSize;
      // Range requests are off when disabled explicitly, and also whenever no
      // chunk size has been configured.
      this._disableRange = source.disableRange || !this._rangeChunkSize;

      this._isStreamingSupported = !source.disableStream;
      this._isRangeSupported = !source.disableRange;

      this._readableStream = null;
      this._readCapability = createPromiseCapability();
      this._headersCapability = createPromiseCapability();
    }

    /** Promise settled by the subclass once the headers are known. */
    get headersReady() {
      return this._headersCapability.promise;
    }

    get filename() {
      return this._filename;
    }

    get contentLength() {
      return this._contentLength;
    }

    get isRangeSupported() {
      return this._isRangeSupported;
    }

    get isStreamingSupported() {
      return this._isStreamingSupported;
    }

    /**
     * Reads the next chunk.
     * @returns {Promise<{value: (ArrayBuffer|undefined), done: boolean}>}
     * @throws The stored error, once the reader has been put in error state.
     */
    async read() {
      for (;;) {
        await this._readCapability.promise;
        if (this._done) {
          return { value: undefined, done: true };
        }
        if (this._storedError) {
          throw this._storedError;
        }

        const chunk = this._readableStream.read();
        if (chunk !== null) {
          this._loaded += chunk.length;
          this.onProgress?.({
            loaded: this._loaded,
            total: this._contentLength,
          });
          // Hand out an ArrayBuffer rather than the stream's own chunk.
          return { value: new Uint8Array(chunk).buffer, done: false };
        }
        // Nothing buffered right now: wait for the next stream event.
        this._readCapability = createPromiseCapability();
      }
    }

    /**
     * Cancels reading. Before a stream has been attached the reader is put in
     * error state directly; afterwards the stream is destroyed with `reason`.
     */
    cancel(reason) {
      if (this._readableStream) {
        this._readableStream.destroy(reason);
        return;
      }
      this._error(reason);
    }

    /** Stores `reason` and wakes up a pending `read()`. */
    _error(reason) {
      this._storedError = reason;
      this._readCapability.resolve();
    }

    /** Attaches the stream that `read()` pulls from and wires its events. */
    _setReadableStream(readableStream) {
      this._readableStream = readableStream;

      const wake = () => {
        this._readCapability.resolve();
      };
      readableStream.on("readable", wake);
      readableStream.on("end", () => {
        // Release the stream as soon as it is exhausted.
        readableStream.destroy();
        this._done = true;
        wake();
      });
      readableStream.on("error", reason => {
        this._error(reason);
      });

      // With range requests available and streaming disabled, the body must
      // not be consumed through this reader.
      const mustStopReading =
        this._isRangeSupported && !this._isStreamingSupported;
      if (mustStopReading) {
        this._error(new AbortException("streaming is disabled"));
      }

      // An error recorded earlier (or just above) tears the stream down.
      if (this._storedError) {
        this._readableStream.destroy(this._storedError);
      }
    }
  }
  /**
   * Shared implementation for readers that deliver one byte range from a
   * Node.js readable stream through promise-based `read()` calls.
   * Subclasses obtain the stream and pass it to `_setReadableStream`.
   */
  class BaseRangeReader {
    /**
     * @param {Object} stream - Owning stream; its `url` and `source` are read.
     */
    constructor(stream) {
      const { source } = stream;

      this._url = stream.url;
      this._done = false;
      this._storedError = null;
      this.onProgress = null;
      this._loaded = 0;
      this._readableStream = null;
      this._readCapability = createPromiseCapability();
      this._isStreamingSupported = !source.disableStream;
    }

    get isStreamingSupported() {
      return this._isStreamingSupported;
    }

    /**
     * Reads the next chunk of the range.
     * @returns {Promise<{value: (ArrayBuffer|undefined), done: boolean}>}
     * @throws The stored error, once the reader has been put in error state.
     */
    async read() {
      for (;;) {
        await this._readCapability.promise;
        if (this._done) {
          return { value: undefined, done: true };
        }
        if (this._storedError) {
          throw this._storedError;
        }

        const chunk = this._readableStream.read();
        if (chunk !== null) {
          this._loaded += chunk.length;
          this.onProgress?.({ loaded: this._loaded });
          // Hand out an ArrayBuffer rather than the stream's own chunk.
          return { value: new Uint8Array(chunk).buffer, done: false };
        }
        // Nothing buffered right now: wait for the next stream event.
        this._readCapability = createPromiseCapability();
      }
    }

    /**
     * Cancels reading. Before a stream has been attached the reader is put in
     * error state directly; afterwards the stream is destroyed with `reason`.
     */
    cancel(reason) {
      if (this._readableStream) {
        this._readableStream.destroy(reason);
        return;
      }
      this._error(reason);
    }

    /** Stores `reason` and wakes up a pending `read()`. */
    _error(reason) {
      this._storedError = reason;
      this._readCapability.resolve();
    }

    /** Attaches the stream that `read()` pulls from and wires its events. */
    _setReadableStream(readableStream) {
      this._readableStream = readableStream;

      const wake = () => {
        this._readCapability.resolve();
      };
      readableStream.on("readable", wake);
      readableStream.on("end", () => {
        // Release the stream as soon as it is exhausted.
        readableStream.destroy();
        this._done = true;
        wake();
      });
      readableStream.on("error", reason => {
        this._error(reason);
      });

      // An error recorded before the stream arrived tears it down at once.
      if (this._storedError) {
        this._readableStream.destroy(this._storedError);
      }
    }
  }
  /**
   * Builds the options object for a GET request from a parsed URL.
   *
   * @param {Object} parsedUrl - Parsed URL; `protocol`, `auth`, `hostname`,
   *   `port` and `path` are read from it.
   * @param {Object} headers - Request headers, passed through unchanged.
   * @returns {Object} Options with `hostname` exposed under the `host` key.
   */
  function createRequestOptions(parsedUrl, headers) {
    const { protocol, auth, hostname, port, path } = parsedUrl;
    return {
      protocol,
      auth,
      host: hostname,
      port,
      path,
      method: "GET",
      headers,
    };
  }
  /**
   * Full-document reader that fetches the PDF with an HTTP or HTTPS GET
   * request and exposes the response body through `BaseFullReader.read()`.
   */
  class PDFNodeStreamFullReader extends BaseFullReader {
    /**
     * Issues the request immediately.
     *
     * @param {Object} stream - Owning stream; `httpHeaders` and `isHttp` are
     *   read here, `url` and `source` by the base class.
     */
    constructor(stream) {
      super(stream);

      const handleResponse = response => {
        if (response.statusCode === 404) {
          // `this._url` is a legacy URL object without a custom `toString`,
          // so its `href` is used to get a readable message.
          const error = new MissingPDFException(
            `Missing PDF "${this._url.href}".`
          );
          // Go through `_error` so that a pending `read()` rejects instead of
          // waiting forever.
          this._error(error);
          this._headersCapability.reject(error);
          // The body is not wanted; discard it so the connection is released.
          response.resume();
          return;
        }
        this._headersCapability.resolve();
        this._setReadableStream(response);

        // Node.js exposes response header names in lower case.
        const getResponseHeader = name =>
          this._readableStream.headers[name.toLowerCase()];

        const { allowRangeRequests, suggestedLength } =
          validateRangeRequestCapabilities({
            getResponseHeader,
            isHttp: stream.isHttp,
            rangeChunkSize: this._rangeChunkSize,
            disableRange: this._disableRange,
          });

        this._isRangeSupported = allowRangeRequests;
        // Prefer the length derived from the response, if there is one.
        this._contentLength = suggestedLength || this._contentLength;
        this._filename = extractFilenameFromHeader(getResponseHeader);
      };

      // Anything that is not plain `http:` is sent through the https module.
      const transport = this._url.protocol === "http:" ? http : https;
      this._request = transport.request(
        createRequestOptions(this._url, stream.httpHeaders),
        handleResponse
      );

      this._request.on("error", reason => {
        // Record the failure and wake up a pending `read()`.
        this._error(reason);
        this._headersCapability.reject(reason);
      });
      // `end()` must always be called to finish the request, even though
      // there is no request body to send.
      this._request.end();
    }
  }
  /**
   * Range reader that fetches the bytes [start, end) of the PDF with an HTTP
   * or HTTPS GET request carrying a `Range` header.
   */
  class PDFNodeStreamRangeReader extends BaseRangeReader {
    /**
     * Issues the request immediately.
     *
     * @param {Object} stream - Owning stream; `httpHeaders` is read here,
     *   `url` and `source` by the base class.
     * @param {number} start - First byte of the range (inclusive).
     * @param {number} end - End of the range (exclusive).
     */
    constructor(stream, start, end) {
      super(stream);

      // Copy the caller's headers, leaving out entries without a value.
      this._httpHeaders = {};
      for (const property in stream.httpHeaders) {
        const value = stream.httpHeaders[property];
        if (value === undefined) {
          continue;
        }
        this._httpHeaders[property] = value;
      }
      // HTTP ranges are inclusive on both ends, hence `end - 1`.
      this._httpHeaders.Range = `bytes=${start}-${end - 1}`;

      const handleResponse = response => {
        if (response.statusCode === 404) {
          // `this._url` is a legacy URL object without a custom `toString`,
          // so its `href` is used to get a readable message.
          const error = new MissingPDFException(
            `Missing PDF "${this._url.href}".`
          );
          // This reader has no headers promise, so `read()` is the only place
          // the failure can surface: `_error` wakes it up so that it rejects
          // instead of waiting forever.
          this._error(error);
          // The body is not wanted; discard it so the connection is released.
          response.resume();
          return;
        }
        this._setReadableStream(response);
      };

      // Anything that is not plain `http:` is sent through the https module.
      const transport = this._url.protocol === "http:" ? http : https;
      this._request = transport.request(
        createRequestOptions(this._url, this._httpHeaders),
        handleResponse
      );

      this._request.on("error", reason => {
        // Record the failure and wake up a pending `read()`.
        this._error(reason);
      });
      // `end()` must always be called to finish the request.
      this._request.end();
    }
  }
  /**
   * Full-document reader that serves the PDF from the local file system.
   */
  class PDFNodeStreamFsFullReader extends BaseFullReader {
    /**
     * Looks the file up asynchronously. On success the content length is set
     * from the file size, a read stream is attached and the headers promise
     * resolves. On failure the headers promise rejects, with a
     * `MissingPDFException` when the file does not exist.
     *
     * @param {Object} stream - Owning stream; `url` and `source` are read by
     *   the base class.
     */
    constructor(stream) {
      super(stream);

      let filePath = decodeURIComponent(this._url.path);
      // For hrefs like `file:///C:/...` the URL path starts with an extra "/"
      // that is not part of the file-system path.
      if (fileUriRegex.test(this._url.href)) {
        filePath = filePath.replace(/^\//, "");
      }

      // `stat` (not `lstat`) so that a symbolic link reports the size of the
      // file it points to, which is the file `createReadStream` will read.
      fs.stat(filePath, (error, stat) => {
        if (error) {
          const failure =
            error.code === "ENOENT"
              ? new MissingPDFException(`Missing PDF "${filePath}".`)
              : error;
          // Go through `_error` so that a pending `read()` rejects instead of
          // waiting forever.
          this._error(failure);
          this._headersCapability.reject(failure);
          return;
        }
        // The file size is the authoritative content length.
        this._contentLength = stat.size;

        this._setReadableStream(fs.createReadStream(filePath));
        this._headersCapability.resolve();
      });
    }
  }
  /**
   * Range reader that serves the bytes [start, end) of the PDF from the
   * local file system.
   */
  class PDFNodeStreamFsRangeReader extends BaseRangeReader {
    /**
     * @param {Object} stream - Owning stream; `url` and `source` are read by
     *   the base class.
     * @param {number} start - First byte of the range (inclusive).
     * @param {number} end - End of the range (exclusive).
     */
    constructor(stream, start, end) {
      super(stream);

      const decodedPath = decodeURIComponent(this._url.path);
      // For hrefs like `file:///C:/...` the URL path starts with an extra "/"
      // that is not part of the file-system path.
      const hasDriveLetter = fileUriRegex.test(this._url.href);
      const fsPath = hasDriveLetter
        ? decodedPath.replace(/^\//, "")
        : decodedPath;

      // `createReadStream` treats `end` as inclusive, hence `end - 1`.
      const readOptions = { start, end: end - 1 };
      this._setReadableStream(fs.createReadStream(fsPath, readOptions));
    }
  }
  391. export { PDFNodeStream };