/* Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/publicdomain/zero/1.0/ */

//
// getinfo.js
//
// Basic node example that prints document metadata and text content.
//
// Usage: node getinfo.js [path/to/file.pdf]
//
// Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.

const pdfjsLib = require("pdfjs-dist/legacy/build/pdf.js");

// Path of the PDF to inspect: the first command-line argument, or the
// default sample path when no argument is given.
const pdfPath =
  process.argv[2] || "../../web/compressed.tracemonkey-pldi-09.pdf";

/**
 * Prints the document's info dictionary and, when present, its metadata.
 *
 * @param {Object} doc - Document object resolved by `loadingTask.promise`.
 * @returns {Promise<void>} Settles once the metadata has been printed.
 */
function printMetadata(doc) {
  return doc.getMetadata().then(({ info, metadata }) => {
    console.log("# Metadata Is Loaded");
    console.log("## Info");
    console.log(JSON.stringify(info, null, 2));
    console.log();
    // `metadata` may be absent; only print the section when it exists.
    if (metadata) {
      console.log("## Metadata");
      console.log(JSON.stringify(metadata.getAll(), null, 2));
      console.log();
    }
  });
}

/**
 * Prints the size and the text content of a single page.
 *
 * @param {Object} doc - Document object resolved by `loadingTask.promise`.
 * @param {number} pageNum - 1-based page number to print.
 * @returns {Promise<void>} Settles once the page has been printed.
 */
function printPage(doc, pageNum) {
  return doc.getPage(pageNum).then((page) => {
    console.log(`# Page ${pageNum}`);
    const { width, height } = page.getViewport({ scale: 1.0 });
    console.log(`Size: ${width}x${height}`);
    console.log();
    return page.getTextContent().then((content) => {
      // Content contains lots of information about the text layout and
      // styles, but we need only strings at the moment.
      const strings = content.items.map((item) => item.str);
      console.log("## Text Content");
      console.log(strings.join(" "));
      // Release page resources.
      page.cleanup();
      console.log();
    });
  });
}

/**
 * Prints the page count, then the metadata, then every page in order.
 *
 * @param {Object} doc - Document object resolved by `loadingTask.promise`.
 * @returns {Promise<void>} Settles after the last page has been printed.
 */
function printDocument(doc) {
  const { numPages } = doc;
  console.log("# Document Loaded");
  console.log(`Number of Pages: ${numPages}`);
  console.log();

  // Chain the work so that the first page waits on the metadata and every
  // subsequent page waits on the previous one; output stays in page order.
  let lastPromise = printMetadata(doc);
  for (let pageNum = 1; pageNum <= numPages; pageNum++) {
    lastPromise = lastPromise.then(() => printPage(doc, pageNum));
  }
  return lastPromise;
}

/**
 * Reports a failure on stderr and marks the process as failed, so that
 * callers (shells, CI) can detect the error through the exit status.
 *
 * @param {*} err - The rejection reason.
 */
function reportFailure(err) {
  console.error(`Error: ${err}`);
  process.exitCode = 1;
}

// Will be using promises to load document, pages and misc data instead of
// callback.
const loadingTask = pdfjsLib.getDocument(pdfPath);
loadingTask.promise
  .then(printDocument)
  .then(() => {
    console.log("# End of Document");
  }, reportFailure)
  // Runs on both the success and the failure path (the previous step always
  // settles fulfilled) so the document resources are released either way.
  .then(() => loadingTask.destroy())
  .catch(reportFailure);