pdf2svg.js 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. /* Any copyright is dedicated to the Public Domain.
  2. * http://creativecommons.org/publicdomain/zero/1.0/ */
  3. //
  4. // Node tool to dump SVG output into a file.
  5. //
  6. const fs = require("fs");
  7. const util = require("util");
  8. const path = require("path");
  9. const stream = require("stream");
  10. // HACK: A few hacks to let PDF.js be loaded in global space rather than as a module.
  11. require("./domstubs.js").setStubs(global);
  12. // Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
  13. const pdfjsLib = require("pdfjs-dist/legacy/build/pdf.js");
  // Some PDFs need external cmaps.
  const CMAP_URL = "../../node_modules/pdfjs-dist/cmaps/";
  const CMAP_PACKED = true;

  // Load the input file from the file system into a typed array. The path is
  // the first command-line argument, falling back to a default sample path.
  const pdfPath =
    process.argv[2] || "../../web/compressed.tracemonkey-pldi-09.pdf";
  const data = new Uint8Array(fs.readFileSync(pdfPath));

  // Directory that receives the generated SVG files.
  const outputDirectory = "./svgdump";
  // `recursive: true` makes this succeed when the directory already exists and
  // also creates any missing parent directories, so no EEXIST handling (or an
  // external mkdirp module) is needed. It still throws if the path exists but
  // is not a directory, instead of hiding that until the first write.
  fs.mkdirSync(outputDirectory, { recursive: true });
  /**
   * Builds the output path of the SVG file for one page. Files are placed in
   * `outputDirectory` and named after the input PDF (without its extension)
   * followed by the page number.
   *
   * @param {number} pageNum Page number used as the file name suffix.
   * @returns {string} Path of the SVG file for that page.
   */
  function getFilePathForPage(pageNum) {
    const extension = path.extname(pdfPath);
    const baseName = path.basename(pdfPath, extension);
    const fileName = `${baseName}-${pageNum}.svg`;
    return path.join(outputDirectory, fileName);
  }
  /**
   * Readable stream that emits the serialized form of a DOM element (as
   * defined by domstubs.js), pulling the text piece by piece from the
   * element's serializer.
   *
   * May be invoked with or without `new`.
   *
   * @param {object} options Also forwarded to the `stream.Readable` constructor.
   * @param {DOMElement} options.svgElement The element to serialize
   */
  function ReadableSVGStream(options) {
    const invokedAsConstructor = this instanceof ReadableSVGStream;
    if (!invokedAsConstructor) {
      return new ReadableSVGStream(options);
    }
    stream.Readable.call(this, options);
    this.serializer = options.svgElement.getSerializer();
  }
  util.inherits(ReadableSVGStream, stream.Readable);

  // Implements https://nodejs.org/api/stream.html#stream_readable_read_size_1
  ReadableSVGStream.prototype._read = function () {
    for (;;) {
      const piece = this.serializer.getNext();
      if (piece === null) {
        // The serializer is exhausted: signal end-of-stream.
        this.push(null);
        return;
      }
      if (!this.push(piece)) {
        // The consumer asked us to pause; resume on the next _read() call.
        return;
      }
    }
  };
  /**
   * Streams the serialization of an SVG element into a file.
   *
   * @param {DOMElement} svgElement The element to serialize.
   * @param {string} filePath Path of the file to write.
   * @returns {Promise} Resolves once the file stream emits "finish"; rejects
   *   with the first error emitted by either stream.
   */
  function writeSvgToFile(svgElement, filePath) {
    let source = new ReadableSVGStream({ svgElement });
    const sink = fs.createWriteStream(filePath);

    const piped = new Promise((resolve, reject) => {
      source.once("error", reject);
      sink.once("error", reject);
      sink.once("finish", resolve);
      source.pipe(sink);
    });

    return piped.catch((err) => {
      source = null; // Explicitly null because of v8 bug 6512.
      sink.end();
      throw err;
    });
  }
  // Start loading the document. The document, its pages and their operator
  // lists are all obtained with async/await below.
  const loadingTask = pdfjsLib.getDocument({
    data,
    cMapUrl: CMAP_URL,
    cMapPacked: CMAP_PACKED,
    fontExtraProperties: true,
  });

  // Converts every page of the document to SVG, one page at a time, and writes
  // each result to its own file. A failure on one page is logged and the loop
  // moves on to the next page.
  (async function () {
    const doc = await loadingTask.promise;
    const numPages = doc.numPages;
    console.log("# Document Loaded");
    console.log(`Number of Pages: ${numPages}`);
    console.log();

    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      try {
        let page = null;
        try {
          page = await doc.getPage(pageNum);
          console.log(`# Page ${pageNum}`);
          const viewport = page.getViewport({ scale: 1.0 });
          console.log(`Size: ${viewport.width}x${viewport.height}`);
          console.log();

          const opList = await page.getOperatorList();
          const svgGfx = new pdfjsLib.SVGGraphics(
            page.commonObjs,
            page.objs,
            /* forceDataSchema = */ true
          );
          svgGfx.embedFonts = true;
          const svg = await svgGfx.getSVG(opList, viewport);
          await writeSvgToFile(svg, getFilePathForPage(pageNum));
        } finally {
          // Release page resources even when rendering or writing failed.
          if (page !== null) {
            page.cleanup();
          }
        }
      } catch (err) {
        console.log(`Error: ${err}`);
      }
    }
    console.log("# End of Document");
  })().catch(function (err) {
    // Reached when the document itself cannot be loaded; report it and make
    // the process exit with a failure code instead of leaving an unhandled
    // promise rejection.
    console.error(`Error: ${err}`);
    process.exitCode = 1;
  });