metadata_spec.js 12 KB


  /* Copyright 2017 Mozilla Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  15. import { isEmptyObj } from "./test_utils.js";
  16. import { Metadata } from "../../src/display/metadata.js";
  17. import { MetadataParser } from "../../src/core/metadata_parser.js";
  /**
   * Test helper: runs the raw metadata through `MetadataParser` and wraps the
   * parser's `serializable` result in a `Metadata` instance, so each test can
   * go from an input string to the object under test in one call.
   *
   * @param {string} data - Raw metadata to parse; every caller in this file
   *   passes an XMP/XML string.
   * @returns {Metadata} A `Metadata` instance built from the parsed data.
   */
  function createMetadata(data) {
    const metadataParser = new MetadataParser(data);
    return new Metadata(metadataParser.serializable);
  }
  // Spec for the metadata pipeline exercised through `createMetadata`: each
  // case feeds an XMP string in and checks the `has`/`get`/`getAll` results.
  describe("metadata", function () {
    // Baseline: a well-formed packet with a single `dc:title` entry.
    it("should handle valid metadata", function () {
      const data =
        "<x:xmpmeta xmlns:x='adobe:ns:meta/'>" +
        "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>" +
        "<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
        '<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Foo bar baz</rdf:li>' +
        "</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
      const metadata = createMetadata(data);

      expect(metadata.has("dc:title")).toBeTruthy();
      expect(metadata.has("dc:qux")).toBeFalsy();

      expect(metadata.get("dc:title")).toEqual("Foo bar baz");
      expect(metadata.get("dc:qux")).toEqual(null);

      expect(metadata.getAll()).toEqual({ "dc:title": "Foo bar baz" });
    });

    // The title is written as literal backslash + octal digit sequences
    // (the `\\` below is an escaped backslash) followed by a bare `&`; the
    // expected result is the plain text "PDF&".
    it("should repair and handle invalid metadata", function () {
      const data =
        "<x:xmpmeta xmlns:x='adobe:ns:meta/'>" +
        "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>" +
        "<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
        "<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>" +
        "</rdf:Description></rdf:RDF></x:xmpmeta>";
      const metadata = createMetadata(data);

      expect(metadata.has("dc:title")).toBeTruthy();
      expect(metadata.has("dc:qux")).toBeFalsy();

      expect(metadata.get("dc:title")).toEqual("PDF&");
      expect(metadata.get("dc:qux")).toEqual(null);

      expect(metadata.getAll()).toEqual({ "dc:title": "PDF&" });
    });

    // Larger sample with the same backslash-octal encoding spread over several
    // elements (`xap:CreatorTool`, `dc:title`, `dc:creator`), mixed with an
    // `&apos;` entity. Only element content is expected in `getAll()`; values
    // given as attributes (e.g. `pdf:Producer`, `dc:format`) are not.
    it("should repair and handle invalid metadata (bug 1424938)", function () {
      const data =
        "<x:xmpmeta xmlns:x='adobe:ns:meta/' " +
        "x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>" +
        "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' " +
        "xmlns:iX='http://ns.adobe.com/iX/1.0/'>" +
        "<rdf:Description rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' " +
        "xmlns:pdf='http://ns.adobe.com/pdf/1.3/' " +
        "pdf:Producer='GPL Ghostscript 8.63'/>" +
        "<rdf:Description rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' " +
        "xmlns:xap='http://ns.adobe.com/xap/1.0/' " +
        "xap:ModifyDate='2009-02-13T12:42:54+01:00' " +
        "xap:CreateDate='2009-02-13T12:42:54+01:00'>" +
        "<xap:CreatorTool>\\376\\377\\000P\\000D\\000F\\000C\\000r\\000e\\000a" +
        "\\000t\\000o\\000r\\000 \\000V\\000e\\000r\\000s\\000i\\000o\\000n" +
        "\\000 \\0000\\000.\\0009\\000.\\0006</xap:CreatorTool>" +
        "</rdf:Description><rdf:Description " +
        "rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' " +
        "xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/' " +
        "xapMM:DocumentID='61652fa7-fc1f-11dd-0000-ce81d41f9ecf'/>" +
        "<rdf:Description rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' " +
        "xmlns:dc='http://purl.org/dc/elements/1.1/' " +
        "dc:format='application/pdf'><dc:title><rdf:Alt>" +
        "<rdf:li xml:lang='x-default'>\\376\\377\\000L\\000&apos;\\000O\\000d" +
        "\\000i\\000s\\000s\\000e\\000e\\000 \\000t\\000h\\000\\351\\000m\\000a" +
        "\\000t\\000i\\000q\\000u\\000e\\000 \\000l\\000o\\000g\\000o\\000 " +
        "\\000O\\000d\\000i\\000s\\000s\\000\\351\\000\\351\\000 \\000-\\000 " +
        "\\000d\\000\\351\\000c\\000e\\000m\\000b\\000r\\000e\\000 \\0002\\0000" +
        "\\0000\\0008\\000.\\000p\\000u\\000b</rdf:li></rdf:Alt></dc:title>" +
        "<dc:creator><rdf:Seq><rdf:li>\\376\\377\\000O\\000D\\000I\\000S" +
        "</rdf:li></rdf:Seq></dc:creator></rdf:Description></rdf:RDF>" +
        "</x:xmpmeta>";
      const metadata = createMetadata(data);

      expect(metadata.has("dc:title")).toBeTruthy();
      expect(metadata.has("dc:qux")).toBeFalsy();

      expect(metadata.get("dc:title")).toEqual(
        "L'Odissee thématique logo Odisséé - décembre 2008.pub"
      );
      expect(metadata.get("dc:qux")).toEqual(null);

      expect(metadata.getAll()).toEqual({
        "dc:creator": ["ODIS"],
        "dc:title": "L'Odissee thématique logo Odisséé - décembre 2008.pub",
        "xap:creatortool": "PDFCreator Version 0.9.6",
      });
    });

    // The leading `<?xpacket ...` instruction is never closed (its closing
    // `"?>` is missing), and the `rdf:about=""` attributes run straight into
    // the following `xmlns:` attribute with no separating space. The expected
    // outcome is an empty result rather than an exception.
    it("should gracefully handle incomplete tags (issue 8884)", function () {
      const data =
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d' +
        '<x:xmpmeta xmlns:x="adobe:ns:meta/">' +
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">' +
        '<rdf:Description rdf:about=""' +
        'xmlns:pdfx="http://ns.adobe.com/pdfx/1.3/">' +
        "</rdf:Description>" +
        '<rdf:Description rdf:about=""' +
        'xmlns:xap="http://ns.adobe.com/xap/1.0/">' +
        "<xap:ModifyDate>2010-03-25T11:20:09-04:00</xap:ModifyDate>" +
        "<xap:CreateDate>2010-03-25T11:20:09-04:00</xap:CreateDate>" +
        "<xap:MetadataDate>2010-03-25T11:20:09-04:00</xap:MetadataDate>" +
        "</rdf:Description>" +
        '<rdf:Description rdf:about=""' +
        'xmlns:dc="http://purl.org/dc/elements/1.1/">' +
        "<dc:format>application/pdf</dc:format>" +
        "</rdf:Description>" +
        '<rdf:Description rdf:about=""' +
        'xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">' +
        "<pdfaid:part>1</pdfaid:part>" +
        "<pdfaid:conformance>A</pdfaid:conformance>" +
        "</rdf:Description>" +
        "</rdf:RDF>" +
        "</x:xmpmeta>" +
        '<?xpacket end="w"?>';
      const metadata = createMetadata(data);

      expect(isEmptyObj(metadata.getAll())).toEqual(true);
    });

    // A complete packet wrapped in `<?xpacket ...?>` instructions, with many
    // empty elements. Empty elements are expected to come back as "" (or as
    // [""] / [] for the `rdf:Seq` / `rdf:Bag` containers), keyed by lower-cased
    // names.
    // NOTE(review): the `begin=""` attribute looks empty here; if the original
    // fixture carried an invisible character there (the "junk" in the title),
    // confirm it is still present.
    it('should gracefully handle "junk" before the actual metadata (issue 10395)', function () {
      const data =
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>' +
        '<x:xmpmeta x:xmptk="TallComponents PDFObjects 1.0" ' +
        'xmlns:x="adobe:ns:meta/">' +
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">' +
        '<rdf:Description rdf:about="" ' +
        'xmlns:pdf="http://ns.adobe.com/pdf/1.3/">' +
        "<pdf:Producer>PDFKit.NET 4.0.102.0</pdf:Producer>" +
        "<pdf:Keywords></pdf:Keywords>" +
        "<pdf:PDFVersion>1.7</pdf:PDFVersion></rdf:Description>" +
        '<rdf:Description rdf:about="" ' +
        'xmlns:xap="http://ns.adobe.com/xap/1.0/">' +
        "<xap:CreateDate>2018-12-27T13:50:36-08:00</xap:CreateDate>" +
        "<xap:ModifyDate>2018-12-27T13:50:38-08:00</xap:ModifyDate>" +
        "<xap:CreatorTool></xap:CreatorTool>" +
        "<xap:MetadataDate>2018-12-27T13:50:38-08:00</xap:MetadataDate>" +
        '</rdf:Description><rdf:Description rdf:about="" ' +
        'xmlns:dc="http://purl.org/dc/elements/1.1/">' +
        "<dc:creator><rdf:Seq><rdf:li></rdf:li></rdf:Seq></dc:creator>" +
        "<dc:subject><rdf:Bag /></dc:subject>" +
        '<dc:description><rdf:Alt><rdf:li xml:lang="x-default">' +
        "</rdf:li></rdf:Alt></dc:description>" +
        '<dc:title><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li>' +
        "</rdf:Alt></dc:title><dc:format>application/pdf</dc:format>" +
        '</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
      const metadata = createMetadata(data);

      expect(metadata.has("dc:title")).toBeTruthy();
      expect(metadata.has("dc:qux")).toBeFalsy();

      expect(metadata.get("dc:title")).toEqual("");
      expect(metadata.get("dc:qux")).toEqual(null);

      expect(metadata.getAll()).toEqual({
        "dc:creator": [""],
        "dc:description": "",
        "dc:format": "application/pdf",
        "dc:subject": [],
        "dc:title": "",
        "pdf:keywords": "",
        "pdf:pdfversion": "1.7",
        "pdf:producer": "PDFKit.NET 4.0.102.0",
        "xap:createdate": "2018-12-27T13:50:36-08:00",
        "xap:creatortool": "",
        "xap:metadatadate": "2018-12-27T13:50:38-08:00",
        "xap:modifydate": "2018-12-27T13:50:38-08:00",
      });
    });

    // `&apos;` entities inside the title are expected to come back as literal
    // single quotes.
    it('should correctly handle metadata containing "&apos" (issue 10407)', function () {
      const data =
        "<x:xmpmeta xmlns:x='adobe:ns:meta/'>" +
        "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>" +
        "<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
        "<dc:title><rdf:Alt>" +
        '<rdf:li xml:lang="x-default">&apos;Foo bar baz&apos;</rdf:li>' +
        "</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
      const metadata = createMetadata(data);

      expect(metadata.has("dc:title")).toBeTruthy();
      expect(metadata.has("dc:qux")).toBeFalsy();

      expect(metadata.get("dc:title")).toEqual("'Foo bar baz'");
      expect(metadata.get("dc:qux")).toEqual(null);

      expect(metadata.getAll()).toEqual({ "dc:title": "'Foo bar baz'" });
    });

    // The data ends with `</x:xmpmeta>` although no `<x:xmpmeta>` element was
    // ever opened. The expected outcome is an empty result rather than an
    // exception.
    it("should gracefully handle unbalanced end tags (issue 10410)", function () {
      const data =
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>' +
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">' +
        '<rdf:Description rdf:about="" ' +
        'xmlns:pdf="http://ns.adobe.com/pdf/1.3/">' +
        "<pdf:Producer>Soda PDF 5</pdf:Producer></rdf:Description>" +
        '<rdf:Description rdf:about="" ' +
        'xmlns:xap="http://ns.adobe.com/xap/1.0/">' +
        "<xap:CreateDate>2018-10-02T08:14:49-05:00</xap:CreateDate>" +
        "<xap:CreatorTool>Soda PDF 5</xap:CreatorTool>" +
        "<xap:MetadataDate>2018-10-02T08:14:49-05:00</xap:MetadataDate> " +
        "<xap:ModifyDate>2018-10-02T08:14:49-05:00</xap:ModifyDate>" +
        '</rdf:Description><rdf:Description rdf:about="" ' +
        'xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/">' +
        "<xmpMM:DocumentID>uuid:00000000-1c84-3cf9-89ba-bef0e729c831" +
        "</xmpMM:DocumentID></rdf:Description>" +
        '</rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
      const metadata = createMetadata(data);

      expect(isEmptyObj(metadata.getAll())).toEqual(true);
    });

    // The DOCTYPE declares nested entities `lol` .. `lol9`, each expanding to
    // several copies of the previous one. The title references `&lol9;`, and
    // the expected value keeps that reference as literal text ("a&lol9;b")
    // instead of expanding it.
    it("should not be vulnerable to the billion laughs attack", function () {
      const data =
        '<?xml version="1.0"?>' +
        "<!DOCTYPE lolz [" +
        ' <!ENTITY lol "lol">' +
        ' <!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">' +
        ' <!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">' +
        ' <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">' +
        ' <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">' +
        ' <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">' +
        ' <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">' +
        ' <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">' +
        ' <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">' +
        ' <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">' +
        "]>" +
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">' +
        ' <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/">' +
        " <dc:title>" +
        " <rdf:Alt>" +
        ' <rdf:li xml:lang="x-default">a&lol9;b</rdf:li>' +
        " </rdf:Alt>" +
        " </dc:title>" +
        " </rdf:Description>" +
        "</rdf:RDF>";
      const metadata = createMetadata(data);

      expect(metadata.has("dc:title")).toBeTruthy();
      expect(metadata.has("dc:qux")).toBeFalsy();

      expect(metadata.get("dc:title")).toEqual("a&lol9;b");
      expect(metadata.get("dc:qux")).toEqual(null);

      expect(metadata.getAll()).toEqual({ "dc:title": "a&lol9;b" });
    });
  });