parser_spec.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
/* Copyright 2017 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  15. import { Cmd, EOF, Name } from "../../src/core/primitives.js";
  16. import { Lexer, Linearization, Parser } from "../../src/core/parser.js";
  17. import { FormatError } from "../../src/shared/util.js";
  18. import { StringStream } from "../../src/core/stream.js";
  19. describe("parser", function () {
  20. describe("Parser", function () {
  describe("inlineStreamSkipEI", function () {
    // Builds a Parser (no XRef table, streams allowed) whose Lexer reads from
    // the given stream.
    const createParser = stream =>
      new Parser({
        lexer: new Lexer(stream),
        xref: null,
        allowStreams: true,
      });

    it("should skip over the EI marker if it is found", function () {
      const data =
        "q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 " +
        "/F /A85 ID abc123~> EI Q";
      const stream = new StringStream(data);

      createParser(stream).inlineStreamSkipEI(stream);

      // The stream position must now be at the "Q" that follows the marker.
      expect(stream.pos).toEqual(data.indexOf("Q"));
      expect(stream.peekByte()).toEqual(0x51); // 'Q'
    });

    it("should skip to the end of stream if the EI marker is not found", function () {
      const data =
        "q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 /F /A85 ID abc123~> Q";
      const stream = new StringStream(data);

      createParser(stream).inlineStreamSkipEI(stream);

      // Without a marker, the whole stream is expected to be consumed.
      expect(stream.pos).toEqual(data.length);
      expect(stream.peekByte()).toEqual(-1);
    });
  });
  50. });
  51. describe("Lexer", function () {
  describe("nextChar", function () {
    it("should return and set -1 when the end of the stream is reached", function () {
      const lexer = new Lexer(new StringStream(""));

      // Both the return value and `currentChar` are checked.
      const returned = lexer.nextChar();
      expect(returned).toEqual(-1);
      expect(lexer.currentChar).toEqual(-1);
    });

    it("should return and set the character after the current position", function () {
      const lexer = new Lexer(new StringStream("123"));

      const returned = lexer.nextChar();
      expect(returned).toEqual(0x32); // '2'
      expect(lexer.currentChar).toEqual(0x32); // '2'
    });
  });
  describe("peekChar", function () {
    it("should only return -1 when the end of the stream is reached", function () {
      const lexer = new Lexer(new StringStream(""));

      const peeked = lexer.peekChar();
      expect(peeked).toEqual(-1);
      expect(lexer.currentChar).toEqual(-1);
    });

    it("should only return the character after the current position", function () {
      const lexer = new Lexer(new StringStream("123"));

      // Peeking returns the next character while `currentChar` stays at '1'.
      const peeked = lexer.peekChar();
      expect(peeked).toEqual(0x32); // '2'
      expect(lexer.currentChar).toEqual(0x31); // '1'
    });
  });
  describe("getNumber", function () {
    // Wraps `text` in a StringStream and returns a Lexer reading from it.
    const lexerFor = text => new Lexer(new StringStream(text));

    it("should stop parsing numbers at the end of stream", function () {
      expect(lexerFor("11.234").getNumber()).toEqual(11.234);
    });

    it("should parse PostScript numbers", function () {
      const samples = [
        "-.002",
        "34.5",
        "-3.62",
        "123.6e10",
        "1E-5",
        "-1.",
        "0.0",
        "123",
        "-98",
        "43445",
        "0",
        "+17",
      ];

      for (const sample of samples) {
        const actual = lexerFor(sample).getNumber();
        const wanted = parseFloat(sample);
        const isRoundingNoise =
          actual !== wanted && Math.abs(actual - wanted) < 1e-15;

        if (!isRoundingNoise) {
          expect(actual).toEqual(wanted);
          continue;
        }
        // Tolerate differences below 1e-15, but still register an expectation
        // for this sample.
        console.error(
          `Fuzzy matching "${actual}" with "${wanted}" to ` +
            "work-around rounding bugs in Chromium browsers."
        );
        expect(true).toEqual(true);
      }
    });

    it("should ignore double negative before number", function () {
      expect(lexerFor("--205.88").getNumber()).toEqual(-205.88);
    });

    it("should ignore minus signs in the middle of number", function () {
      expect(lexerFor("205--.88").getNumber()).toEqual(205.88);
    });

    it("should ignore line-breaks between operator and digit in number", function () {
      const cases = [
        ["-\r\n205.88", -205.88],
        ["+\r\n205.88", 205.88],
      ];
      for (const [text, wanted] of cases) {
        expect(lexerFor(text).getNumber()).toEqual(wanted);
      }
    });

    it("should treat a single decimal point, or minus/plus sign, as zero", function () {
      // Inputs that are expected to be read as zero.
      [".", "-", "+", "-.", "+.", "-\r\n.", "+\r\n."].forEach(text => {
        expect(lexerFor(text).getNumber()).toEqual(0);
      });

      // Inputs that are expected to be rejected with a FormatError.
      ["..", ".-", ".+"].forEach(text => {
        const lexer = lexerFor(text);
        expect(() => lexer.getNumber()).toThrowError(
          FormatError,
          /^Invalid number:\s/
        );
      });
    });

    it("should handle glued numbers and operators", function () {
      const lexer = lexerFor("123ET");

      expect(lexer.getNumber()).toEqual(123);
      // The 'E' that follows the digits must still be the current character.
      expect(lexer.currentChar).toEqual(0x45); // 'E'
    });
  });
  describe("getString", function () {
    it("should stop parsing strings at the end of stream", function () {
      const stream = new StringStream("(1$4)");
      const originalGetByte = stream.getByte;
      // Simulate end of file by returning -1 in place of '$' (see issue 2766).
      stream.getByte = function () {
        const byte = originalGetByte.call(stream);
        return byte === 0x24 /* '$' */ ? -1 : byte;
      };

      const lexer = new Lexer(stream);
      expect(lexer.getString()).toEqual("1");
    });

    it("should ignore escaped CR and LF", function () {
      // A backslash followed by <CR><LF>, <CR> or <LF> must be dropped, e.g.
      // '(\101\<CR><LF>\102)' should be parsed as 'AB'.
      const lexer = new Lexer(
        new StringStream("(\\101\\\r\n\\102\\\r\\103\\\n\\104)")
      );
      expect(lexer.getString()).toEqual("ABCD");
    });
  });
  describe("getHexString", function () {
    it("should not throw exception on bad input", function () {
      // The digits '7 0 2 15 5 2 2 2 4 3 2 4' should be parsed as the hex
      // pairs '70 21 55 22 24 32'.
      const lexer = new Lexer(new StringStream("<7 0 2 15 5 2 2 2 4 3 2 4>"));
      const parsed = lexer.getHexString();
      expect(parsed).toEqual('p!U"$2');
    });
  });
  describe("getName", function () {
    it("should handle Names with invalid usage of NUMBER SIGN (#)", function () {
      // Each entry pairs the raw input with the name expected from it.
      const cases = [
        ["/# 680 0 R", "#"],
        ["/#AQwerty", "#AQwerty"],
        ["/#A<</B", "#A"],
      ];

      for (const [raw, wanted] of cases) {
        const lexer = new Lexer(new StringStream(raw));
        expect(lexer.getName()).toEqual(Name.get(wanted));
      }
    });
  });
  describe("getObj", function () {
    it(
      "should stop immediately when the start of a command is " +
        "a non-visible ASCII character (issue 13999)",
      function () {
        const lexer = new Lexer(new StringStream("\x14q\nQ"));

        // The control character, "q" and "Q" must each come back as their own
        // command, in that order.
        for (const wantedCmd of ["\x14", "q", "Q"]) {
          const token = lexer.getObj();
          expect(token instanceof Cmd).toEqual(true);
          expect(token.cmd).toEqual(wantedCmd);
        }

        // Nothing is left after the three commands.
        expect(lexer.getObj()).toEqual(EOF);
      }
    );
  });
  217. });
  218. describe("Linearization", function () {
  it("should not find a linearization dictionary", function () {
    // Joins the given lines with "\n" (no trailing newline) into a stream.
    const streamOf = lines => new StringStream(lines.join("\n"));

    // Not an actual linearization dictionary.
    const plainObject = streamOf([
      "3 0 obj",
      "<<",
      "/Length 4622",
      "/Filter /FlateDecode",
      ">>",
      "endobj",
    ]);
    expect(Linearization.create(plainObject)).toEqual(null);

    // Linearization dictionary with invalid version number.
    const badVersion = streamOf([
      "1 0 obj",
      "<<",
      "/Linearized 0",
      ">>",
      "endobj",
    ]);
    expect(Linearization.create(badVersion)).toEqual(null);
  });
  it("should accept a valid linearization dictionary", function () {
    // The lines are joined with "\n" (no trailing newline); the result is
    // 90 characters long, matching the /L entry.
    const source = [
      "131 0 obj",
      "<<",
      "/Linearized 1",
      "/O 133",
      "/H [ 1388 863 ]",
      "/L 90",
      "/E 43573",
      "/N 18",
      "/T 193883",
      ">>",
      "endobj",
    ].join("\n");

    const parsed = Linearization.create(new StringStream(source));

    // One expected property per dictionary entry, plus `pageFirst`, which the
    // dictionary above does not specify.
    expect(parsed).toEqual({
      objectNumberFirst: 133, // /O
      hints: [1388, 863], // /H
      length: 90, // /L
      endFirst: 43573, // /E
      numPages: 18, // /N
      mainXRefEntriesOffset: 193883, // /T
      pageFirst: 0,
    });
  });
  it(
    "should reject a linearization dictionary with invalid " +
      "integer parameters",
    function () {
      // Each case lists the dictionary lines (joined with "\n") and the exact
      // error message that creating the linearization must throw.
      const cases = [
        {
          // The /L parameter should be equal to the stream length.
          lines: [
            "1 0 obj",
            "<<",
            "/Linearized 1",
            "/O 133",
            "/H [ 1388 863 ]",
            "/L 196622",
            "/E 43573",
            "/N 18",
            "/T 193883",
            ">>",
            "endobj",
          ],
          message:
            'The "L" parameter in the linearization ' +
            "dictionary does not equal the stream length.",
        },
        {
          // The /E parameter should not be zero.
          lines: [
            "1 0 obj",
            "<<",
            "/Linearized 1",
            "/O 133",
            "/H [ 1388 863 ]",
            "/L 84",
            "/E 0",
            "/N 18",
            "/T 193883",
            ">>",
            "endobj",
          ],
          message:
            'The "E" parameter in the linearization dictionary is invalid.',
        },
        {
          // The /O parameter should be an integer.
          lines: [
            "1 0 obj",
            "<<",
            "/Linearized 1",
            "/O /abc",
            "/H [ 1388 863 ]",
            "/L 89",
            "/E 43573",
            "/N 18",
            "/T 193883",
            ">>",
            "endobj",
          ],
          message:
            'The "O" parameter in the linearization dictionary is invalid.',
        },
      ];

      for (const { lines, message } of cases) {
        const stream = new StringStream(lines.join("\n"));
        expect(() => Linearization.create(stream)).toThrow(new Error(message));
      }
    }
  );
  it("should reject a linearization dictionary with invalid hint parameters", function () {
    // Builds a dictionary that differs only in its /H and /L lines; the lines
    // are joined with "\n" and there is no trailing newline.
    const streamWith = (hintLine, lengthLine) =>
      new StringStream(
        [
          "1 0 obj",
          "<<",
          "/Linearized 1",
          "/O 133",
          hintLine,
          lengthLine,
          "/E 43573",
          "/N 18",
          "/T 193883",
          ">>",
          "endobj",
        ].join("\n")
      );

    const cases = [
      {
        // The /H parameter should be an array.
        stream: streamWith("/H 1388", "/L 80"),
        message: "Hint array in the linearization dictionary is invalid.",
      },
      {
        // The hint array should contain two, or four, elements.
        stream: streamWith("/H [ 1388 ]", "/L 84"),
        message: "Hint array in the linearization dictionary is invalid.",
      },
      {
        // The hint array should not contain zero.
        stream: streamWith("/H [ 1388 863 0 234]", "/L 93"),
        message: "Hint (2) in the linearization dictionary is invalid.",
      },
    ];

    for (const { stream, message } of cases) {
      expect(() => Linearization.create(stream)).toThrow(new Error(message));
    }
  });
  403. });
  404. });