pdf_find_controller_spec.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692
  1. /* Copyright 2018 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import { buildGetDocumentParams } from "./test_utils.js";
  16. import { EventBus } from "../../web/event_utils.js";
  17. import { getDocument } from "../../src/display/api.js";
  18. import { isNodeJS } from "../../src/shared/is_node.js";
  19. import { PDFFindController } from "../../web/pdf_find_controller.js";
  20. import { SimpleLinkService } from "../../web/pdf_link_service.js";
  // Document loaded by `initPdfFindController` when no file name is given.
  const tracemonkeyFileName = "tracemonkey.pdf";

  // Location of the packed CMap files; the relative path depends on whether
  // the tests run in Node.js or not.
  const BCMAPS_URL = isNodeJS
    ? "./external/bcmaps/"
    : "../../../external/bcmaps/";

  // CMap options that are merged into the `getDocument` parameters.
  const CMAP_PARAMS = {
    cMapUrl: BCMAPS_URL,
    cMapPacked: true,
  };
  /**
   * Link service used by the tests: it only remembers the current page number
   * and the document that was attached through `setDocument`.
   */
  class MockLinkService extends SimpleLinkService {
    constructor() {
      super();

      // Current page number; starts at the first page.
      this._page = 1;
      // Document attached through `setDocument`, `null` until then.
      this._pdfDocument = null;
    }

    /**
     * Attaches the document whose page count is reported by `pagesCount`.
     * @param {Object} pdfDocument - Object exposing a `numPages` property.
     */
    setDocument(pdfDocument) {
      this._pdfDocument = pdfDocument;
    }

    /**
     * @type {number} Number of pages of the attached document, or 0 while no
     *   document is attached (instead of throwing on the `null` document).
     */
    get pagesCount() {
      return this._pdfDocument ? this._pdfDocument.numPages : 0;
    }

    /**
     * @type {number} The stored current page number.
     */
    get page() {
      return this._page;
    }

    /**
     * @param {number} value - New current page number; stored as-is.
     */
    set page(value) {
      this._page = value;
    }
  }
  /**
   * Loads a test document and wires up a find controller for it.
   *
   * @param {string} [filename] - Name of the test PDF; any falsy value selects
   *   `tracemonkeyFileName`.
   * @returns {Promise<Object>} Resolves with `{ eventBus, pdfFindController }`,
   *   where the controller has already been given the loaded document.
   */
  async function initPdfFindController(filename) {
    const documentParams = buildGetDocumentParams(
      filename || tracemonkeyFileName,
      { ...CMAP_PARAMS }
    );
    const pdfDocument = await getDocument(documentParams).promise;

    // The link service must know the document before the controller uses it.
    const linkService = new MockLinkService();
    linkService.setDocument(pdfDocument);

    const eventBus = new EventBus();
    const pdfFindController = new PDFFindController({ linkService, eventBus });
    // Handing over the document is what enables searching.
    pdfFindController.setDocument(pdfDocument);

    return { eventBus, pdfFindController };
  }
  /**
   * Runs one search through the find controller and checks the outcome.
   *
   * A "find" event (default state merged with `state`) is dispatched on
   * `eventBus`. The expectations run on the first "updatefindmatchescount"
   * event for which `pdfFindController.pageMatches` has one entry per page up
   * to and including the last page that is expected to contain a match.
   *
   * NOTE: the returned promise only settles once such an event arrives.
   *
   * @param {Object} params
   * @param {Object} params.eventBus - Object providing `on`, `off` and
   *   `dispatch`.
   * @param {Object} params.pdfFindController - Object exposing `pageMatches`,
   *   `pageMatchesLength` and `selected` (`{ pageIdx, matchIdx }`).
   * @param {Object} params.state - Overrides for the default "find" state,
   *   e.g. `{ query: "Dynamic", caseSensitive: true }`.
   * @param {Array<number>} params.matchesPerPage - Expected number of matches
   *   for every page.
   * @param {Object} params.selectedMatch - Expected selection, as
   *   `{ pageIndex, matchIndex }`.
   * @param {Array<Array<number>>} [params.pageMatches] - When given, compared
   *   against `pdfFindController.pageMatches`.
   * @param {Array<Array<number>>} [params.pageMatchesLength] - Compared against
   *   `pdfFindController.pageMatchesLength`, but only when `pageMatches` is
   *   given as well.
   * @returns {Promise<undefined>} Settles after the expectations have run.
   */
  function testSearch({
    eventBus,
    pdfFindController,
    state,
    matchesPerPage,
    selectedMatch,
    pageMatches = null,
    pageMatchesLength = null,
  }) {
    return new Promise(function (resolve) {
      const eventState = Object.assign(
        Object.create(null),
        {
          // `this` of the promise executor (undefined in module code); kept
          // so that the dispatched state still carries a `source` key.
          source: this,
          type: "",
          query: null,
          caseSensitive: false,
          entireWord: false,
          phraseSearch: true,
          findPrevious: false,
          matchDiacritics: false,
        },
        state
      );

      // The `updatefindmatchescount` event is only emitted if the page
      // contains at least one match for the query, so the last non-zero item
      // in `matchesPerPage` corresponds to the page for which the final event
      // is emitted. The `pageMatches` array of the controller is only extended
      // when a page is processed, hence at that point it holds entries for
      // exactly the pages up to (and including) that page.
      let totalPages = matchesPerPage.length;
      for (let i = totalPages - 1; i >= 0; i--) {
        if (matchesPerPage[i] > 0) {
          totalPages = i + 1;
          break;
        }
      }

      // The explicit initial value keeps an empty `matchesPerPage` from
      // throwing a TypeError.
      const totalMatches = matchesPerPage.reduce(
        (sum, count) => sum + count,
        0
      );

      const onUpdateFindMatchesCount = evt => {
        // Ignore intermediate events, i.e. not all relevant pages are done.
        if (pdfFindController.pageMatches.length !== totalPages) {
          return;
        }
        eventBus.off("updatefindmatchescount", onUpdateFindMatchesCount);

        expect(evt.matchesCount.total).toBe(totalMatches);
        for (let i = 0; i < totalPages; i++) {
          expect(pdfFindController.pageMatches[i].length).toEqual(
            matchesPerPage[i]
          );
        }
        expect(pdfFindController.selected.pageIdx).toEqual(
          selectedMatch.pageIndex
        );
        expect(pdfFindController.selected.matchIdx).toEqual(
          selectedMatch.matchIndex
        );

        if (pageMatches) {
          expect(pdfFindController.pageMatches).toEqual(pageMatches);
          expect(pdfFindController.pageMatchesLength).toEqual(
            pageMatchesLength
          );
        }
        resolve();
      };

      // Start listening *before* triggering the search, such that a result
      // which is reported synchronously during the dispatch isn't missed.
      eventBus.on("updatefindmatchescount", onUpdateFindMatchesCount);
      eventBus.dispatch("find", eventState);
    });
  }
  // Every test loads a document through `initPdfFindController`, runs one or
  // more searches with `testSearch` and lets that helper verify the number of
  // matches per page, the selected match and (optionally) the exact match
  // positions/lengths.
  describe("pdf_find_controller", () => {
    it("performs a normal search", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "Dynamic" },
        matchesPerPage: [11, 5, 0, 3, 0, 0, 0, 1, 1, 1, 0, 3, 4, 4],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
      });
    });

    it("performs a normal search and finds the previous result", async () => {
      // All five results are on page 14 (page index 13). The first result
      // (match index 0) is selected by default, hence going to the previous
      // result is expected to select the fifth one (match index 4).
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "conference", findPrevious: true },
        matchesPerPage: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
        selectedMatch: { pageIndex: 13, matchIndex: 4 },
      });
    });

    it("performs a case sensitive search", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "Dynamic", caseSensitive: true },
        matchesPerPage: [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
      });
    });

    it("performs an entire word search", async () => {
      // 'Government' and 'Governmental' both occur on page 13; an entire
      // word search must only report the former.
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "Government", entireWord: true },
        matchesPerPage: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        selectedMatch: { pageIndex: 12, matchIndex: 0 },
      });
    });

    it("performs a multiple term (no phrase) search", async () => {
      // 'alternate' occurs on page 9 and 'solution' on pages 6 and 9; with
      // phrase search disabled both terms are expected to be found.
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "alternate solution", phraseSearch: false },
        matchesPerPage: [0, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 5, matchIndex: 0 },
      });
    });

    it("performs a normal search, where the text is normalized", async () => {
      // The three searches below share one controller/document.
      const searchContext = await initPdfFindController(
        "fraction-highlight.pdf"
      );

      await testSearch({
        ...searchContext,
        state: { query: "fraction" },
        matchesPerPage: [3],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[19, 46, 62]],
        pageMatchesLength: [[8, 8, 8]],
      });

      await testSearch({
        ...searchContext,
        state: { query: "1/2" },
        matchesPerPage: [2],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[27, 54]],
        pageMatchesLength: [[1, 1]],
      });

      await testSearch({
        ...searchContext,
        state: { query: "½" },
        matchesPerPage: [2],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[27, 54]],
        pageMatchesLength: [[1, 1]],
      });
    });

    it("performs a normal search, where the text with diacritics is normalized", async () => {
      // The three searches below share one controller/document.
      const searchContext = await initPdfFindController(
        "french_diacritics.pdf"
      );

      await testSearch({
        ...searchContext,
        state: { query: "a" },
        matchesPerPage: [6],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[0, 2, 4, 6, 8, 10]],
        pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
      });

      await testSearch({
        ...searchContext,
        state: { query: "u" },
        matchesPerPage: [6],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[44, 46, 48, 50, 52, 54]],
        pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
      });

      await testSearch({
        ...searchContext,
        state: { query: "ë", matchDiacritics: true },
        matchesPerPage: [2],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[28, 30]],
        pageMatchesLength: [[1, 1]],
      });
    });

    it("performs a search where one of the results contains an hyphen", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "optimiz" },
        matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
      });
    });

    it("performs a search where the result is on two lines", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "user experience" },
        matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[2743]],
        pageMatchesLength: [[14]],
      });
    });

    it("performs a search where the result is on two lines with a punctuation at eol", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "version.the" },
        matchesPerPage: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 1, matchIndex: 0 },
        pageMatches: [[], [1493]],
        pageMatchesLength: [[], [11]],
      });
    });

    it("performs a search with a minus sign in the query", async () => {
      const searchContext = await initPdfFindController();

      // One match on the first and one on the last of the 14 pages, hence
      // the expected positions/lengths contain twelve empty pages in between.
      await testSearch({
        ...searchContext,
        state: { query: "trace-based just-in-time" },
        matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [
          [0],
          [], [], [], [], [], [],
          [], [], [], [], [], [],
          [2087],
        ],
        pageMatchesLength: [
          [24],
          [], [], [], [], [], [],
          [], [], [], [], [], [],
          [24],
        ],
      });
    });

    it("performs a search with square brackets in the query", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "[Programming Languages]" },
        matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[1501]],
        pageMatchesLength: [[25]],
      });
    });

    it("performs a search with parenthesis in the query", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "\t (checks)" },
        matchesPerPage: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 1, matchIndex: 0 },
        pageMatches: [[], [201]],
        pageMatchesLength: [[], [9]],
      });
    });

    it("performs a search with a final dot in the query", async () => {
      const searchContext = await initPdfFindController();

      // The match must end at the dot, i.e. exclude the whitespace after it.
      await testSearch({
        ...searchContext,
        state: { query: "complex applications." },
        matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[1946]],
        pageMatchesLength: [[21]],
      });
    });

    it("performs a search with a dot in the query and a missing whitespace", async () => {
      const searchContext = await initPdfFindController();

      // Here the whitespace after the dot has to be part of the match.
      await testSearch({
        ...searchContext,
        state: { query: "complex applications.J" },
        matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[1946]],
        pageMatchesLength: [[23]],
      });
    });

    it("performs a search with a dot followed by a whitespace in the query", async () => {
      const searchContext = await initPdfFindController();

      await testSearch({
        ...searchContext,
        state: { query: "complex applications. j" },
        matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[1946]],
        pageMatchesLength: [[23]],
      });
    });

    it("performs a search in a text containing diacritics before -\\n", async () => {
      if (isNodeJS) {
        pending("Linked test-cases are not supported in Node.js.");
      }
      const searchContext = await initPdfFindController("issue14562.pdf");

      await testSearch({
        ...searchContext,
        state: { query: "ä", matchDiacritics: true },
        matchesPerPage: [80],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [
          [
            302, 340, 418, 481, 628, 802, 983, 989, 1015, 1063, 1084, 1149,
            1157, 1278, 1346, 1394, 1402, 1424, 1500, 1524, 1530, 1686, 1776,
            1788, 1859, 1881, 1911, 1948, 2066, 2076, 2163, 2180, 2215, 2229,
            2274, 2324, 2360, 2402, 2413, 2424, 2463, 2532, 2538, 2553, 2562,
            2576, 2602, 2613, 2638, 2668, 2792, 2805, 2836, 2847, 2858, 2895,
            2901, 2915, 2939, 2959, 3089, 3236, 3246, 3336, 3384, 3391, 3465,
            3474, 3482, 3499, 3687, 3693, 3708, 3755, 3786, 3862, 3974, 4049,
            4055, 4068,
          ],
        ],
        pageMatchesLength: [
          [
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          ],
        ],
      });
    });

    it("performs a search in a text containing some Hangul syllables", async () => {
      const searchContext = await initPdfFindController("bug1771477.pdf");

      await testSearch({
        ...searchContext,
        state: { query: "안녕하세요 세계" },
        matchesPerPage: [1],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[139]],
        pageMatchesLength: [[8]],
      });
    });

    it("performs a search in a text containing an ideographic at the end of a line", async () => {
      const searchContext = await initPdfFindController("issue15340.pdf");

      await testSearch({
        ...searchContext,
        state: { query: "検知機構" },
        matchesPerPage: [1],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[29]],
        pageMatchesLength: [[4]],
      });
    });

    it("performs a search in a text containing fullwidth chars", async () => {
      const searchContext = await initPdfFindController("issue15690.pdf");

      await testSearch({
        ...searchContext,
        state: { query: "o" },
        matchesPerPage: [13],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[0, 10, 13, 30, 39, 41, 55, 60, 66, 84, 102, 117, 134]],
        pageMatchesLength: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
      });
    });

    it("performs a search in a text with some Katakana at the end of a line", async () => {
      const searchContext = await initPdfFindController("issue15759.pdf");

      await testSearch({
        ...searchContext,
        state: { query: "ソレノイド" },
        matchesPerPage: [1],
        selectedMatch: { pageIndex: 0, matchIndex: 0 },
        pageMatches: [[6]],
        pageMatchesLength: [[5]],
      });
    });
  });