| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047 | 
							- /* Copyright 2012 Mozilla Foundation
 
-  *
 
-  * Licensed under the Apache License, Version 2.0 (the "License");
 
-  * you may not use this file except in compliance with the License.
 
-  * You may obtain a copy of the License at
 
-  *
 
-  *     http://www.apache.org/licenses/LICENSE-2.0
 
-  *
 
-  * Unless required by applicable law or agreed to in writing, software
 
-  * distributed under the License is distributed on an "AS IS" BASIS,
 
-  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 
-  * See the License for the specific language governing permissions and
 
-  * limitations under the License.
 
-  */
 
- /** @typedef {import("../src/display/api").PDFDocumentProxy} PDFDocumentProxy */
 
- /** @typedef {import("./event_utils").EventBus} EventBus */
 
- /** @typedef {import("./interfaces").IPDFLinkService} IPDFLinkService */
 
- import { binarySearchFirstItem, scrollIntoView } from "./ui_utils.js";
 
- import { createPromiseCapability } from "pdfjs-lib";
 
- import { getCharacterType } from "./pdf_find_utils.js";
 
/**
 * Outcome codes of a find operation (passed to the UI-state update).
 * Frozen, since this is a shared lookup table that must never be mutated.
 */
const FindState = Object.freeze({
  FOUND: 0,
  NOT_FOUND: 1,
  WRAPPED: 2,
  PENDING: 3,
});

// Delay applied before a plain "find" (typing) actually starts searching.
const FIND_TIMEOUT = 250; // ms
// Offsets applied when scrolling the selected match into view.
const MATCH_SCROLL_OFFSET_TOP = -50; // px
const MATCH_SCROLL_OFFSET_LEFT = -400; // px

/**
 * Single characters that are replaced by a plain ASCII equivalent when the
 * text is normalized. Frozen: read-only lookup table.
 */
const CHARACTERS_TO_NORMALIZE = Object.freeze({
  "\u2010": "-", // Hyphen
  "\u2018": "'", // Left single quotation mark
  "\u2019": "'", // Right single quotation mark
  "\u201A": "'", // Single low-9 quotation mark
  "\u201B": "'", // Single high-reversed-9 quotation mark
  "\u201C": '"', // Left double quotation mark
  "\u201D": '"', // Right double quotation mark
  "\u201E": '"', // Double low-9 quotation mark
  "\u201F": '"', // Double high-reversed-9 quotation mark
  "\u00BC": "1/4", // Vulgar fraction one quarter
  "\u00BD": "1/2", // Vulgar fraction one half
  "\u00BE": "3/4", // Vulgar fraction three quarters
});

// These diacritics aren't considered as combining diacritics
// when searching in a document:
//   https://searchfox.org/mozilla-central/source/intl/unicharutil/util/is_combining_diacritic.py.
// The combining class definitions can be found:
//   https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
// Category 0 corresponds to [^\p{Mn}].
const DIACRITICS_EXCEPTION = new Set([
  // UNICODE_COMBINING_CLASS_KANA_VOICING
  // https://www.compart.com/fr/unicode/combining/8
  0x3099, 0x309a,
  // UNICODE_COMBINING_CLASS_VIRAMA (under 0xFFFF)
  // https://www.compart.com/fr/unicode/combining/9
  0x094d, 0x09cd, 0x0a4d, 0x0acd, 0x0b4d, 0x0bcd, 0x0c4d, 0x0ccd, 0x0d3b,
  0x0d3c, 0x0d4d, 0x0dca, 0x0e3a, 0x0eba, 0x0f84, 0x1039, 0x103a, 0x1714,
  0x1734, 0x17d2, 0x1a60, 0x1b44, 0x1baa, 0x1bab, 0x1bf2, 0x1bf3, 0x2d7f,
  0xa806, 0xa82c, 0xa8c4, 0xa953, 0xa9c0, 0xaaf6, 0xabed,
  // 91
  // https://www.compart.com/fr/unicode/combining/91
  0x0c56,
  // 129
  // https://www.compart.com/fr/unicode/combining/129
  0x0f71,
  // 130
  // https://www.compart.com/fr/unicode/combining/130
  0x0f72, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f80,
  // 132
  // https://www.compart.com/fr/unicode/combining/132
  0x0f74,
]);
// String form of `DIACRITICS_EXCEPTION`; built lazily on first use when a
// diacritics-sensitive search regexp is created.
let DIACRITICS_EXCEPTION_STR;

// One or more consecutive combining marks.
const DIACRITICS_REG_EXP = /\p{M}+/gu;
// Groups: (1) regexp meta-characters, (2) punctuation, (3) whitespace run,
// (4) a combining mark, (5) a letter.
const SPECIAL_CHARS_REG_EXP =
  /([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
// Last / first character that is not a combining mark.
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;

// The range [AC00-D7AF] corresponds to the Hangul syllables.
// The few other chars are some CJK Compatibility Ideographs.
const SYLLABLES_REG_EXP = /[\uAC00-\uD7AF\uFA6C\uFACF-\uFAD1\uFAD5-\uFAD7]+/g;
// Cache: syllable -> length of its NFD decomposition.
const SYLLABLES_LENGTHS = new Map();
// When decomposed (in using NFD) the above syllables will start
// with one of the chars in this regexp.
const FIRST_CHAR_SYLLABLES_REG_EXP =
  "[\\u1100-\\u1112\\ud7a4-\\ud7af\\ud84a\\ud84c\\ud850\\ud854\\ud857\\ud85f]";

// Cache: character -> its NFKC normalization.
const NFKC_CHARS_TO_NORMALIZE = new Map();

// Normalization regexps, compiled on first use by `normalize`.
let noSyllablesRegExp = null;
let withSyllablesRegExp = null;
 
/**
 * Normalize `text` for searching and record how positions in the normalized
 * string map back to positions in the original one.
 *
 * The text is decomposed with NFD (diacritics in the text or in the query can
 * be composed or not, decomposing both puts them in the same order), a few
 * characters are replaced by simpler equivalents, and line breaks are either
 * dropped or replaced by a space.
 *
 * @param {string} text - The raw text.
 * @returns {Array} `[normalized, positions, hasDiacritics]` where `positions`
 *   is a sorted list of `[indexInNormalized, shift]` pairs and `hasDiacritics`
 *   tells whether any combining mark was encountered.
 */
function normalize(text) {
  // For every syllable found in the raw text: [NFD length, index in `text`].
  const syllablePositions = [];
  for (const found of text.matchAll(SYLLABLES_REG_EXP)) {
    let charIndex = found.index;
    for (const syllable of found[0]) {
      let decomposedLength = SYLLABLES_LENGTHS.get(syllable);
      if (!decomposedLength) {
        decomposedLength = syllable.normalize("NFD").length;
        SYLLABLES_LENGTHS.set(syllable, decomposedLength);
      }
      syllablePositions.push([decomposedLength, charIndex++]);
    }
  }

  // Pick the (cached) regexp flavour; compile it the first time it's needed.
  const hasSyllables = syllablePositions.length > 0;
  let normalizationRegex = hasSyllables
    ? withSyllablesRegExp
    : noSyllablesRegExp;
  if (!normalizationRegex) {
    const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
    const toNormalizeWithNFKC = [
      "\u2460-\u2473", // Circled numbers.
      "\u24b6-\u24ff", // Circled letters/numbers.
      "\u3244-\u32bf", // Circled ideograms/numbers.
      "\u32d0-\u32fe", // Circled ideograms.
      "\uff00-\uffef", // Halfwidth, fullwidth forms.
    ].join("");

    // 3040-309F: Hiragana
    // 30A0-30FF: Katakana
    const CJK = "(?:\\p{Ideographic}|[\u3040-\u30FF])";
    const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(${CJK}\\n)|(\\n)`;

    // Most of the syllables belong to Hangul so there is no need to search
    // for them in a non-Hangul document; the \0 alternative only exists to
    // keep the same number of capturing groups in both flavours.
    const lastAlternative = hasSyllables
      ? `|(${FIRST_CHAR_SYLLABLES_REG_EXP})`
      : "|(\\u0000)";
    normalizationRegex = new RegExp(regexp + lastAlternative, "gum");
    if (hasSyllables) {
      withSyllablesRegExp = normalizationRegex;
    } else {
      noSyllablesRegExp = normalizationRegex;
    }
  }

  // The goal of this function is to normalize the string and
  // be able to get from an index in the new string the
  // corresponding index in the old string.
  // For example if we have: abCd12ef456gh where C is replaced by ccc
  // and numbers replaced by nothing (it's the case for diacritics), then
  // we'll obtain the normalized string: abcccdefgh.
  // So here the reverse map is: [0,1,2,2,2,3,6,7,11,12].

  // The goal is to obtain the array: [[0, 0], [3, -1], [4, -2],
  // [6, 0], [8, 3]].
  // which can be used like this:
  //  - let say that i is the index in new string and j the index
  //    the old string.
  //  - if i is in [0; 3[ then j = i + 0
  //  - if i is in [3; 4[ then j = i - 1
  //  - if i is in [4; 6[ then j = i - 2
  //  ...
  // Thanks to a binary search it's easy to know where is i and what's the
  // shift.
  // Let say that the last entry in the array is [x, s] and we have a
  // substitution at index y (old string) which will replace o chars by n chars.
  // Firstly, if o === n, then no need to add a new entry: the shift is
  // the same.
  // Secondly, if o < n, then we push the n - o elements:
  // [y - (s - 1), s - 1], [y - (s - 2), s - 2], ...
  // Thirdly, if o > n, then we push the element: [y - (s - n), o + s - n]

  // For every run of combining marks in the raw text: [length, index].
  const rawDiacriticsPositions = Array.from(
    text.matchAll(DIACRITICS_REG_EXP),
    run => [run[0].length, run.index]
  );

  const positions = [[0, 0]];
  let rawDiacriticsIndex = 0;
  let syllableIndex = 0;
  let shift = 0;
  let shiftOrigin = 0;
  let eol = 0;
  let hasDiacritics = false;

  // One char at `at` becomes `replacement`: add one entry per extra char.
  const recordExpansion = (at, replacement) => {
    const extra = replacement.length - 1;
    for (let k = 1; k <= extra; k++) {
      positions.push([at - shift + k, shift - k]);
    }
    shift -= extra;
    return replacement;
  };

  const normalized = text.normalize("NFD").replace(
    normalizationRegex,
    (
      _match,
      mapped /* fractions, quotation marks, ... */,
      nfkc /* chars normalized with NFKC */,
      marks /* diacritics, optionally followed by "-\n" */,
      hyphenBreak /* "X-\n" */,
      cjkBreak /* ideographic + "\n" */,
      lineBreak /* "\n" */,
      syllableStart /* first char of a decomposed syllable (or \0) */,
      offset
    ) => {
      let at = offset - shiftOrigin;

      if (mapped) {
        return recordExpansion(at, CHARACTERS_TO_NORMALIZE[mapped]);
      }

      if (nfkc) {
        // Use the (cached) NFKC representation to normalize the char.
        let replacement = NFKC_CHARS_TO_NORMALIZE.get(nfkc);
        if (!replacement) {
          replacement = nfkc.normalize("NFKC");
          NFKC_CHARS_TO_NORMALIZE.set(nfkc, replacement);
        }
        return recordExpansion(at, replacement);
      }

      if (marks) {
        const endsWithDashEOL = marks.endsWith("\n");
        const marksLength = endsWithDashEOL ? marks.length - 2 : marks.length;
        hasDiacritics = true;

        // Only the marks that were *not* already in the raw text are new.
        let added = marksLength;
        const rawRun = rawDiacriticsPositions[rawDiacriticsIndex];
        if (at + eol === rawRun?.[1]) {
          added -= rawRun[0];
          ++rawDiacriticsIndex;
        }

        for (let k = 1; k <= added; k++) {
          // `at` is the position of the first diacritic
          // so (at - 1) is the position for the letter before.
          positions.push([at - 1 - shift + k, shift - k]);
        }
        shift -= added;
        shiftOrigin += added;

        if (!endsWithDashEOL) {
          return marks;
        }
        // Diacritics are followed by a "-\n": handled like the "X-\n" case.
        at += marksLength - 1;
        positions.push([at - shift + 1, 1 + shift]);
        shift += 1;
        shiftOrigin += 1;
        eol += 1;
        return marks.slice(0, marksLength);
      }

      if (hyphenBreak) {
        // "X-\n" is removed because an hyphen at the end of a line
        // with not a space before is likely here to mark a break
        // in a word.
        // The \n isn't in the original text so here y = i, n = 1 and o = 2.
        positions.push([at - shift + 1, 1 + shift]);
        shift += 1;
        shiftOrigin += 1;
        eol += 1;
        return hyphenBreak.charAt(0);
      }

      if (cjkBreak) {
        // An ideographic at the end of a line doesn't imply adding an extra
        // white space.
        positions.push([at - shift + 1, shift]);
        shiftOrigin += 1;
        eol += 1;
        return cjkBreak.charAt(0);
      }

      if (lineBreak) {
        // eol is replaced by space: "foo\nbar" is likely equivalent to
        // "foo bar".
        positions.push([at - shift + 1, shift - 1]);
        shift -= 1;
        shiftOrigin += 1;
        eol += 1;
        return " ";
      }

      // Last group: possibly the start of a decomposed syllable.
      const rawSyllable = syllablePositions[syllableIndex];
      if (at + eol === rawSyllable?.[1]) {
        // A syllable (1 char) is replaced with several chars (n) so
        // newCharsLen = n - 1.
        const newCharLen = rawSyllable[0] - 1;
        ++syllableIndex;
        for (let k = 1; k <= newCharLen; k++) {
          positions.push([at - (shift - k), shift - k]);
        }
        shift -= newCharLen;
        shiftOrigin += newCharLen;
      }
      return syllableStart;
    }
  );

  positions.push([normalized.length, shift]);

  return [normalized, positions, hasDiacritics];
}
 
/**
 * Map a match found in normalized text back onto the original text, so that
 * highlighting in the `textLayer` is correct for strings containing e.g. "½"
 * characters; essentially the inverse of what `normalize` does.
 *
 * @param {Array|null} diffs - The `[index, shift]` pairs produced by
 *   `normalize`; when falsy the input is returned unchanged.
 * @param {number} pos - Start of the match in the normalized text.
 * @param {number} len - Length of the match in the normalized text.
 * @returns {Array<number>} `[position, length]` in the original text.
 */
function getOriginalIndex(diffs, pos, len) {
  if (!diffs) {
    return [pos, len];
  }
  const endPos = pos + len;

  // Find the last entry whose index is <= pos.
  let startEntry = binarySearchFirstItem(diffs, x => x[0] >= pos);
  if (diffs[startEntry][0] > pos) {
    --startEntry;
  }

  // Same for the end of the match, searching from `startEntry` onwards.
  let endEntry = binarySearchFirstItem(diffs, x => x[0] >= endPos, startEntry);
  if (diffs[endEntry][0] > endPos) {
    --endEntry;
  }

  const startShift = diffs[startEntry][1];
  return [pos + startShift, len + diffs[endEntry][1] - startShift];
}
 
- /**
 
-  * @typedef {Object} PDFFindControllerOptions
 
-  * @property {IPDFLinkService} linkService - The navigation/linking service.
 
-  * @property {EventBus} eventBus - The application event bus.
 
-  */
 
- /**
 
-  * Provides search functionality to find a given string in a PDF document.
 
-  */
 
- class PDFFindController {
 
-   /**
 
-    * @param {PDFFindControllerOptions} options
 
-    */
 
-   constructor({ linkService, eventBus }) {
 
-     this._linkService = linkService;
 
-     this._eventBus = eventBus;
 
-     this.#reset();
 
-     eventBus._on("find", this.#onFind.bind(this));
 
-     eventBus._on("findbarclose", this.#onFindBarClose.bind(this));
 
-   }
 
-   get highlightMatches() {
 
-     return this._highlightMatches;
 
-   }
 
-   get pageMatches() {
 
-     return this._pageMatches;
 
-   }
 
-   get pageMatchesLength() {
 
-     return this._pageMatchesLength;
 
-   }
 
-   get selected() {
 
-     return this._selected;
 
-   }
 
-   get state() {
 
-     return this._state;
 
-   }
 
-   /**
 
-    * Set a reference to the PDF document in order to search it.
 
-    * Note that searching is not possible if this method is not called.
 
-    *
 
-    * @param {PDFDocumentProxy} pdfDocument - The PDF document to search.
 
-    */
 
-   setDocument(pdfDocument) {
 
-     if (this._pdfDocument) {
 
-       this.#reset();
 
-     }
 
-     if (!pdfDocument) {
 
-       return;
 
-     }
 
-     this._pdfDocument = pdfDocument;
 
-     this._firstPageCapability.resolve();
 
-   }
 
-   #onFind(state) {
 
-     if (!state) {
 
-       return;
 
-     }
 
-     const pdfDocument = this._pdfDocument;
 
-     const { type } = state;
 
-     if (this._state === null || this.#shouldDirtyMatch(state)) {
 
-       this._dirtyMatch = true;
 
-     }
 
-     this._state = state;
 
-     if (type !== "highlightallchange") {
 
-       this.#updateUIState(FindState.PENDING);
 
-     }
 
-     this._firstPageCapability.promise.then(() => {
 
-       // If the document was closed before searching began, or if the search
 
-       // operation was relevant for a previously opened document, do nothing.
 
-       if (
 
-         !this._pdfDocument ||
 
-         (pdfDocument && this._pdfDocument !== pdfDocument)
 
-       ) {
 
-         return;
 
-       }
 
-       this.#extractText();
 
-       const findbarClosed = !this._highlightMatches;
 
-       const pendingTimeout = !!this._findTimeout;
 
-       if (this._findTimeout) {
 
-         clearTimeout(this._findTimeout);
 
-         this._findTimeout = null;
 
-       }
 
-       if (!type) {
 
-         // Trigger the find action with a small delay to avoid starting the
 
-         // search when the user is still typing (saving resources).
 
-         this._findTimeout = setTimeout(() => {
 
-           this.#nextMatch();
 
-           this._findTimeout = null;
 
-         }, FIND_TIMEOUT);
 
-       } else if (this._dirtyMatch) {
 
-         // Immediately trigger searching for non-'find' operations, when the
 
-         // current state needs to be reset and matches re-calculated.
 
-         this.#nextMatch();
 
-       } else if (type === "again") {
 
-         this.#nextMatch();
 
-         // When the findbar was previously closed, and `highlightAll` is set,
 
-         // ensure that the matches on all active pages are highlighted again.
 
-         if (findbarClosed && this._state.highlightAll) {
 
-           this.#updateAllPages();
 
-         }
 
-       } else if (type === "highlightallchange") {
 
-         // If there was a pending search operation, synchronously trigger a new
 
-         // search *first* to ensure that the correct matches are highlighted.
 
-         if (pendingTimeout) {
 
-           this.#nextMatch();
 
-         } else {
 
-           this._highlightMatches = true;
 
-         }
 
-         this.#updateAllPages(); // Update the highlighting on all active pages.
 
-       } else {
 
-         this.#nextMatch();
 
-       }
 
-     });
 
-   }
 
-   scrollMatchIntoView({
 
-     element = null,
 
-     selectedLeft = 0,
 
-     pageIndex = -1,
 
-     matchIndex = -1,
 
-   }) {
 
-     if (!this._scrollMatches || !element) {
 
-       return;
 
-     } else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
 
-       return;
 
-     } else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
 
-       return;
 
-     }
 
-     this._scrollMatches = false; // Ensure that scrolling only happens once.
 
-     const spot = {
 
-       top: MATCH_SCROLL_OFFSET_TOP,
 
-       left: selectedLeft + MATCH_SCROLL_OFFSET_LEFT,
 
-     };
 
-     scrollIntoView(element, spot, /* scrollMatches = */ true);
 
-   }
 
-   #reset() {
 
-     this._highlightMatches = false;
 
-     this._scrollMatches = false;
 
-     this._pdfDocument = null;
 
-     this._pageMatches = [];
 
-     this._pageMatchesLength = [];
 
-     this._state = null;
 
-     // Currently selected match.
 
-     this._selected = {
 
-       pageIdx: -1,
 
-       matchIdx: -1,
 
-     };
 
-     // Where the find algorithm currently is in the document.
 
-     this._offset = {
 
-       pageIdx: null,
 
-       matchIdx: null,
 
-       wrapped: false,
 
-     };
 
-     this._extractTextPromises = [];
 
-     this._pageContents = []; // Stores the normalized text for each page.
 
-     this._pageDiffs = [];
 
-     this._hasDiacritics = [];
 
-     this._matchesCountTotal = 0;
 
-     this._pagesToSearch = null;
 
-     this._pendingFindMatches = new Set();
 
-     this._resumePageIdx = null;
 
-     this._dirtyMatch = false;
 
-     clearTimeout(this._findTimeout);
 
-     this._findTimeout = null;
 
-     this._firstPageCapability = createPromiseCapability();
 
-   }
 
-   /**
 
-    * @type {string} The (current) normalized search query.
 
-    */
 
-   get #query() {
 
-     if (this._state.query !== this._rawQuery) {
 
-       this._rawQuery = this._state.query;
 
-       [this._normalizedQuery] = normalize(this._state.query);
 
-     }
 
-     return this._normalizedQuery;
 
-   }
 
-   #shouldDirtyMatch(state) {
 
-     // When the search query changes, regardless of the actual search command
 
-     // used, always re-calculate matches to avoid errors (fixes bug 1030622).
 
-     if (state.query !== this._state.query) {
 
-       return true;
 
-     }
 
-     switch (state.type) {
 
-       case "again":
 
-         const pageNumber = this._selected.pageIdx + 1;
 
-         const linkService = this._linkService;
 
-         // Only treat a 'findagain' event as a new search operation when it's
 
-         // *absolutely* certain that the currently selected match is no longer
 
-         // visible, e.g. as a result of the user scrolling in the document.
 
-         //
 
-         // NOTE: If only a simple `this._linkService.page` check was used here,
 
-         // there's a risk that consecutive 'findagain' operations could "skip"
 
-         // over matches at the top/bottom of pages thus making them completely
 
-         // inaccessible when there's multiple pages visible in the viewer.
 
-         if (
 
-           pageNumber >= 1 &&
 
-           pageNumber <= linkService.pagesCount &&
 
-           pageNumber !== linkService.page &&
 
-           !linkService.isPageVisible(pageNumber)
 
-         ) {
 
-           return true;
 
-         }
 
-         return false;
 
-       case "highlightallchange":
 
-         return false;
 
-     }
 
-     return true;
 
-   }
 
-   /**
 
-    * Determine if the search query constitutes a "whole word", by comparing the
 
-    * first/last character type with the preceding/following character type.
 
-    */
 
-   #isEntireWord(content, startIdx, length) {
 
-     let match = content
 
-       .slice(0, startIdx)
 
-       .match(NOT_DIACRITIC_FROM_END_REG_EXP);
 
-     if (match) {
 
-       const first = content.charCodeAt(startIdx);
 
-       const limit = match[1].charCodeAt(0);
 
-       if (getCharacterType(first) === getCharacterType(limit)) {
 
-         return false;
 
-       }
 
-     }
 
-     match = content
 
-       .slice(startIdx + length)
 
-       .match(NOT_DIACRITIC_FROM_START_REG_EXP);
 
-     if (match) {
 
-       const last = content.charCodeAt(startIdx + length - 1);
 
-       const limit = match[1].charCodeAt(0);
 
-       if (getCharacterType(last) === getCharacterType(limit)) {
 
-         return false;
 
-       }
 
-     }
 
-     return true;
 
-   }
 
-   #calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
 
-     const matches = [],
 
-       matchesLength = [];
 
-     const diffs = this._pageDiffs[pageIndex];
 
-     let match;
 
-     while ((match = query.exec(pageContent)) !== null) {
 
-       if (
 
-         entireWord &&
 
-         !this.#isEntireWord(pageContent, match.index, match[0].length)
 
-       ) {
 
-         continue;
 
-       }
 
-       const [matchPos, matchLen] = getOriginalIndex(
 
-         diffs,
 
-         match.index,
 
-         match[0].length
 
-       );
 
-       if (matchLen) {
 
-         matches.push(matchPos);
 
-         matchesLength.push(matchLen);
 
-       }
 
-     }
 
-     this._pageMatches[pageIndex] = matches;
 
-     this._pageMatchesLength[pageIndex] = matchesLength;
 
-   }
 
-   #convertToRegExpString(query, hasDiacritics) {
 
-     const { matchDiacritics } = this._state;
 
-     let isUnicode = false;
 
-     query = query.replace(
 
-       SPECIAL_CHARS_REG_EXP,
 
-       (
 
-         match,
 
-         p1 /* to escape */,
 
-         p2 /* punctuation */,
 
-         p3 /* whitespaces */,
 
-         p4 /* diacritics */,
 
-         p5 /* letters */
 
-       ) => {
 
-         // We don't need to use a \s for whitespaces since all the different
 
-         // kind of whitespaces are replaced by a single " ".
 
-         if (p1) {
 
-           // Escape characters like *+?... to not interfer with regexp syntax.
 
-           return `[ ]*\\${p1}[ ]*`;
 
-         }
 
-         if (p2) {
 
-           // Allow whitespaces around punctuation signs.
 
-           return `[ ]*${p2}[ ]*`;
 
-         }
 
-         if (p3) {
 
-           // Replace spaces by \s+ to be sure to match any spaces.
 
-           return "[ ]+";
 
-         }
 
-         if (matchDiacritics) {
 
-           return p4 || p5;
 
-         }
 
-         if (p4) {
 
-           // Diacritics are removed with few exceptions.
 
-           return DIACRITICS_EXCEPTION.has(p4.charCodeAt(0)) ? p4 : "";
 
-         }
 
-         // A letter has been matched and it can be followed by any diacritics
 
-         // in normalized text.
 
-         if (hasDiacritics) {
 
-           isUnicode = true;
 
-           return `${p5}\\p{M}*`;
 
-         }
 
-         return p5;
 
-       }
 
-     );
 
-     const trailingSpaces = "[ ]*";
 
-     if (query.endsWith(trailingSpaces)) {
 
-       // The [ ]* has been added in order to help to match "foo . bar" but
 
-       // it doesn't make sense to match some whitespaces after the dot
 
-       // when it's the last character.
 
-       query = query.slice(0, query.length - trailingSpaces.length);
 
-     }
 
-     if (matchDiacritics) {
 
-       // aX must not match aXY.
 
-       if (hasDiacritics) {
 
-         DIACRITICS_EXCEPTION_STR ||= String.fromCharCode(
 
-           ...DIACRITICS_EXCEPTION
 
-         );
 
-         isUnicode = true;
 
-         query = `${query}(?=[${DIACRITICS_EXCEPTION_STR}]|[^\\p{M}]|$)`;
 
-       }
 
-     }
 
-     return [isUnicode, query];
 
-   }
 
-   #calculateMatch(pageIndex) {
 
-     let query = this.#query;
 
-     if (query.length === 0) {
 
-       // Do nothing: the matches should be wiped out already.
 
-       return;
 
-     }
 
-     const { caseSensitive, entireWord, phraseSearch } = this._state;
 
-     const pageContent = this._pageContents[pageIndex];
 
-     const hasDiacritics = this._hasDiacritics[pageIndex];
 
-     let isUnicode = false;
 
-     if (phraseSearch) {
 
-       [isUnicode, query] = this.#convertToRegExpString(query, hasDiacritics);
 
-     } else {
 
-       // Words are sorted in reverse order to be sure that "foobar" is matched
 
-       // before "foo" in case the query is "foobar foo".
 
-       const match = query.match(/\S+/g);
 
-       if (match) {
 
-         query = match
 
-           .sort()
 
-           .reverse()
 
-           .map(q => {
 
-             const [isUnicodePart, queryPart] = this.#convertToRegExpString(
 
-               q,
 
-               hasDiacritics
 
-             );
 
-             isUnicode ||= isUnicodePart;
 
-             return `(${queryPart})`;
 
-           })
 
-           .join("|");
 
-       }
 
-     }
 
-     const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
 
-     query = new RegExp(query, flags);
 
-     this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
 
-     // When `highlightAll` is set, ensure that the matches on previously
 
-     // rendered (and still active) pages are correctly highlighted.
 
-     if (this._state.highlightAll) {
 
-       this.#updatePage(pageIndex);
 
-     }
 
-     if (this._resumePageIdx === pageIndex) {
 
-       this._resumePageIdx = null;
 
-       this.#nextPageMatch();
 
-     }
 
-     // Update the match count.
 
-     const pageMatchesCount = this._pageMatches[pageIndex].length;
 
-     if (pageMatchesCount > 0) {
 
-       this._matchesCountTotal += pageMatchesCount;
 
-       this.#updateUIResultsCount();
 
-     }
 
-   }
 
-   #extractText() {
 
-     // Perform text extraction once if this method is called multiple times.
 
-     if (this._extractTextPromises.length > 0) {
 
-       return;
 
-     }
 
-     let promise = Promise.resolve();
 
-     for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
 
-       const extractTextCapability = createPromiseCapability();
 
-       this._extractTextPromises[i] = extractTextCapability.promise;
 
-       promise = promise.then(() => {
 
-         return this._pdfDocument
 
-           .getPage(i + 1)
 
-           .then(pdfPage => {
 
-             return pdfPage.getTextContent();
 
-           })
 
-           .then(
 
-             textContent => {
 
-               const strBuf = [];
 
-               for (const textItem of textContent.items) {
 
-                 strBuf.push(textItem.str);
 
-                 if (textItem.hasEOL) {
 
-                   strBuf.push("\n");
 
-                 }
 
-               }
 
-               // Store the normalized page content (text items) as one string.
 
-               [
 
-                 this._pageContents[i],
 
-                 this._pageDiffs[i],
 
-                 this._hasDiacritics[i],
 
-               ] = normalize(strBuf.join(""));
 
-               extractTextCapability.resolve();
 
-             },
 
-             reason => {
 
-               console.error(
 
-                 `Unable to get text content for page ${i + 1}`,
 
-                 reason
 
-               );
 
-               // Page error -- assuming no text content.
 
-               this._pageContents[i] = "";
 
-               this._pageDiffs[i] = null;
 
-               this._hasDiacritics[i] = false;
 
-               extractTextCapability.resolve();
 
-             }
 
-           );
 
-       });
 
-     }
 
-   }
 
-   #updatePage(index) {
 
-     if (this._scrollMatches && this._selected.pageIdx === index) {
 
-       // If the page is selected, scroll the page into view, which triggers
 
-       // rendering the page, which adds the text layer. Once the text layer
 
-       // is built, it will attempt to scroll the selected match into view.
 
-       this._linkService.page = index + 1;
 
-     }
 
-     this._eventBus.dispatch("updatetextlayermatches", {
 
-       source: this,
 
-       pageIndex: index,
 
-     });
 
-   }
 
-   #updateAllPages() {
 
-     this._eventBus.dispatch("updatetextlayermatches", {
 
-       source: this,
 
-       pageIndex: -1,
 
-     });
 
-   }
 
-   #nextMatch() {
 
-     const previous = this._state.findPrevious;
 
-     const currentPageIndex = this._linkService.page - 1;
 
-     const numPages = this._linkService.pagesCount;
 
-     this._highlightMatches = true;
 
-     if (this._dirtyMatch) {
 
-       // Need to recalculate the matches, reset everything.
 
-       this._dirtyMatch = false;
 
-       this._selected.pageIdx = this._selected.matchIdx = -1;
 
-       this._offset.pageIdx = currentPageIndex;
 
-       this._offset.matchIdx = null;
 
-       this._offset.wrapped = false;
 
-       this._resumePageIdx = null;
 
-       this._pageMatches.length = 0;
 
-       this._pageMatchesLength.length = 0;
 
-       this._matchesCountTotal = 0;
 
-       this.#updateAllPages(); // Wipe out any previously highlighted matches.
 
-       for (let i = 0; i < numPages; i++) {
 
-         // Start finding the matches as soon as the text is extracted.
 
-         if (this._pendingFindMatches.has(i)) {
 
-           continue;
 
-         }
 
-         this._pendingFindMatches.add(i);
 
-         this._extractTextPromises[i].then(() => {
 
-           this._pendingFindMatches.delete(i);
 
-           this.#calculateMatch(i);
 
-         });
 
-       }
 
-     }
 
-     // If there's no query there's no point in searching.
 
-     if (this.#query === "") {
 
-       this.#updateUIState(FindState.FOUND);
 
-       return;
 
-     }
 
-     // If we're waiting on a page, we return since we can't do anything else.
 
-     if (this._resumePageIdx) {
 
-       return;
 
-     }
 
-     const offset = this._offset;
 
-     // Keep track of how many pages we should maximally iterate through.
 
-     this._pagesToSearch = numPages;
 
-     // If there's already a `matchIdx` that means we are iterating through a
 
-     // page's matches.
 
-     if (offset.matchIdx !== null) {
 
-       const numPageMatches = this._pageMatches[offset.pageIdx].length;
 
-       if (
 
-         (!previous && offset.matchIdx + 1 < numPageMatches) ||
 
-         (previous && offset.matchIdx > 0)
 
-       ) {
 
-         // The simple case; we just have advance the matchIdx to select
 
-         // the next match on the page.
 
-         offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
 
-         this.#updateMatch(/* found = */ true);
 
-         return;
 
-       }
 
-       // We went beyond the current page's matches, so we advance to
 
-       // the next page.
 
-       this.#advanceOffsetPage(previous);
 
-     }
 
-     // Start searching through the page.
 
-     this.#nextPageMatch();
 
-   }
 
-   #matchesReady(matches) {
 
-     const offset = this._offset;
 
-     const numMatches = matches.length;
 
-     const previous = this._state.findPrevious;
 
-     if (numMatches) {
 
-       // There were matches for the page, so initialize `matchIdx`.
 
-       offset.matchIdx = previous ? numMatches - 1 : 0;
 
-       this.#updateMatch(/* found = */ true);
 
-       return true;
 
-     }
 
-     // No matches, so attempt to search the next page.
 
-     this.#advanceOffsetPage(previous);
 
-     if (offset.wrapped) {
 
-       offset.matchIdx = null;
 
-       if (this._pagesToSearch < 0) {
 
-         // No point in wrapping again, there were no matches.
 
-         this.#updateMatch(/* found = */ false);
 
-         // While matches were not found, searching for a page
 
-         // with matches should nevertheless halt.
 
-         return true;
 
-       }
 
-     }
 
-     // Matches were not found (and searching is not done).
 
-     return false;
 
-   }
 
-   #nextPageMatch() {
 
-     if (this._resumePageIdx !== null) {
 
-       console.error("There can only be one pending page.");
 
-     }
 
-     let matches = null;
 
-     do {
 
-       const pageIdx = this._offset.pageIdx;
 
-       matches = this._pageMatches[pageIdx];
 
-       if (!matches) {
 
-         // The matches don't exist yet for processing by `_matchesReady`,
 
-         // so set a resume point for when they do exist.
 
-         this._resumePageIdx = pageIdx;
 
-         break;
 
-       }
 
-     } while (!this.#matchesReady(matches));
 
-   }
 
-   #advanceOffsetPage(previous) {
 
-     const offset = this._offset;
 
-     const numPages = this._linkService.pagesCount;
 
-     offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
 
-     offset.matchIdx = null;
 
-     this._pagesToSearch--;
 
-     if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
 
-       offset.pageIdx = previous ? numPages - 1 : 0;
 
-       offset.wrapped = true;
 
-     }
 
-   }
 
-   #updateMatch(found = false) {
 
-     let state = FindState.NOT_FOUND;
 
-     const wrapped = this._offset.wrapped;
 
-     this._offset.wrapped = false;
 
-     if (found) {
 
-       const previousPage = this._selected.pageIdx;
 
-       this._selected.pageIdx = this._offset.pageIdx;
 
-       this._selected.matchIdx = this._offset.matchIdx;
 
-       state = wrapped ? FindState.WRAPPED : FindState.FOUND;
 
-       // Update the currently selected page to wipe out any selected matches.
 
-       if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
 
-         this.#updatePage(previousPage);
 
-       }
 
-     }
 
-     this.#updateUIState(state, this._state.findPrevious);
 
-     if (this._selected.pageIdx !== -1) {
 
-       // Ensure that the match will be scrolled into view.
 
-       this._scrollMatches = true;
 
-       this.#updatePage(this._selected.pageIdx);
 
-     }
 
-   }
 
-   #onFindBarClose(evt) {
 
-     const pdfDocument = this._pdfDocument;
 
-     // Since searching is asynchronous, ensure that the removal of highlighted
 
-     // matches (from the UI) is async too such that the 'updatetextlayermatches'
 
-     // events will always be dispatched in the expected order.
 
-     this._firstPageCapability.promise.then(() => {
 
-       // Only update the UI if the document is open, and is the current one.
 
-       if (
 
-         !this._pdfDocument ||
 
-         (pdfDocument && this._pdfDocument !== pdfDocument)
 
-       ) {
 
-         return;
 
-       }
 
-       // Ensure that a pending, not yet started, search operation is aborted.
 
-       if (this._findTimeout) {
 
-         clearTimeout(this._findTimeout);
 
-         this._findTimeout = null;
 
-       }
 
-       // Abort any long running searches, to avoid a match being scrolled into
 
-       // view *after* the findbar has been closed. In this case `this._offset`
 
-       // will most likely differ from `this._selected`, hence we also ensure
 
-       // that any new search operation will always start with a clean slate.
 
-       if (this._resumePageIdx) {
 
-         this._resumePageIdx = null;
 
-         this._dirtyMatch = true;
 
-       }
 
-       // Avoid the UI being in a pending state when the findbar is re-opened.
 
-       this.#updateUIState(FindState.FOUND);
 
-       this._highlightMatches = false;
 
-       this.#updateAllPages(); // Wipe out any previously highlighted matches.
 
-     });
 
-   }
 
-   #requestMatchesCount() {
 
-     const { pageIdx, matchIdx } = this._selected;
 
-     let current = 0,
 
-       total = this._matchesCountTotal;
 
-     if (matchIdx !== -1) {
 
-       for (let i = 0; i < pageIdx; i++) {
 
-         current += this._pageMatches[i]?.length || 0;
 
-       }
 
-       current += matchIdx + 1;
 
-     }
 
-     // When searching starts, this method may be called before the `pageMatches`
 
-     // have been counted (in `_calculateMatch`). Ensure that the UI won't show
 
-     // temporarily broken state when the active find result doesn't make sense.
 
-     if (current < 1 || current > total) {
 
-       current = total = 0;
 
-     }
 
-     return { current, total };
 
-   }
 
-   #updateUIResultsCount() {
 
-     this._eventBus.dispatch("updatefindmatchescount", {
 
-       source: this,
 
-       matchesCount: this.#requestMatchesCount(),
 
-     });
 
-   }
 
-   #updateUIState(state, previous = false) {
 
-     this._eventBus.dispatch("updatefindcontrolstate", {
 
-       source: this,
 
-       state,
 
-       previous,
 
-       matchesCount: this.#requestMatchesCount(),
 
-       rawQuery: this._state?.query ?? null,
 
-     });
 
-   }
 
- }
 
- export { FindState, PDFFindController };
 
 
  |