/* Copyright 2018 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import { CharacterType, getCharacterType } from "../../web/pdf_find_utils.js";
  16. describe("pdf_find_utils", function () {
  17. describe("getCharacterType", function () {
  18. it("gets expected character types", function () {
  19. const characters = {
  20. A: CharacterType.ALPHA_LETTER,
  21. a: CharacterType.ALPHA_LETTER,
  22. 0: CharacterType.ALPHA_LETTER,
  23. 5: CharacterType.ALPHA_LETTER,
  24. "\xC4": CharacterType.ALPHA_LETTER, // "Ä"
  25. "\xE4": CharacterType.ALPHA_LETTER, // "ä"
  26. _: CharacterType.ALPHA_LETTER,
  27. " ": CharacterType.SPACE,
  28. "\t": CharacterType.SPACE,
  29. "\r": CharacterType.SPACE,
  30. "\n": CharacterType.SPACE,
  31. "\xA0": CharacterType.SPACE, // nbsp
  32. "-": CharacterType.PUNCT,
  33. ",": CharacterType.PUNCT,
  34. ".": CharacterType.PUNCT,
  35. ";": CharacterType.PUNCT,
  36. ":": CharacterType.PUNCT,
  37. "\u2122": CharacterType.ALPHA_LETTER, // trademark
  38. "\u0E25": CharacterType.THAI_LETTER,
  39. "\u4000": CharacterType.HAN_LETTER,
  40. "\uF950": CharacterType.HAN_LETTER,
  41. "\u30C0": CharacterType.KATAKANA_LETTER,
  42. "\u3050": CharacterType.HIRAGANA_LETTER,
  43. "\uFF80": CharacterType.HALFWIDTH_KATAKANA_LETTER,
  44. };
  45. for (const character in characters) {
  46. const charCode = character.charCodeAt(0);
  47. const type = characters[character];
  48. expect(getCharacterType(charCode)).toEqual(type);
  49. }
  50. });
  51. });
  52. });