unicode_spec.js 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /* Copyright 2017 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import {
  16. getCharUnicodeCategory,
  17. getNormalizedUnicodes,
  18. getUnicodeForGlyph,
  19. getUnicodeRangeFor,
  20. mapSpecialUnicodeValues,
  21. reverseIfRtl,
  22. } from "../../src/core/unicode.js";
  23. import {
  24. getDingbatsGlyphsUnicode,
  25. getGlyphsUnicode,
  26. } from "../../src/core/glyphlist.js";
  27. describe("unicode", function () {
  // Checks which code points `mapSpecialUnicodeValues` is expected to return
  // unchanged and which ones it is expected to rewrite.
  describe("mapSpecialUnicodeValues", () => {
    it("should not re-map normal Unicode values", () => {
      // "A" (U+0041) and the "fi" ligature (U+FB01) must come back as-is.
      for (const code of [0x0041, 0xfb01]) {
        expect(mapSpecialUnicodeValues(code)).toEqual(code);
      }
    });

    it("should re-map special Unicode values", () => {
      const remappings = [
        // copyrightsans (a Private Use Area code point) => copyright
        [0xf8e9, 0x00a9],
        // U+FFFF sits in the Specials block (not the Private Use Area) => 0
        [0xffff, 0],
      ];
      for (const [input, output] of remappings) {
        expect(mapSpecialUnicodeValues(input)).toEqual(output);
      }
    });
  });
  // Verifies the three boolean flags that `getCharUnicodeCategory` is
  // expected to report for a representative set of single characters.
  describe("getCharUnicodeCategory", () => {
    it("should correctly determine the character category", () => {
      // One expected result per category; shared by every case in that group.
      const whitespace = {
        isZeroWidthDiacritic: false,
        isInvisibleFormatMark: false,
        isWhitespace: true,
      };
      const diacritic = {
        isZeroWidthDiacritic: true,
        isInvisibleFormatMark: false,
        isWhitespace: false,
      };
      const invisibleFormatMark = {
        isZeroWidthDiacritic: false,
        isInvisibleFormatMark: true,
        isWhitespace: false,
      };
      const plain = {
        isZeroWidthDiacritic: false,
        isInvisibleFormatMark: false,
        isWhitespace: false,
      };

      const cases = [
        // No whitespace or diacritic or invisible format mark (digit)
        ["1", plain],
        // Whitespace
        [" ", whitespace],
        ["\t", whitespace],
        ["\u2001", whitespace],
        ["\uFEFF", whitespace],
        // Diacritic
        ["\u0302", diacritic],
        ["\u0344", diacritic],
        ["\u0361", diacritic],
        // Invisible format mark
        ["\u200B", invisibleFormatMark],
        ["\u200D", invisibleFormatMark],
        // No whitespace or diacritic or invisible format mark (letter)
        ["a", plain],
      ];

      cases.forEach(([char, expected]) => {
        expect(getCharUnicodeCategory(char)).toEqual(expected);
      });
    });
  });
  // Exercises `getUnicodeForGlyph` against both the standard and the
  // Dingbats glyph-name tables.
  describe("getUnicodeForGlyph", () => {
    let standardMap;
    let dingbatsMap;

    // Fetch both glyph tables once for the whole suite.
    beforeAll(() => {
      standardMap = getGlyphsUnicode();
      dingbatsMap = getDingbatsGlyphsUnicode();
    });

    // Drop the references once the suite has finished.
    afterAll(() => {
      dingbatsMap = null;
      standardMap = null;
    });

    it("should get Unicode values for valid glyph names", () => {
      const lookups = [
        ["A", standardMap, 0x0041],
        ["a1", dingbatsMap, 0x2701],
      ];
      for (const [glyphName, glyphMap, unicode] of lookups) {
        expect(getUnicodeForGlyph(glyphName, glyphMap)).toEqual(unicode);
      }
    });

    it("should recover Unicode values from uniXXXX/uXXXX{XX} glyph names", () => {
      // Each name encodes its code point directly in the glyph name.
      const lookups = [
        ["uni0041", standardMap, 0x0041],
        ["u0041", standardMap, 0x0041],
        ["uni2701", dingbatsMap, 0x2701],
        ["u2701", dingbatsMap, 0x2701],
      ];
      for (const [glyphName, glyphMap, unicode] of lookups) {
        expect(getUnicodeForGlyph(glyphName, glyphMap)).toEqual(unicode);
      }
    });

    it("should not get Unicode values for invalid glyph names", () => {
      // -1 is the expected result for an unknown name in either table.
      for (const glyphMap of [standardMap, dingbatsMap]) {
        expect(getUnicodeForGlyph("Qwerty", glyphMap)).toEqual(-1);
      }
    });
  });
  // Checks the range index `getUnicodeRangeFor` is expected to return for
  // individual code points, including one for which -1 is expected.
  describe("getUnicodeRangeFor", () => {
    it("should get correct Unicode range", () => {
      // A (Basic Latin)
      const basicLatin = getUnicodeRangeFor(0x0041);
      expect(basicLatin).toEqual(0);

      // fi (Alphabetic Presentation Forms)
      const presentationForms = getUnicodeRangeFor(0xfb01);
      expect(presentationForms).toEqual(62);
    });

    it("should not get a Unicode range", () => {
      const noRange = getUnicodeRangeFor(0x05ff);
      expect(noRange).toEqual(-1);
    });
  });
  // Spot-checks entries of the lookup table returned by
  // `getNormalizedUnicodes`.
  describe("getNormalizedUnicodes", () => {
    let normalizedTable;

    // Fetch the table once for the whole suite.
    beforeAll(() => {
      normalizedTable = getNormalizedUnicodes();
    });

    // Drop the reference once the suite has finished.
    afterAll(() => {
      normalizedTable = null;
    });

    it("should get normalized Unicode values for ligatures", () => {
      const ligatures = [
        // fi => f + i
        ["\uFB01", "fi"],
        // Arabic
        ["\u0675", "\u0627\u0674"],
      ];
      for (const [ligature, expansion] of ligatures) {
        expect(normalizedTable[ligature]).toEqual(expansion);
      }
    });

    it("should not normalize standard characters", () => {
      // A plain "A" must have no entry in the table.
      const entry = normalizedTable["A"];
      expect(entry).toEqual(undefined);
    });
  });
  // Checks `reverseIfRtl` on strings obtained by first looking each character
  // up in the `getNormalizedUnicodes` table.
  describe("reverseIfRtl", () => {
    let normalizedTable;

    // Returns the table entry for `char`, or `char` itself when the table
    // has no entry for it.
    const getGlyphUnicode = (char) => {
      const normalized = normalizedTable[char];
      return normalized !== undefined ? normalized : char;
    };

    // Fetch the table once for the whole suite.
    beforeAll(() => {
      normalizedTable = getNormalizedUnicodes();
    });

    // Drop the reference once the suite has finished.
    afterAll(() => {
      normalizedTable = null;
    });

    it("should not reverse LTR characters", () => {
      const latinA = getGlyphUnicode("A");
      expect(reverseIfRtl(latinA)).toEqual("A");

      const fiLigature = getGlyphUnicode("\uFB01");
      expect(reverseIfRtl(fiLigature)).toEqual("fi");
    });

    it("should reverse RTL characters", () => {
      // Hebrew (no-op, since it's not a combined character)
      const hebrewAlef = getGlyphUnicode("\u05D0");
      expect(reverseIfRtl(hebrewAlef)).toEqual("\u05D0");

      // Arabic
      const arabicAlef = getGlyphUnicode("\u0675");
      expect(reverseIfRtl(arabicAlef)).toEqual("\u0674\u0627");
    });
  });
  192. });