cmap.js 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044
  1. /* Copyright 2012 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. import {
  16. CMapCompressionType,
  17. FormatError,
  18. unreachable,
  19. warn,
  20. } from "../shared/util.js";
  21. import { Cmd, EOF, isCmd, Name } from "./primitives.js";
  22. import { BaseStream } from "./base_stream.js";
  23. import { Lexer } from "./parser.js";
  24. import { MissingDataException } from "./core_utils.js";
  25. import { Stream } from "./stream.js";
  26. const BUILT_IN_CMAPS = [
  27. // << Start unicode maps.
  28. "Adobe-GB1-UCS2",
  29. "Adobe-CNS1-UCS2",
  30. "Adobe-Japan1-UCS2",
  31. "Adobe-Korea1-UCS2",
  32. // >> End unicode maps.
  33. "78-EUC-H",
  34. "78-EUC-V",
  35. "78-H",
  36. "78-RKSJ-H",
  37. "78-RKSJ-V",
  38. "78-V",
  39. "78ms-RKSJ-H",
  40. "78ms-RKSJ-V",
  41. "83pv-RKSJ-H",
  42. "90ms-RKSJ-H",
  43. "90ms-RKSJ-V",
  44. "90msp-RKSJ-H",
  45. "90msp-RKSJ-V",
  46. "90pv-RKSJ-H",
  47. "90pv-RKSJ-V",
  48. "Add-H",
  49. "Add-RKSJ-H",
  50. "Add-RKSJ-V",
  51. "Add-V",
  52. "Adobe-CNS1-0",
  53. "Adobe-CNS1-1",
  54. "Adobe-CNS1-2",
  55. "Adobe-CNS1-3",
  56. "Adobe-CNS1-4",
  57. "Adobe-CNS1-5",
  58. "Adobe-CNS1-6",
  59. "Adobe-GB1-0",
  60. "Adobe-GB1-1",
  61. "Adobe-GB1-2",
  62. "Adobe-GB1-3",
  63. "Adobe-GB1-4",
  64. "Adobe-GB1-5",
  65. "Adobe-Japan1-0",
  66. "Adobe-Japan1-1",
  67. "Adobe-Japan1-2",
  68. "Adobe-Japan1-3",
  69. "Adobe-Japan1-4",
  70. "Adobe-Japan1-5",
  71. "Adobe-Japan1-6",
  72. "Adobe-Korea1-0",
  73. "Adobe-Korea1-1",
  74. "Adobe-Korea1-2",
  75. "B5-H",
  76. "B5-V",
  77. "B5pc-H",
  78. "B5pc-V",
  79. "CNS-EUC-H",
  80. "CNS-EUC-V",
  81. "CNS1-H",
  82. "CNS1-V",
  83. "CNS2-H",
  84. "CNS2-V",
  85. "ETHK-B5-H",
  86. "ETHK-B5-V",
  87. "ETen-B5-H",
  88. "ETen-B5-V",
  89. "ETenms-B5-H",
  90. "ETenms-B5-V",
  91. "EUC-H",
  92. "EUC-V",
  93. "Ext-H",
  94. "Ext-RKSJ-H",
  95. "Ext-RKSJ-V",
  96. "Ext-V",
  97. "GB-EUC-H",
  98. "GB-EUC-V",
  99. "GB-H",
  100. "GB-V",
  101. "GBK-EUC-H",
  102. "GBK-EUC-V",
  103. "GBK2K-H",
  104. "GBK2K-V",
  105. "GBKp-EUC-H",
  106. "GBKp-EUC-V",
  107. "GBT-EUC-H",
  108. "GBT-EUC-V",
  109. "GBT-H",
  110. "GBT-V",
  111. "GBTpc-EUC-H",
  112. "GBTpc-EUC-V",
  113. "GBpc-EUC-H",
  114. "GBpc-EUC-V",
  115. "H",
  116. "HKdla-B5-H",
  117. "HKdla-B5-V",
  118. "HKdlb-B5-H",
  119. "HKdlb-B5-V",
  120. "HKgccs-B5-H",
  121. "HKgccs-B5-V",
  122. "HKm314-B5-H",
  123. "HKm314-B5-V",
  124. "HKm471-B5-H",
  125. "HKm471-B5-V",
  126. "HKscs-B5-H",
  127. "HKscs-B5-V",
  128. "Hankaku",
  129. "Hiragana",
  130. "KSC-EUC-H",
  131. "KSC-EUC-V",
  132. "KSC-H",
  133. "KSC-Johab-H",
  134. "KSC-Johab-V",
  135. "KSC-V",
  136. "KSCms-UHC-H",
  137. "KSCms-UHC-HW-H",
  138. "KSCms-UHC-HW-V",
  139. "KSCms-UHC-V",
  140. "KSCpc-EUC-H",
  141. "KSCpc-EUC-V",
  142. "Katakana",
  143. "NWP-H",
  144. "NWP-V",
  145. "RKSJ-H",
  146. "RKSJ-V",
  147. "Roman",
  148. "UniCNS-UCS2-H",
  149. "UniCNS-UCS2-V",
  150. "UniCNS-UTF16-H",
  151. "UniCNS-UTF16-V",
  152. "UniCNS-UTF32-H",
  153. "UniCNS-UTF32-V",
  154. "UniCNS-UTF8-H",
  155. "UniCNS-UTF8-V",
  156. "UniGB-UCS2-H",
  157. "UniGB-UCS2-V",
  158. "UniGB-UTF16-H",
  159. "UniGB-UTF16-V",
  160. "UniGB-UTF32-H",
  161. "UniGB-UTF32-V",
  162. "UniGB-UTF8-H",
  163. "UniGB-UTF8-V",
  164. "UniJIS-UCS2-H",
  165. "UniJIS-UCS2-HW-H",
  166. "UniJIS-UCS2-HW-V",
  167. "UniJIS-UCS2-V",
  168. "UniJIS-UTF16-H",
  169. "UniJIS-UTF16-V",
  170. "UniJIS-UTF32-H",
  171. "UniJIS-UTF32-V",
  172. "UniJIS-UTF8-H",
  173. "UniJIS-UTF8-V",
  174. "UniJIS2004-UTF16-H",
  175. "UniJIS2004-UTF16-V",
  176. "UniJIS2004-UTF32-H",
  177. "UniJIS2004-UTF32-V",
  178. "UniJIS2004-UTF8-H",
  179. "UniJIS2004-UTF8-V",
  180. "UniJISPro-UCS2-HW-V",
  181. "UniJISPro-UCS2-V",
  182. "UniJISPro-UTF8-V",
  183. "UniJISX0213-UTF32-H",
  184. "UniJISX0213-UTF32-V",
  185. "UniJISX02132004-UTF32-H",
  186. "UniJISX02132004-UTF32-V",
  187. "UniKS-UCS2-H",
  188. "UniKS-UCS2-V",
  189. "UniKS-UTF16-H",
  190. "UniKS-UTF16-V",
  191. "UniKS-UTF32-H",
  192. "UniKS-UTF32-V",
  193. "UniKS-UTF8-H",
  194. "UniKS-UTF8-V",
  195. "V",
  196. "WP-Symbol",
  197. ];
  198. // Heuristic to avoid hanging the worker-thread for CMap data with ridiculously
  199. // large ranges, such as e.g. 0xFFFFFFFF (fixes issue11922_reduced.pdf).
  200. const MAX_MAP_RANGE = 2 ** 24 - 1; // = 0xFFFFFF
  201. // CMap, not to be confused with TrueType's cmap.
  202. class CMap {
  203. constructor(builtInCMap = false) {
  204. // Codespace ranges are stored as follows:
  205. // [[1BytePairs], [2BytePairs], [3BytePairs], [4BytePairs]]
  206. // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
  207. this.codespaceRanges = [[], [], [], []];
  208. this.numCodespaceRanges = 0;
  209. // Map entries have one of two forms.
  210. // - cid chars are 16-bit unsigned integers, stored as integers.
  211. // - bf chars are variable-length byte sequences, stored as strings, with
  212. // one byte per character.
  213. this._map = [];
  214. this.name = "";
  215. this.vertical = false;
  216. this.useCMap = null;
  217. this.builtInCMap = builtInCMap;
  218. }
  219. addCodespaceRange(n, low, high) {
  220. this.codespaceRanges[n - 1].push(low, high);
  221. this.numCodespaceRanges++;
  222. }
  223. mapCidRange(low, high, dstLow) {
  224. if (high - low > MAX_MAP_RANGE) {
  225. throw new Error("mapCidRange - ignoring data above MAX_MAP_RANGE.");
  226. }
  227. while (low <= high) {
  228. this._map[low++] = dstLow++;
  229. }
  230. }
  231. mapBfRange(low, high, dstLow) {
  232. if (high - low > MAX_MAP_RANGE) {
  233. throw new Error("mapBfRange - ignoring data above MAX_MAP_RANGE.");
  234. }
  235. const lastByte = dstLow.length - 1;
  236. while (low <= high) {
  237. this._map[low++] = dstLow;
  238. // Only the last byte has to be incremented (in the normal case).
  239. const nextCharCode = dstLow.charCodeAt(lastByte) + 1;
  240. if (nextCharCode > 0xff) {
  241. dstLow =
  242. dstLow.substring(0, lastByte - 1) +
  243. String.fromCharCode(dstLow.charCodeAt(lastByte - 1) + 1) +
  244. "\x00";
  245. continue;
  246. }
  247. dstLow =
  248. dstLow.substring(0, lastByte) + String.fromCharCode(nextCharCode);
  249. }
  250. }
  251. mapBfRangeToArray(low, high, array) {
  252. if (high - low > MAX_MAP_RANGE) {
  253. throw new Error("mapBfRangeToArray - ignoring data above MAX_MAP_RANGE.");
  254. }
  255. const ii = array.length;
  256. let i = 0;
  257. while (low <= high && i < ii) {
  258. this._map[low] = array[i++];
  259. ++low;
  260. }
  261. }
  262. // This is used for both bf and cid chars.
  263. mapOne(src, dst) {
  264. this._map[src] = dst;
  265. }
  266. lookup(code) {
  267. return this._map[code];
  268. }
  269. contains(code) {
  270. return this._map[code] !== undefined;
  271. }
  272. forEach(callback) {
  273. // Most maps have fewer than 65536 entries, and for those we use normal
  274. // array iteration. But really sparse tables are possible -- e.g. with
  275. // indices in the *billions*. For such tables we use for..in, which isn't
  276. // ideal because it stringifies the indices for all present elements, but
  277. // it does avoid iterating over every undefined entry.
  278. const map = this._map;
  279. const length = map.length;
  280. if (length <= 0x10000) {
  281. for (let i = 0; i < length; i++) {
  282. if (map[i] !== undefined) {
  283. callback(i, map[i]);
  284. }
  285. }
  286. } else {
  287. for (const i in map) {
  288. callback(i, map[i]);
  289. }
  290. }
  291. }
  292. charCodeOf(value) {
  293. // `Array.prototype.indexOf` is *extremely* inefficient for arrays which
  294. // are both very sparse and very large (see issue8372.pdf).
  295. const map = this._map;
  296. if (map.length <= 0x10000) {
  297. return map.indexOf(value);
  298. }
  299. for (const charCode in map) {
  300. if (map[charCode] === value) {
  301. return charCode | 0;
  302. }
  303. }
  304. return -1;
  305. }
  306. getMap() {
  307. return this._map;
  308. }
  309. readCharCode(str, offset, out) {
  310. let c = 0;
  311. const codespaceRanges = this.codespaceRanges;
  312. // 9.7.6.2 CMap Mapping
  313. // The code length is at most 4.
  314. for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
  315. c = ((c << 8) | str.charCodeAt(offset + n)) >>> 0;
  316. // Check each codespace range to see if it falls within.
  317. const codespaceRange = codespaceRanges[n];
  318. for (let k = 0, kk = codespaceRange.length; k < kk; ) {
  319. const low = codespaceRange[k++];
  320. const high = codespaceRange[k++];
  321. if (c >= low && c <= high) {
  322. out.charcode = c;
  323. out.length = n + 1;
  324. return;
  325. }
  326. }
  327. }
  328. out.charcode = 0;
  329. out.length = 1;
  330. }
  331. getCharCodeLength(charCode) {
  332. const codespaceRanges = this.codespaceRanges;
  333. for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
  334. // Check each codespace range to see if it falls within.
  335. const codespaceRange = codespaceRanges[n];
  336. for (let k = 0, kk = codespaceRange.length; k < kk; ) {
  337. const low = codespaceRange[k++];
  338. const high = codespaceRange[k++];
  339. if (charCode >= low && charCode <= high) {
  340. return n + 1;
  341. }
  342. }
  343. }
  344. return 1;
  345. }
  346. get length() {
  347. return this._map.length;
  348. }
  349. get isIdentityCMap() {
  350. if (!(this.name === "Identity-H" || this.name === "Identity-V")) {
  351. return false;
  352. }
  353. if (this._map.length !== 0x10000) {
  354. return false;
  355. }
  356. for (let i = 0; i < 0x10000; i++) {
  357. if (this._map[i] !== i) {
  358. return false;
  359. }
  360. }
  361. return true;
  362. }
  363. }
  364. // A special case of CMap, where the _map array implicitly has a length of
  365. // 65536 and each element is equal to its index.
  366. class IdentityCMap extends CMap {
  367. constructor(vertical, n) {
  368. super();
  369. this.vertical = vertical;
  370. this.addCodespaceRange(n, 0, 0xffff);
  371. }
  372. mapCidRange(low, high, dstLow) {
  373. unreachable("should not call mapCidRange");
  374. }
  375. mapBfRange(low, high, dstLow) {
  376. unreachable("should not call mapBfRange");
  377. }
  378. mapBfRangeToArray(low, high, array) {
  379. unreachable("should not call mapBfRangeToArray");
  380. }
  381. mapOne(src, dst) {
  382. unreachable("should not call mapCidOne");
  383. }
  384. lookup(code) {
  385. return Number.isInteger(code) && code <= 0xffff ? code : undefined;
  386. }
  387. contains(code) {
  388. return Number.isInteger(code) && code <= 0xffff;
  389. }
  390. forEach(callback) {
  391. for (let i = 0; i <= 0xffff; i++) {
  392. callback(i, i);
  393. }
  394. }
  395. charCodeOf(value) {
  396. return Number.isInteger(value) && value <= 0xffff ? value : -1;
  397. }
  398. getMap() {
  399. // Sometimes identity maps must be instantiated, but it's rare.
  400. const map = new Array(0x10000);
  401. for (let i = 0; i <= 0xffff; i++) {
  402. map[i] = i;
  403. }
  404. return map;
  405. }
  406. get length() {
  407. return 0x10000;
  408. }
  409. // eslint-disable-next-line getter-return
  410. get isIdentityCMap() {
  411. unreachable("should not access .isIdentityCMap");
  412. }
  413. }
  414. const BinaryCMapReader = (function BinaryCMapReaderClosure() {
  /**
   * Interprets the bytes `a[0..size]` (inclusive, most significant byte
   * first) as one unsigned 32-bit integer.
   */
  function hexToInt(a, size) {
    let value = 0;
    let index = 0;
    while (index <= size) {
      value = (value << 8) | a[index];
      index += 1;
    }
    // Reinterpret the signed 32-bit accumulator as unsigned.
    return value >>> 0;
  }
  /**
   * Converts the bytes `a[0..size]` (inclusive) into a string with one
   * character per byte.
   */
  function hexToStr(a, size) {
    // This code is hot. Special-case some common values to avoid creating an
    // object with subarray().
    switch (size) {
      case 1:
        return String.fromCharCode(a[0], a[1]);
      case 3:
        return String.fromCharCode(a[0], a[1], a[2], a[3]);
      default:
        return String.fromCharCode(...a.subarray(0, size + 1));
    }
  }
  /**
   * Adds the big-endian number in `b[0..size]` to the one in `a[0..size]`,
   * in place. A carry out of the most significant byte is dropped.
   */
  function addHex(a, b, size) {
    let carry = 0;
    let pos = size;
    while (pos >= 0) {
      const sum = carry + a[pos] + b[pos];
      a[pos] = sum & 255;
      carry = sum >> 8;
      pos -= 1;
    }
  }
  /**
   * Increments the big-endian number in `a[0..size]` by one, in place,
   * stopping as soon as there is no carry left to propagate.
   */
  function incHex(a, size) {
    let carry = 1;
    for (let pos = size; carry > 0 && pos >= 0; pos -= 1) {
      const sum = a[pos] + carry;
      a[pos] = sum & 255;
      carry = sum >> 8;
    }
  }
  const MAX_NUM_SIZE = 16;
  // Every encoded byte carries 7 payload bits, hence a MAX_NUM_SIZE-byte
  // number needs up to ceil(MAX_NUM_SIZE * 8 / 7) encoded bytes.
  const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)

  /**
   * Sequential reader over the bytes of a binary CMap. Numbers are stored as
   * groups of 7 bits, most significant group first, where the high bit
   * (0x80) of a byte signals that another byte follows.
   */
  class BinaryCMapStream {
    /**
     * @param {Uint8Array} data - The raw bytes to read from.
     */
    constructor(data) {
      this.buffer = data;
      this.pos = 0;
      this.end = data.length;
      // Scratch space for the 7-bit groups collected by `readHexNumber`.
      this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
    }

    /** @returns {number} The next byte, or -1 at the end of the data. */
    readByte() {
      if (this.pos >= this.end) {
        return -1;
      }
      return this.buffer[this.pos++];
    }

    /**
     * Reads one variable-length number.
     * @returns {number}
     * @throws {FormatError} When the data ends in the middle of the number.
     */
    readNumber() {
      let n = 0;
      let last;
      do {
        const b = this.readByte();
        if (b < 0) {
          throw new FormatError("unexpected EOF in bcmap");
        }
        last = !(b & 0x80);
        n = (n << 7) | (b & 0x7f);
      } while (!last);
      return n;
    }

    /**
     * Reads a variable-length number whose lowest bit is the sign flag:
     * an odd `n` decodes to `~(n >>> 1)`, an even `n` to `n >>> 1`.
     * @returns {number}
     */
    readSigned() {
      const n = this.readNumber();
      return n & 1 ? ~(n >>> 1) : n >>> 1;
    }

    /**
     * Copies the next `size + 1` raw bytes into the start of `num`.
     * @param {Uint8Array} num - Destination buffer.
     * @param {number} size - Number of bytes minus one.
     */
    readHex(num, size) {
      num.set(this.buffer.subarray(this.pos, this.pos + size + 1));
      this.pos += size + 1;
    }

    /**
     * Reads one variable-length number and stores it as `size + 1` big-endian
     * bytes in `num[0..size]`.
     * @param {Uint8Array} num - Destination buffer.
     * @param {number} size - Number of bytes minus one.
     * @throws {FormatError} When the data ends in the middle of the number.
     */
    readHexNumber(num, size) {
      let last;
      const stack = this.tmpBuf;
      let sp = 0;
      // Collect the 7-bit groups, most significant group first.
      do {
        const b = this.readByte();
        if (b < 0) {
          throw new FormatError("unexpected EOF in bcmap");
        }
        last = !(b & 0x80);
        stack[sp++] = b & 0x7f;
      } while (!last);
      let i = size,
        buffer = 0,
        bufferSize = 0;
      // Repack the groups into bytes, least significant byte first.
      while (i >= 0) {
        // Stop refilling once every collected group has been consumed; the
        // remaining high bytes are then filled from whatever bits are left
        // in `buffer` (i.e. zeros).
        while (bufferSize < 8 && sp > 0) {
          buffer |= stack[--sp] << bufferSize;
          bufferSize += 7;
        }
        num[i] = buffer & 255;
        i--;
        buffer >>= 8;
        bufferSize -= 8;
      }
    }

    /**
     * Like `readHexNumber`, but the lowest bit of the encoded value is a sign
     * flag: the value is shifted right by one bit and, when the flag is set,
     * every byte is inverted.
     * @param {Uint8Array} num - Destination buffer.
     * @param {number} size - Number of bytes minus one.
     */
    readHexSigned(num, size) {
      this.readHexNumber(num, size);
      const sign = num[size] & 1 ? 255 : 0;
      let c = 0;
      for (let i = 0; i <= size; i++) {
        // Carry the dropped low bit of the previous byte into this one.
        c = ((c & 1) << 8) | num[i];
        num[i] = (c >> 1) ^ sign;
      }
    }

    /**
     * Reads a length-prefixed string; the length and every character code are
     * stored as variable-length numbers.
     * @returns {string}
     */
    readString() {
      const len = this.readNumber();
      let s = "";
      for (let i = 0; i < len; i++) {
        s += String.fromCharCode(this.readNumber());
      }
      return s;
    }
  }
  // eslint-disable-next-line no-shadow
  class BinaryCMapReader {
    /**
     * Decodes binary CMap `data` into `cMap`.
     *
     * The first byte is a header whose lowest bit sets `cMap.vertical`. Every
     * following record starts with a byte whose top three bits select the
     * record type (7 = metadata), whose 0x10 bit marks a "sequence" record
     * and whose low four bits hold the data size minus one.
     *
     * @param {Uint8Array} data - The binary CMap bytes.
     * @param {CMap} cMap - The CMap to populate.
     * @param {function(string)} extend - Called with the `usecmap` name, when
     *   the data contains one; its result is then returned.
     * @returns {Promise} Resolves with `extend(useCMap)` or with `cMap`.
     */
    async process(data, cMap, extend) {
      const stream = new BinaryCMapStream(data);
      const header = stream.readByte();
      cMap.vertical = !!(header & 1);

      let useCMap = null;
      // Scratch buffers, shared between all records.
      const rangeLow = new Uint8Array(MAX_NUM_SIZE);
      const rangeHigh = new Uint8Array(MAX_NUM_SIZE);
      const srcChar = new Uint8Array(MAX_NUM_SIZE);
      const dstChar = new Uint8Array(MAX_NUM_SIZE);
      const delta = new Uint8Array(MAX_NUM_SIZE);
      // Size (minus one) of the source codes in bfchar/bfrange records.
      const ucs2DataSize = 1;

      // Reads the first range of a record: a raw low bound followed by the
      // high bound, stored as an offset from the low bound.
      const readFirstRange = size => {
        stream.readHex(rangeLow, size);
        stream.readHexNumber(rangeHigh, size);
        addHex(rangeHigh, rangeLow, size);
      };
      // Reads a following range. Its low bound is either the previous high
      // bound plus one (`contiguous`), or that value plus a stored offset.
      const readNextRange = (size, contiguous) => {
        incHex(rangeHigh, size);
        if (contiguous) {
          rangeLow.set(rangeHigh);
        } else {
          stream.readHexNumber(rangeLow, size);
          addHex(rangeLow, rangeHigh, size);
        }
        stream.readHexNumber(rangeHigh, size);
        addHex(rangeHigh, rangeLow, size);
      };
      // Advances the source char to the next one: the previous char plus one,
      // plus a stored offset unless the record is `contiguous`.
      const advanceSrcChar = (size, contiguous) => {
        incHex(srcChar, size);
        if (!contiguous) {
          stream.readHexNumber(delta, size);
          addHex(srcChar, delta, size);
        }
      };

      for (let b = stream.readByte(); b >= 0; b = stream.readByte()) {
        const type = b >> 5;
        if (type === 7) {
          // metadata, e.g. comment or usecmap
          const metadataKind = b & 0x1f;
          if (metadataKind === 0) {
            stream.readString(); // skipping comment
          } else if (metadataKind === 1) {
            useCMap = stream.readString();
          }
          continue;
        }
        const sequence = !!(b & 0x10);
        const dataSize = b & 15;

        if (dataSize + 1 > MAX_NUM_SIZE) {
          throw new Error("BinaryCMapReader.process: Invalid dataSize.");
        }

        const subitemsCount = stream.readNumber();
        switch (type) {
          case 0: {
            // codespacerange
            readFirstRange(dataSize);
            cMap.addCodespaceRange(
              dataSize + 1,
              hexToInt(rangeLow, dataSize),
              hexToInt(rangeHigh, dataSize)
            );
            for (let i = 1; i < subitemsCount; i++) {
              readNextRange(dataSize, /* contiguous = */ false);
              cMap.addCodespaceRange(
                dataSize + 1,
                hexToInt(rangeLow, dataSize),
                hexToInt(rangeHigh, dataSize)
              );
            }
            break;
          }
          case 1: {
            // notdefrange
            readFirstRange(dataSize);
            stream.readNumber(); // code
            // undefined range, skipping
            for (let i = 1; i < subitemsCount; i++) {
              readNextRange(dataSize, /* contiguous = */ false);
              stream.readNumber(); // code
              // nop
            }
            break;
          }
          case 2: {
            // cidchar
            stream.readHex(srcChar, dataSize);
            let code = stream.readNumber();
            cMap.mapOne(hexToInt(srcChar, dataSize), code);
            for (let i = 1; i < subitemsCount; i++) {
              advanceSrcChar(dataSize, sequence);
              // Codes are stored as signed offsets from the previous code + 1.
              code = stream.readSigned() + (code + 1);
              cMap.mapOne(hexToInt(srcChar, dataSize), code);
            }
            break;
          }
          case 3: {
            // cidrange
            readFirstRange(dataSize);
            let code = stream.readNumber();
            cMap.mapCidRange(
              hexToInt(rangeLow, dataSize),
              hexToInt(rangeHigh, dataSize),
              code
            );
            for (let i = 1; i < subitemsCount; i++) {
              readNextRange(dataSize, sequence);
              code = stream.readNumber();
              cMap.mapCidRange(
                hexToInt(rangeLow, dataSize),
                hexToInt(rangeHigh, dataSize),
                code
              );
            }
            break;
          }
          case 4: {
            // bfchar
            stream.readHex(srcChar, ucs2DataSize);
            stream.readHex(dstChar, dataSize);
            cMap.mapOne(
              hexToInt(srcChar, ucs2DataSize),
              hexToStr(dstChar, dataSize)
            );
            for (let i = 1; i < subitemsCount; i++) {
              advanceSrcChar(ucs2DataSize, sequence);
              // Destinations are stored as signed offsets from the previous
              // destination + 1.
              incHex(dstChar, dataSize);
              stream.readHexSigned(delta, dataSize);
              addHex(dstChar, delta, dataSize);
              cMap.mapOne(
                hexToInt(srcChar, ucs2DataSize),
                hexToStr(dstChar, dataSize)
              );
            }
            break;
          }
          case 5: {
            // bfrange
            readFirstRange(ucs2DataSize);
            stream.readHex(dstChar, dataSize);
            cMap.mapBfRange(
              hexToInt(rangeLow, ucs2DataSize),
              hexToInt(rangeHigh, ucs2DataSize),
              hexToStr(dstChar, dataSize)
            );
            for (let i = 1; i < subitemsCount; i++) {
              readNextRange(ucs2DataSize, sequence);
              stream.readHex(dstChar, dataSize);
              cMap.mapBfRange(
                hexToInt(rangeLow, ucs2DataSize),
                hexToInt(rangeHigh, ucs2DataSize),
                hexToStr(dstChar, dataSize)
              );
            }
            break;
          }
          default:
            throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
        }
      }

      return useCMap ? extend(useCMap) : cMap;
    }
  }
  705. return BinaryCMapReader;
  706. })();
  707. const CMapFactory = (function CMapFactoryClosure() {
  /**
   * Interprets `str` as a big-endian byte sequence (one byte per character)
   * and returns it as an unsigned 32-bit integer.
   */
  function strToInt(str) {
    const byteCount = str.length;
    let value = 0;
    let pos = 0;
    while (pos < byteCount) {
      value = (value << 8) | str.charCodeAt(pos);
      pos += 1;
    }
    return value >>> 0;
  }
  /**
   * Asserts that `obj` is a string.
   * @throws {FormatError} For any other value.
   */
  function expectString(obj) {
    if (typeof obj === "string") {
      return;
    }
    throw new FormatError("Malformed CMap: expected string.");
  }
  /**
   * Asserts that `obj` is an integer.
   * @throws {FormatError} For any other value.
   */
  function expectInt(obj) {
    if (Number.isInteger(obj)) {
      return;
    }
    throw new FormatError("Malformed CMap: expected int.");
  }
  /**
   * Parses `<src> <dst>` string pairs and maps each one on `cMap`, until the
   * `endbfchar` command or the end of the data is reached.
   * @throws {FormatError} When a source or destination is not a string.
   */
  function parseBfChar(cMap, lexer) {
    for (
      let srcObj = lexer.getObj();
      srcObj !== EOF && !isCmd(srcObj, "endbfchar");
      srcObj = lexer.getObj()
    ) {
      expectString(srcObj);
      const src = strToInt(srcObj);
      const dst = lexer.getObj();
      // TODO are /dstName used?
      expectString(dst);
      cMap.mapOne(src, dst);
    }
  }
  /**
   * Parses `<low> <high> dst` triples until the `endbfrange` command. `dst`
   * is either a string, an integer (converted to a one-character string), or
   * a `[ ... ]` array of per-code destinations.
   * @throws {FormatError} When the data ends before `endbfrange`, when a
   *   bound is not a string, or when `dst` has an unsupported form.
   */
  function parseBfRange(cMap, lexer) {
    for (;;) {
      const lowObj = lexer.getObj();
      if (lowObj === EOF) {
        break;
      }
      if (isCmd(lowObj, "endbfrange")) {
        return;
      }
      expectString(lowObj);
      const low = strToInt(lowObj);

      const highObj = lexer.getObj();
      expectString(highObj);
      const high = strToInt(highObj);

      const dstObj = lexer.getObj();
      if (Number.isInteger(dstObj)) {
        cMap.mapBfRange(low, high, String.fromCharCode(dstObj));
        continue;
      }
      if (typeof dstObj === "string") {
        cMap.mapBfRange(low, high, dstObj);
        continue;
      }
      if (!isCmd(dstObj, "[")) {
        break;
      }
      // Collect everything up to the closing bracket (or the end of data).
      const array = [];
      for (
        let item = lexer.getObj();
        !isCmd(item, "]") && item !== EOF;
        item = lexer.getObj()
      ) {
        array.push(item);
      }
      cMap.mapBfRangeToArray(low, high, array);
    }
    throw new FormatError("Invalid bf range.");
  }
  /**
   * Parses `<src> cid` pairs and maps each one on `cMap`, until the
   * `endcidchar` command or the end of the data is reached.
   * @throws {FormatError} When a source is not a string or a CID is not an
   *   integer.
   */
  function parseCidChar(cMap, lexer) {
    for (
      let srcObj = lexer.getObj();
      srcObj !== EOF && !isCmd(srcObj, "endcidchar");
      srcObj = lexer.getObj()
    ) {
      expectString(srcObj);
      const src = strToInt(srcObj);
      const dst = lexer.getObj();
      expectInt(dst);
      cMap.mapOne(src, dst);
    }
  }
  /**
   * Parses `<low> <high> cid` triples and maps each range on `cMap`, until
   * the `endcidrange` command or the end of the data is reached.
   * @throws {FormatError} When a bound is not a string or the CID is not an
   *   integer.
   */
  function parseCidRange(cMap, lexer) {
    for (
      let lowObj = lexer.getObj();
      lowObj !== EOF && !isCmd(lowObj, "endcidrange");
      lowObj = lexer.getObj()
    ) {
      expectString(lowObj);
      const low = strToInt(lowObj);

      const highObj = lexer.getObj();
      expectString(highObj);
      const high = strToInt(highObj);

      const dstLow = lexer.getObj();
      expectInt(dstLow);
      cMap.mapCidRange(low, high, dstLow);
    }
  }
  /**
   * Parses `<low> <high>` string pairs until the `endcodespacerange` command
   * and registers each pair on `cMap`, using the length of the `high` string
   * as the code length.
   * @throws {FormatError} When the data ends before `endcodespacerange` or a
   *   bound is not a string.
   */
  function parseCodespaceRange(cMap, lexer) {
    for (;;) {
      const lowObj = lexer.getObj();
      if (lowObj === EOF) {
        break;
      }
      if (isCmd(lowObj, "endcodespacerange")) {
        return;
      }
      if (typeof lowObj !== "string") {
        break;
      }
      const low = strToInt(lowObj);

      const highObj = lexer.getObj();
      if (typeof highObj !== "string") {
        break;
      }
      cMap.addCodespaceRange(highObj.length, low, strToInt(highObj));
    }
    throw new FormatError("Invalid codespace range.");
  }
  /**
   * Reads the object following `/WMode` and, when it is an integer, sets
   * `cMap.vertical` to its truthiness. Other values are ignored.
   */
  function parseWMode(cMap, lexer) {
    const value = lexer.getObj();
    if (!Number.isInteger(value)) {
      return;
    }
    cMap.vertical = Boolean(value);
  }
  /**
   * Reads the object following `/CMapName` and, when it is a `Name`, stores
   * its name on `cMap.name`. Other values are ignored.
   */
  function parseCMapName(cMap, lexer) {
    const value = lexer.getObj();
    if (!(value instanceof Name)) {
      return;
    }
    cMap.name = value.name;
  }
  /**
   * Parses a textual CMap from `lexer` into `cMap`.
   *
   * Parsing errors are reported with `warn` and parsing continues with the
   * next object; only `MissingDataException` is re-thrown.
   *
   * @param {CMap} cMap - The CMap to populate.
   * @param {Lexer} lexer - Source of the CMap objects.
   * @param {function} fetchBuiltInCMap - Forwarded to `extendCMap`.
   * @param {string} [useCMap] - Name of the CMap to extend from; when falsy,
   *   the name preceding an embedded `usecmap` command is used instead.
   * @returns {Promise<CMap>}
   */
  async function parseCMap(cMap, lexer, fetchBuiltInCMap, useCMap) {
    // Parsers for the sections that are opened by a `begin...` command.
    const sectionParsers = new Map([
      ["begincodespacerange", parseCodespaceRange],
      ["beginbfchar", parseBfChar],
      ["begincidchar", parseCidChar],
      ["beginbfrange", parseBfRange],
      ["begincidrange", parseCidRange],
    ]);
    let lastName;
    let embeddedUseCMap;

    for (;;) {
      try {
        const obj = lexer.getObj();
        if (obj === EOF) {
          break;
        }
        if (obj instanceof Name) {
          switch (obj.name) {
            case "WMode":
              parseWMode(cMap, lexer);
              break;
            case "CMapName":
              parseCMapName(cMap, lexer);
              break;
          }
          // Remembered since `usecmap` refers to the name preceding it.
          lastName = obj;
          continue;
        }
        if (!(obj instanceof Cmd)) {
          continue;
        }
        const command = obj.cmd;
        if (command === "endcmap") {
          break;
        }
        if (command === "usecmap") {
          if (lastName instanceof Name) {
            embeddedUseCMap = lastName.name;
          }
          continue;
        }
        const parseSection = sectionParsers.get(command);
        if (parseSection) {
          parseSection(cMap, lexer);
        }
      } catch (ex) {
        if (ex instanceof MissingDataException) {
          throw ex;
        }
        warn("Invalid cMap data: " + ex);
        continue;
      }
    }

    // Load the useCMap definition from the file only if there wasn't one
    // specified.
    const effectiveUseCMap = useCMap || embeddedUseCMap;
    if (!effectiveUseCMap) {
      return cMap;
    }
    return extendCMap(cMap, fetchBuiltInCMap, effectiveUseCMap);
  }
  /**
   * Loads the built-in CMap named `useCMap`, stores it on `cMap.useCMap` and
   * merges it into `cMap`.
   * @returns {Promise<CMap>} The same `cMap`, after merging.
   */
  async function extendCMap(cMap, fetchBuiltInCMap, useCMap) {
    const parent = await createBuiltInCMap(useCMap, fetchBuiltInCMap);
    cMap.useCMap = parent;

    // If there aren't any code space ranges defined clone all the parent ones
    // into this cMap.
    if (cMap.numCodespaceRanges === 0) {
      for (const [index, ranges] of parent.codespaceRanges.entries()) {
        cMap.codespaceRanges[index] = ranges.slice();
      }
      cMap.numCodespaceRanges = parent.numCodespaceRanges;
    }

    // Merge the map into the current one, making sure not to override
    // any previously defined entries.
    parent.forEach(key => {
      if (cMap.contains(key)) {
        return;
      }
      cMap.mapOne(key, parent.lookup(key));
    });

    return cMap;
  }
  /**
   * Creates the predefined CMap called `name`.
   *
   * "Identity-H"/"Identity-V" are built directly; every other name must be
   * listed in BUILT_IN_CMAPS and is loaded through `fetchBuiltInCMap`.
   *
   * @param {string} name
   * @param {function} fetchBuiltInCMap - Called with `name`; awaited for an
   *   object with `cMapData` and `compressionType` properties.
   * @returns {Promise<CMap>}
   * @throws {Error} For unknown names, a missing `fetchBuiltInCMap`, or an
   *   unsupported `compressionType`.
   */
  async function createBuiltInCMap(name, fetchBuiltInCMap) {
    switch (name) {
      case "Identity-H":
        return new IdentityCMap(false, 2);
      case "Identity-V":
        return new IdentityCMap(true, 2);
    }

    if (!BUILT_IN_CMAPS.includes(name)) {
      throw new Error("Unknown CMap name: " + name);
    }
    if (!fetchBuiltInCMap) {
      throw new Error("Built-in CMap parameters are not provided.");
    }

    const fetched = await fetchBuiltInCMap(name);
    const cMapData = fetched.cMapData;
    const compressionType = fetched.compressionType;
    const cMap = new CMap(true);

    switch (compressionType) {
      case CMapCompressionType.BINARY: {
        const extend = useCMap => extendCMap(cMap, fetchBuiltInCMap, useCMap);
        return new BinaryCMapReader().process(cMapData, cMap, extend);
      }
      case CMapCompressionType.NONE: {
        const lexer = new Lexer(new Stream(cMapData));
        return parseCMap(cMap, lexer, fetchBuiltInCMap, null);
      }
      default:
        throw new Error(
          `Invalid CMap "compressionType" value: ${compressionType}`
        );
    }
  }
  949. return {
  950. async create(params) {
  951. const encoding = params.encoding;
  952. const fetchBuiltInCMap = params.fetchBuiltInCMap;
  953. const useCMap = params.useCMap;
  954. if (encoding instanceof Name) {
  955. return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
  956. } else if (encoding instanceof BaseStream) {
  957. const parsedCMap = await parseCMap(
  958. /* cMap = */ new CMap(),
  959. /* lexer = */ new Lexer(encoding),
  960. fetchBuiltInCMap,
  961. useCMap
  962. );
  963. if (parsedCMap.isIdentityCMap) {
  964. return createBuiltInCMap(parsedCMap.name, fetchBuiltInCMap);
  965. }
  966. return parsedCMap;
  967. }
  968. throw new Error("Encoding required.");
  969. },
  970. };
  971. })();
  972. export { CMap, CMapFactory, IdentityCMap };