regenerate.js 34 KB


  1. /*! https://mths.be/regenerate v1.4.2 by @mathias | MIT license */
  2. ;(function(root) {
  3. // Detect free variables `exports`.
  4. var freeExports = typeof exports == 'object' && exports;
  5. // Detect free variable `module`.
  6. var freeModule = typeof module == 'object' && module &&
  7. module.exports == freeExports && module;
  8. // Detect free variable `global`, from Node.js/io.js or Browserified code,
  9. // and use it as `root`.
  10. var freeGlobal = typeof global == 'object' && global;
  11. if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
  12. root = freeGlobal;
  13. }
  14. /*--------------------------------------------------------------------------*/
  // Messages for the errors thrown by the data helpers, keyed by failure kind:
  // `rangeOrder` for a range whose end precedes its start, `codePointRange`
  // for a value outside U+0000..U+10FFFF.
  var ERRORS = {
    'rangeOrder': 'A range\u2019s `stop` value must be greater than or equal ' +
      'to the `start` value.',
    'codePointRange': 'Invalid code point value. Code points range from ' +
      'U+000000 to U+10FFFF.'
  };
  // Inclusive bounds of the UTF-16 high (lead) and low (trail) surrogate
  // code unit ranges.
  // https://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs
  var HIGH_SURROGATE_MIN = 0xD800;
  var HIGH_SURROGATE_MAX = 0xDBFF;
  var LOW_SURROGATE_MIN = 0xDC00;
  var LOW_SURROGATE_MAX = 0xDFFF;
  // Matches the escape `\x00` when it is followed by a non-digit (captured in
  // group 1) or by the end of the string.
  // In Regenerate output, `\0` is never preceded by `\` because we sort by
  // code point value, so let's keep this regular expression simple.
  var regexNull = /\\x00([^0123456789]|$)/g;
  // Scratch object used to grab a reference to a built-in object method.
  var object = {};
  var hasOwnProperty = object.hasOwnProperty;
  // Copies every own enumerable property of `source` onto `destination`
  // (inherited properties are skipped) and returns `destination`.
  var extend = function(destination, source) {
    var key;
    for (key in source) {
      if (hasOwnProperty.call(source, key)) {
        destination[key] = source[key];
      }
    }
    return destination;
  };
  // Calls `callback(value, index)` for each index from 0 up to the length
  // `array` had when iteration started.
  var forEach = function(array, callback) {
    var index = -1;
    var length = array.length;
    while (++index < length) {
      callback(array[index], index);
    }
  };
  // Reference to the built-in `Object.prototype.toString`, used for type
  // checks based on the internal class tag.
  var toString = Object.prototype.toString;
  // Returns `true` when `value` is tagged as an array.
  var isArray = function(value) {
    return toString.call(value) == '[object Array]';
  };
  // Returns `true` for number primitives (including `NaN`) and for values
  // tagged as `Number` objects.
  var isNumber = function(value) {
    return typeof value == 'number' ||
      toString.call(value) == '[object Number]';
  };
  // Left-pads `number` with zeroes up to `totalCharacters` characters. Values
  // that are already long enough are returned as a string, unchanged.
  // This assumes that `number` is a positive integer that `toString()`s nicely
  // (which is the case for all code point values). Because `zeroes` holds four
  // characters, at most four zeroes can be prepended.
  var zeroes = '0000';
  var pad = function(number, totalCharacters) {
    var string = String(number);
    return string.length < totalCharacters
      ? (zeroes + string).slice(-totalCharacters)
      : string;
  };
  // Converts `number` (coerced with `Number`) to its uppercase hexadecimal
  // string representation, without any prefix or padding.
  var hex = function(number) {
    return Number(number).toString(16).toUpperCase();
  };
  // Reference to the built-in array `slice` method, for use with `.call()`.
  var slice = [].slice;
  68. /*--------------------------------------------------------------------------*/
  // Converts a list of code points into range data: a flat array of
  // `start, end` values in which `start` is inclusive and `end` is exclusive,
  // e.g. `[1, 2, 3, 7]` becomes `[1, 4, 7, 8]`.
  //
  // The input may be in any order and may contain duplicates; it is not
  // modified. An empty list yields an empty array.
  var dataFromCodePoints = function(codePoints) {
    // Work on a numerically sorted copy, so that consecutive values end up
    // next to each other and the caller's list stays untouched.
    var sorted = Array.prototype.slice.call(codePoints).sort(function(a, b) {
      return a - b;
    });
    var result = [];
    var length = sorted.length;
    var index = -1;
    var current;
    var previous;
    while (++index < length) {
      current = sorted[index];
      if (index > 0) {
        if (current == previous) {
          // Duplicate of the value that was just handled.
          continue;
        }
        if (current == previous + 1) {
          // Still inside the current run; extend it.
          previous = current;
          continue;
        }
        // End the previous range before starting a new one.
        result.push(previous + 1);
      }
      // Start a new range.
      result.push(current);
      previous = current;
    }
    if (length) {
      // Close the last open range.
      result.push(previous + 1);
    }
    return result;
  };
  // Removes a single code point from `data` (a flat array of
  // `start, end` pairs with exclusive `end`). `data` is modified in place and
  // returned. Nothing changes when the code point is not in the set.
  var dataRemove = function(data, codePoint) {
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var length = data.length;
    while (index < length) {
      start = data[index];
      end = data[index + 1];
      if (codePoint >= start && codePoint < end) {
        // Modify this pair.
        if (codePoint == start) {
          if (end == start + 1) {
            // Just remove `start` and `end`.
            data.splice(index, 2);
            return data;
          } else {
            // Just replace `start` with a new value.
            data[index] = codePoint + 1;
            return data;
          }
        } else if (codePoint == end - 1) {
          // Just replace `end` with a new value.
          data[index + 1] = codePoint;
          return data;
        } else {
          // Replace `[start, end]` with `[startA, endA, startB, endB]`.
          data.splice(index, 2, start, codePoint, codePoint + 1, end);
          return data;
        }
      }
      index += 2;
    }
    return data;
  };
  // Removes the inclusive range `rangeStart`..`rangeEnd` from `data` (a flat
  // array of `start, end` pairs with exclusive `end`). `data` is modified in
  // place and returned. Throws an `Error` when `rangeEnd < rangeStart`.
  var dataRemoveRange = function(data, rangeStart, rangeEnd) {
    if (rangeEnd < rangeStart) {
      throw Error(ERRORS.rangeOrder);
    }
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    // Note: `data.length` is read on every pass because pairs may be removed
    // while iterating.
    while (index < data.length) {
      start = data[index];
      end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      // Exit as soon as no more matching pairs can be found.
      if (start > rangeEnd) {
        return data;
      }
      // Check if this range pair is equal to, or forms a subset of, the range
      // to be removed.
      // E.g. we have `[0, 11, 40, 51]` and want to remove 0-10 -> `[40, 51]`.
      // E.g. we have `[40, 51]` and want to remove 0-100 -> `[]`.
      if (rangeStart <= start && rangeEnd >= end) {
        // Remove this pair. The next pair now sits at `index`, so do not
        // advance.
        data.splice(index, 2);
        continue;
      }
      // Check if both `rangeStart` and `rangeEnd` are within the bounds of
      // this pair.
      // E.g. we have `[0, 11]` and want to remove 4-6 -> `[0, 4, 7, 11]`.
      if (rangeStart >= start && rangeEnd < end) {
        if (rangeStart == start) {
          // Replace `[start, end]` with `[startB, endB]`.
          data[index] = rangeEnd + 1;
          data[index + 1] = end + 1;
          return data;
        }
        // Replace `[start, end]` with `[startA, endA, startB, endB]`.
        data.splice(index, 2, start, rangeStart, rangeEnd + 1, end + 1);
        return data;
      }
      // Check if only `rangeStart` is within the bounds of this pair.
      // E.g. we have `[0, 11]` and want to remove 4-20 -> `[0, 4]`.
      if (rangeStart >= start && rangeStart <= end) {
        // Replace `end` with `rangeStart`.
        data[index + 1] = rangeStart;
        // Note: we cannot `return` just yet, in case any following pairs still
        // contain matching code points.
        // E.g. we have `[0, 11, 14, 31]` and want to remove 4-20
        // -> `[0, 4, 21, 31]`.
      }
      // Check if only `rangeEnd` is within the bounds of this pair.
      // E.g. we have `[14, 31]` and want to remove 4-20 -> `[21, 31]`.
      else if (rangeEnd >= start && rangeEnd <= end) {
        // Just replace `start`.
        data[index] = rangeEnd + 1;
        return data;
      }
      index += 2;
    }
    return data;
  };
  // Adds a single code point to `data` (a flat array of `start, end` pairs
  // with exclusive `end`), merging with neighbouring ranges where possible.
  // `data` is modified in place and returned. Throws a `RangeError` when
  // `codePoint` is below 0x0 or above 0x10FFFF.
  var dataAdd = function(data, codePoint) {
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var lastIndex = null;
    var length = data.length;
    if (codePoint < 0x0 || codePoint > 0x10FFFF) {
      throw RangeError(ERRORS.codePointRange);
    }
    while (index < length) {
      start = data[index];
      end = data[index + 1];
      // Check if the code point is already in the set.
      if (codePoint >= start && codePoint < end) {
        return data;
      }
      if (codePoint == start - 1) {
        // Just replace `start` with a new value.
        data[index] = codePoint;
        return data;
      }
      // At this point, if `start` is `greater` than `codePoint`, insert a new
      // `[start, end]` pair before the current pair, or after the current pair
      // if there is a known `lastIndex`.
      if (start > codePoint) {
        data.splice(
          lastIndex != null ? lastIndex + 2 : 0,
          0,
          codePoint,
          codePoint + 1
        );
        return data;
      }
      if (codePoint == end) {
        // Check if adding this code point causes two separate ranges to become
        // a single range, e.g. `dataAdd([0, 4, 5, 10], 4)` -> `[0, 10]`.
        if (codePoint + 1 == data[index + 2]) {
          data.splice(index, 4, start, data[index + 3]);
          return data;
        }
        // Else, just replace `end` with a new value.
        data[index + 1] = codePoint + 1;
        return data;
      }
      lastIndex = index;
      index += 2;
    }
    // The loop has finished; add the new pair to the end of the data set.
    data.push(codePoint, codePoint + 1);
    return data;
  };
  // Returns the union of two range data sets. `dataA` is copied first, so
  // neither argument is modified; every pair of `dataB` is then added to the
  // copy, as a single code point or as a range.
  var dataAddData = function(dataA, dataB) {
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var data = dataA.slice();
    var length = dataB.length;
    while (index < length) {
      start = dataB[index];
      end = dataB[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      if (start == end) {
        data = dataAdd(data, start);
      } else {
        data = dataAddRange(data, start, end);
      }
      index += 2;
    }
    return data;
  };
  // Returns `dataA` minus everything in `dataB`. `dataA` is copied first, so
  // neither argument is modified; every pair of `dataB` is then removed from
  // the copy, as a single code point or as a range.
  var dataRemoveData = function(dataA, dataB) {
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var data = dataA.slice();
    var length = dataB.length;
    while (index < length) {
      start = dataB[index];
      end = dataB[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      if (start == end) {
        data = dataRemove(data, start);
      } else {
        data = dataRemoveRange(data, start, end);
      }
      index += 2;
    }
    return data;
  };
  // Adds the inclusive range `rangeStart`..`rangeEnd` to `data` (a flat array
  // of `start, end` pairs with exclusive `end`), merging with and absorbing
  // existing pairs where they touch or overlap. `data` is modified in place
  // and returned.
  //
  // Throws an `Error` when `rangeEnd < rangeStart`, and a `RangeError` when
  // either bound is below 0x0 or above 0x10FFFF.
  var dataAddRange = function(data, rangeStart, rangeEnd) {
    if (rangeEnd < rangeStart) {
      throw Error(ERRORS.rangeOrder);
    }
    if (
      rangeStart < 0x0 || rangeStart > 0x10FFFF ||
      rangeEnd < 0x0 || rangeEnd > 0x10FFFF
    ) {
      throw RangeError(ERRORS.codePointRange);
    }
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var added = false;
    // Note: `length` is captured once. Pairs removed below shrink `data`, in
    // which case the remaining passes read `undefined` values that match none
    // of the conditions.
    var length = data.length;
    while (index < length) {
      start = data[index];
      end = data[index + 1];
      if (added) {
        // The range has already been added to the set; at this point, we just
        // need to get rid of the following ranges in case they overlap.
        // Check if this range can be combined with the previous range.
        if (start == rangeEnd + 1) {
          data.splice(index - 1, 2);
          return data;
        }
        // Exit as soon as no more possibly overlapping pairs can be found.
        if (start > rangeEnd) {
          return data;
        }
        // E.g. `[0, 11, 12, 16]` and we've added 5-15, so we now have
        // `[0, 16, 12, 16]`. Remove the `12,16` part, as it lies within the
        // `0,16` range that was previously added.
        if (start >= rangeStart && start <= rangeEnd) {
          // `start` lies within the range that was previously added.
          if (end > rangeStart && end - 1 <= rangeEnd) {
            // `end` lies within the range that was previously added as well,
            // so remove this pair.
            data.splice(index, 2);
            index -= 2;
            // Note: we cannot `return` just yet, as there may still be other
            // overlapping pairs.
          } else {
            // `start` lies within the range that was previously added, but
            // `end` doesn't. E.g. `[0, 11, 12, 31]` and we've added 5-15, so
            // now we have `[0, 16, 12, 31]`. This must be written as `[0, 31]`.
            // Remove the previously added `end` and the current `start`.
            data.splice(index - 1, 2);
            index -= 2;
          }
          // Note: we cannot return yet.
        }
      }
      else if (start == rangeEnd + 1 || start == rangeEnd) {
        // The new range ends right before, or on, the first code point of this
        // pair; extend the pair downwards.
        data[index] = rangeStart;
        return data;
      }
      // Check if a new pair must be inserted *before* the current one.
      else if (start > rangeEnd) {
        data.splice(index, 0, rangeStart, rangeEnd + 1);
        return data;
      }
      else if (rangeStart >= start && rangeStart < end && rangeEnd + 1 <= end) {
        // The new range lies entirely within an existing range pair. No action
        // needed.
        return data;
      }
      else if (
        // E.g. `[0, 11]` and you add 5-15 -> `[0, 16]`.
        (rangeStart >= start && rangeStart < end) ||
        // E.g. `[0, 3]` and you add 3-6 -> `[0, 7]`.
        end == rangeStart
      ) {
        // Replace `end` with the new value.
        data[index + 1] = rangeEnd + 1;
        // Make sure the next range pair doesn't overlap, e.g. `[0, 11, 12, 14]`
        // and you add 5-15 -> `[0, 16]`, i.e. remove the `12,14` part.
        added = true;
        // Note: we cannot `return` just yet.
      }
      else if (rangeStart <= start && rangeEnd + 1 >= end) {
        // The new range is a superset of the old range.
        data[index] = rangeStart;
        data[index + 1] = rangeEnd + 1;
        added = true;
      }
      index += 2;
    }
    // The loop has finished without doing anything; add the new pair to the end
    // of the data set.
    if (!added) {
      data.push(rangeStart, rangeEnd + 1);
    }
    return data;
  };
  // Returns `true` when `codePoint` lies inside one of the `start, end` pairs
  // of `data` (`start` inclusive, `end` exclusive), `false` otherwise.
  var dataContains = function(data, codePoint) {
    var index = 0;
    var length = data.length;
    // Exit early if `codePoint` is not within `data`'s overall range. The last
    // value is an exclusive `end`, so a code point equal to it is outside too.
    var start = data[index];
    var end = data[length - 1];
    if (length >= 2) {
      if (codePoint < start || codePoint >= end) {
        return false;
      }
    }
    // Iterate over the data per `(start, end)` pair.
    while (index < length) {
      start = data[index];
      end = data[index + 1];
      if (codePoint >= start && codePoint < end) {
        return true;
      }
      index += 2;
    }
    return false;
  };
  // Returns new range data holding those entries of the list `codePoints`
  // that are contained in `data`. The matching code points are collected in
  // list order and then converted with `dataFromCodePoints`. `data` is not
  // modified.
  var dataIntersection = function(data, codePoints) {
    var index = 0;
    var length = codePoints.length;
    var codePoint;
    var result = [];
    while (index < length) {
      codePoint = codePoints[index];
      if (dataContains(data, codePoint)) {
        result.push(codePoint);
      }
      ++index;
    }
    return dataFromCodePoints(result);
  };
  // Returns `true` when `data` holds no values at all.
  var dataIsEmpty = function(data) {
    return !data.length;
  };
  // Returns `true` when `data` is exactly one `start, end` pair that covers
  // exactly one code point.
  var dataIsSingleton = function(data) {
    // Check if the set only represents a single code point.
    return data.length == 2 && data[0] + 1 == data[1];
  };
  // Expands range data into a new array listing every code point of every
  // `start, end` pair (`end` exclusive), in the order the pairs appear.
  var dataToArray = function(data) {
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var result = [];
    var length = data.length;
    while (index < length) {
      start = data[index];
      end = data[index + 1];
      while (start < end) {
        result.push(start);
        ++start;
      }
      index += 2;
    }
    return result;
  };
  445. /*--------------------------------------------------------------------------*/
  // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  var floor = Math.floor;
  // Computes the high (lead) surrogate code unit for `codePoint` using the
  // surrogate formula. The formula is meant for code points of 0x10000 and up.
  var highSurrogate = function(codePoint) {
    return parseInt(
      floor((codePoint - 0x10000) / 0x400) + HIGH_SURROGATE_MIN,
      10
    );
  };
  // Computes the low (trail) surrogate code unit for `codePoint` using the
  // surrogate formula. The formula is meant for code points of 0x10000 and up.
  var lowSurrogate = function(codePoint) {
    return parseInt(
      (codePoint - 0x10000) % 0x400 + LOW_SURROGATE_MIN,
      10
    );
  };
  var stringFromCharCode = String.fromCharCode;
  // Returns the regular expression source text that represents one BMP code
  // point: a short escape (`\t`, `\n`, `\f`, `\r`), a backslash-escaped
  // syntax character, the literal printable ASCII character, or a `\xHH` /
  // `\uHHHH` escape.
  //
  // There's no need to account for astral symbols / surrogate pairs here,
  // since `codePointToString` is private and only used for BMP code points.
  // But if that's what you need, just add an `else` block with this code:
  //
  //     string = '\\u' + pad(hex(highSurrogate(codePoint)), 4)
  //       + '\\u' + pad(hex(lowSurrogate(codePoint)), 4);
  var codePointToString = function(codePoint) {
    var string;
    // https://mathiasbynens.be/notes/javascript-escapes#single
    // Note: the `\b` escape sequence for U+0008 BACKSPACE in strings has a
    // different meaning in regular expressions (word boundary), so it cannot
    // be used here.
    if (codePoint == 0x09) {
      string = '\\t';
    }
    // Note: IE < 9 treats `'\v'` as `'v'`, so U+000B deliberately gets no
    // short escape and ends up as `\x0B` below.
    else if (codePoint == 0x0A) {
      string = '\\n';
    }
    else if (codePoint == 0x0C) {
      string = '\\f';
    }
    else if (codePoint == 0x0D) {
      string = '\\r';
    }
    else if (codePoint == 0x2D) {
      // https://mathiasbynens.be/notes/javascript-escapes#hexadecimal
      // Note: `-` (U+002D HYPHEN-MINUS) is escaped in this way rather
      // than by backslash-escaping, in case the output is used outside
      // of a character class in a `u` RegExp. /\-/u throws, but
      // /\x2D/u is fine.
      string = '\\x2D';
    }
    else if (codePoint == 0x5C) {
      string = '\\\\';
    }
    else if (
      codePoint == 0x24 ||
      (codePoint >= 0x28 && codePoint <= 0x2B) ||
      codePoint == 0x2E || codePoint == 0x2F ||
      codePoint == 0x3F ||
      (codePoint >= 0x5B && codePoint <= 0x5E) ||
      (codePoint >= 0x7B && codePoint <= 0x7D)
    ) {
      // The code point maps to an unsafe printable ASCII character;
      // backslash-escape it. Here's the list of those symbols:
      //
      //     $()*+./?[\]^{|}
      //
      // This matches SyntaxCharacters as well as `/` (U+002F SOLIDUS).
      // https://tc39.github.io/ecma262/#prod-SyntaxCharacter
      string = '\\' + stringFromCharCode(codePoint);
    }
    else if (codePoint >= 0x20 && codePoint <= 0x7E) {
      // The code point maps to one of these printable ASCII symbols
      // (including the space character):
      //
      //      !"#%&',0123456789:;<=>@ABCDEFGHIJKLMNO
      //     PQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz~
      //
      // These can safely be used directly. (`-` and `/` are not in this list;
      // they are escaped by the branches above.)
      string = stringFromCharCode(codePoint);
    }
    else if (codePoint <= 0xFF) {
      string = '\\x' + pad(hex(codePoint), 2);
    }
    else { // `codePoint <= 0xFFFF` holds true.
      // https://mathiasbynens.be/notes/javascript-escapes#unicode
      string = '\\u' + pad(hex(codePoint), 4);
    }
    return string;
  };
  // Returns the regular expression source text for one code point, for use in
  // a pattern with the `u` flag: BMP code points are delegated to
  // `codePointToString`, anything above 0xFFFF becomes a `\u{...}` escape with
  // uppercase hexadecimal digits.
  var codePointToStringUnicode = function(codePoint) {
    if (codePoint <= 0xFFFF) {
      return codePointToString(codePoint);
    }
    return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
  };
  // Returns the code point of the first symbol in the string `symbol`. When
  // the first code unit is a high surrogate and another code unit follows, the
  // two are combined into an astral code point; otherwise the first code unit
  // is returned as is.
  var symbolToCodePoint = function(symbol) {
    var length = symbol.length;
    var first = symbol.charCodeAt(0);
    var second;
    if (
      first >= HIGH_SURROGATE_MIN && first <= HIGH_SURROGATE_MAX &&
      length > 1 // There is a next code unit.
    ) {
      // `first` is a high surrogate, and there is a next character. Assume
      // it's a low surrogate (else it's invalid usage of Regenerate anyway).
      second = symbol.charCodeAt(1);
      // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      return (first - HIGH_SURROGATE_MIN) * 0x400 +
        second - LOW_SURROGATE_MIN + 0x10000;
    }
    return first;
  };
  // Builds regular expression source for range data that only holds BMP code
  // points. A set with exactly one code point yields just that escaped
  // symbol; anything else yields a bracketed character class in which two
  // adjacent code points are listed individually and longer runs are written
  // as `start-end`.
  var createBMPCharacterClasses = function(data) {
    // Iterate over the data per `(start, end)` pair.
    var result = '';
    var index = 0;
    var start;
    var end;
    var length = data.length;
    if (dataIsSingleton(data)) {
      return codePointToString(data[0]);
    }
    while (index < length) {
      start = data[index];
      end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      if (start == end) {
        result += codePointToString(start);
      } else if (start + 1 == end) {
        result += codePointToString(start) + codePointToString(end);
      } else {
        result += codePointToString(start) + '-' + codePointToString(end);
      }
      index += 2;
    }
    return '[' + result + ']';
  };
  // Same as `createBMPCharacterClasses`, but every code point is rendered
  // with `codePointToStringUnicode`, so astral code points are emitted as
  // `\u{...}` escapes instead of being limited to the BMP.
  var createUnicodeCharacterClasses = function(data) {
    // Iterate over the data per `(start, end)` pair.
    var result = '';
    var index = 0;
    var start;
    var end;
    var length = data.length;
    if (dataIsSingleton(data)) {
      return codePointToStringUnicode(data[0]);
    }
    while (index < length) {
      start = data[index];
      end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      if (start == end) {
        result += codePointToStringUnicode(start);
      } else if (start + 1 == end) {
        result += codePointToStringUnicode(start) + codePointToStringUnicode(end);
      } else {
        result += codePointToStringUnicode(start) + '-' + codePointToStringUnicode(end);
      }
      index += 2;
    }
    return '[' + result + ']';
  };
  // Splits range data into four separate range data sets, returned as an
  // object with these keys:
  //
  //   - `loneHighSurrogates`: the part within U+D800..U+DBFF
  //   - `loneLowSurrogates`: the part within U+DC00..U+DFFF
  //   - `bmp`: the rest of the part up to U+FFFF
  //   - `astral`: everything above U+FFFF
  //
  // `data` is not modified.
  var splitAtBMP = function(data) {
    // Iterate over the data per `(start, end)` pair.
    var loneHighSurrogates = [];
    var loneLowSurrogates = [];
    var bmp = [];
    var astral = [];
    var index = 0;
    var start;
    var end;
    var length = data.length;
    while (index < length) {
      start = data[index];
      end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      if (start < HIGH_SURROGATE_MIN) {
        // The range starts and ends before the high surrogate range.
        // E.g. (0, 0x10).
        if (end < HIGH_SURROGATE_MIN) {
          bmp.push(start, end + 1);
        }
        // The range starts before the high surrogate range and ends within it.
        // E.g. (0, 0xD855).
        if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) {
          bmp.push(start, HIGH_SURROGATE_MIN);
          loneHighSurrogates.push(HIGH_SURROGATE_MIN, end + 1);
        }
        // The range starts before the high surrogate range and ends in the low
        // surrogate range. E.g. (0, 0xDCFF).
        if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
          bmp.push(start, HIGH_SURROGATE_MIN);
          loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
          loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1);
        }
        // The range starts before the high surrogate range and ends after the
        // low surrogate range. E.g. (0, 0x10FFFF).
        if (end > LOW_SURROGATE_MAX) {
          bmp.push(start, HIGH_SURROGATE_MIN);
          loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
          loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
          if (end <= 0xFFFF) {
            bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
          } else {
            bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
            astral.push(0xFFFF + 1, end + 1);
          }
        }
      } else if (start >= HIGH_SURROGATE_MIN && start <= HIGH_SURROGATE_MAX) {
        // The range starts and ends in the high surrogate range.
        // E.g. (0xD855, 0xD866).
        if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) {
          loneHighSurrogates.push(start, end + 1);
        }
        // The range starts in the high surrogate range and ends in the low
        // surrogate range. E.g. (0xD855, 0xDCFF).
        if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
          loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1);
          loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1);
        }
        // The range starts in the high surrogate range and ends after the low
        // surrogate range. E.g. (0xD855, 0x10FFFF).
        if (end > LOW_SURROGATE_MAX) {
          loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1);
          loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
          if (end <= 0xFFFF) {
            bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
          } else {
            bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
            astral.push(0xFFFF + 1, end + 1);
          }
        }
      } else if (start >= LOW_SURROGATE_MIN && start <= LOW_SURROGATE_MAX) {
        // The range starts and ends in the low surrogate range.
        // E.g. (0xDCFF, 0xDDFF).
        if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
          loneLowSurrogates.push(start, end + 1);
        }
        // The range starts in the low surrogate range and ends after the low
        // surrogate range. E.g. (0xDCFF, 0x10FFFF).
        if (end > LOW_SURROGATE_MAX) {
          loneLowSurrogates.push(start, LOW_SURROGATE_MAX + 1);
          if (end <= 0xFFFF) {
            bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
          } else {
            bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
            astral.push(0xFFFF + 1, end + 1);
          }
        }
      } else if (start > LOW_SURROGATE_MAX && start <= 0xFFFF) {
        // The range starts after the low surrogate range, within the BMP.
        // E.g. (0xFFAA, 0x10FFFF).
        if (end <= 0xFFFF) {
          bmp.push(start, end + 1);
        } else {
          bmp.push(start, 0xFFFF + 1);
          astral.push(0xFFFF + 1, end + 1);
        }
      } else {
        // The range starts and ends in the astral range.
        astral.push(start, end + 1);
      }
      index += 2;
    }
    return {
      'loneHighSurrogates': loneHighSurrogates,
      'loneLowSurrogates': loneLowSurrogates,
      'bmp': bmp,
      'astral': astral
    };
  };
  // Merges consecutive surrogate mappings that share the same high surrogate
  // pair by combining their low surrogate sets, then hands the result to
  // `optimizeByLowSurrogates`. Each mapping has the shape
  // `[highSurrogates, lowSurrogates]`, both being range data. The low
  // surrogate data of the first mapping of a merged run is extended in place.
  var optimizeSurrogateMappings = function(surrogateMappings) {
    var result = [];
    var tmpLow = [];
    var addLow = false;
    var mapping;
    var nextMapping;
    var highSurrogates;
    var lowSurrogates;
    var nextHighSurrogates;
    var nextLowSurrogates;
    var index = -1;
    var length = surrogateMappings.length;
    while (++index < length) {
      mapping = surrogateMappings[index];
      nextMapping = surrogateMappings[index + 1];
      if (!nextMapping) {
        // Last mapping; nothing left to merge it with.
        result.push(mapping);
        continue;
      }
      highSurrogates = mapping[0];
      lowSurrogates = mapping[1];
      nextHighSurrogates = nextMapping[0];
      nextLowSurrogates = nextMapping[1];
      // Check for identical high surrogate ranges.
      tmpLow = lowSurrogates;
      while (
        nextHighSurrogates &&
        highSurrogates[0] == nextHighSurrogates[0] &&
        highSurrogates[1] == nextHighSurrogates[1]
      ) {
        // Merge with the next item.
        if (dataIsSingleton(nextLowSurrogates)) {
          tmpLow = dataAdd(tmpLow, nextLowSurrogates[0]);
        } else {
          tmpLow = dataAddRange(
            tmpLow,
            nextLowSurrogates[0],
            nextLowSurrogates[1] - 1
          );
        }
        // Advance to the item that was just merged, and look at its successor.
        ++index;
        mapping = surrogateMappings[index];
        highSurrogates = mapping[0];
        lowSurrogates = mapping[1];
        nextMapping = surrogateMappings[index + 1];
        nextHighSurrogates = nextMapping && nextMapping[0];
        nextLowSurrogates = nextMapping && nextMapping[1];
        addLow = true;
      }
      result.push([
        highSurrogates,
        addLow ? tmpLow : lowSurrogates
      ]);
      addLow = false;
    }
    return optimizeByLowSurrogates(result);
  };
  // Merges surrogate mappings that share the same low surrogate set by
  // combining their high surrogate sets, so that they can be emitted as one
  // pair of character classes. `surrogateMappings` is modified in place
  // (merged entries are removed) and returned.
  //
  // Two mappings are only merged when the low surrogate set of *both* is one
  // single `start, end` pair and those pairs are equal. Merging when just the
  // first pair matches would attach the extra low surrogates of one mapping
  // to the high surrogates of the other, producing a pattern that matches
  // code points that are not in the set.
  var optimizeByLowSurrogates = function(surrogateMappings) {
    if (surrogateMappings.length == 1) {
      return surrogateMappings;
    }
    var index = -1;
    var innerIndex = -1;
    while (++index < surrogateMappings.length) {
      var mapping = surrogateMappings[index];
      var lowSurrogates = mapping[1];
      var lowSurrogateStart = lowSurrogates[0];
      var lowSurrogateEnd = lowSurrogates[1];
      innerIndex = index; // Note: the loop starts at the next index.
      while (++innerIndex < surrogateMappings.length) {
        var otherMapping = surrogateMappings[innerIndex];
        var otherLowSurrogates = otherMapping[1];
        var otherLowSurrogateStart = otherLowSurrogates[0];
        var otherLowSurrogateEnd = otherLowSurrogates[1];
        if (
          lowSurrogateStart == otherLowSurrogateStart &&
          lowSurrogateEnd == otherLowSurrogateEnd &&
          lowSurrogates.length === 2 &&
          otherLowSurrogates.length === 2
        ) {
          // Add the code points in the other item to this one.
          if (dataIsSingleton(otherMapping[0])) {
            mapping[0] = dataAdd(mapping[0], otherMapping[0][0]);
          } else {
            mapping[0] = dataAddRange(
              mapping[0],
              otherMapping[0][0],
              otherMapping[0][1] - 1
            );
          }
          // Remove the other, now redundant, item. Step back so that the item
          // that moved into its slot is examined too.
          surrogateMappings.splice(innerIndex, 1);
          --innerIndex;
        }
      }
    }
    return surrogateMappings;
  };
  // Converts range data into a list of surrogate mappings and returns that
  // list after passing it through `optimizeSurrogateMappings`. The surrogate
  // formulas used here are meant for astral code points. Every range is
  // described by up to three mappings: the remainder of the first high
  // surrogate, the block of complete high surrogates in between, and the
  // start of the last high surrogate. Empty `data` yields an empty array.
  //
  // The format of the mappings is as follows:
  //
  //     [
  //       [ highSurrogates1, lowSurrogates1 ],
  //       [ highSurrogates2, lowSurrogates2 ]
  //     ]
  var surrogateSet = function(data) {
    // Exit early if `data` is an empty set.
    if (!data.length) {
      return [];
    }
    // Iterate over the data per `(start, end)` pair.
    var index = 0;
    var start;
    var end;
    var startHigh;
    var startLow;
    var endHigh;
    var endLow;
    var surrogateMappings = [];
    var length = data.length;
    while (index < length) {
      start = data[index];
      end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
      startHigh = highSurrogate(start);
      startLow = lowSurrogate(start);
      endHigh = highSurrogate(end);
      endLow = lowSurrogate(end);
      var startsWithLowestLowSurrogate = startLow == LOW_SURROGATE_MIN;
      var endsWithHighestLowSurrogate = endLow == LOW_SURROGATE_MAX;
      var complete = false;
      // Append the previous high-surrogate-to-low-surrogate mappings.
      // Step 1: `(startHigh, startLow)` to `(startHigh, LOW_SURROGATE_MAX)`.
      if (
        startHigh == endHigh ||
        startsWithLowestLowSurrogate && endsWithHighestLowSurrogate
      ) {
        // The whole range fits in one mapping.
        surrogateMappings.push([
          [startHigh, endHigh + 1],
          [startLow, endLow + 1]
        ]);
        complete = true;
      } else {
        surrogateMappings.push([
          [startHigh, startHigh + 1],
          [startLow, LOW_SURROGATE_MAX + 1]
        ]);
      }
      // Step 2: `(startHigh + 1, LOW_SURROGATE_MIN)` to
      // `(endHigh - 1, LOW_SURROGATE_MAX)`.
      if (!complete && startHigh + 1 < endHigh) {
        if (endsWithHighestLowSurrogate) {
          // Combine step 2 and step 3.
          surrogateMappings.push([
            [startHigh + 1, endHigh + 1],
            [LOW_SURROGATE_MIN, endLow + 1]
          ]);
          complete = true;
        } else {
          surrogateMappings.push([
            [startHigh + 1, endHigh],
            [LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1]
          ]);
        }
      }
      // Step 3. `(endHigh, LOW_SURROGATE_MIN)` to `(endHigh, endLow)`.
      if (!complete) {
        surrogateMappings.push([
          [endHigh, endHigh + 1],
          [LOW_SURROGATE_MIN, endLow + 1]
        ]);
      }
      index += 2;
    }
    return optimizeSurrogateMappings(surrogateMappings);
  };
  // Renders a list of surrogate mappings as regular expression source: each
  // mapping becomes the character class of its high surrogates followed by
  // the character class of its low surrogates, and the mappings are joined
  // with `|`.
  var createSurrogateCharacterClasses = function(surrogateMappings) {
    var result = [];
    forEach(surrogateMappings, function(surrogateMapping) {
      var highSurrogates = surrogateMapping[0];
      var lowSurrogates = surrogateMapping[1];
      result.push(
        createBMPCharacterClasses(highSurrogates) +
        createBMPCharacterClasses(lowSurrogates)
      );
    });
    return result.join('|');
  };
  // Builds the regular expression source for range data.
  //
  //   - `hasUnicodeFlag`: when truthy, the result is produced by
  //     `createUnicodeCharacterClasses` alone and `bmpOnly` is ignored.
  //   - `bmpOnly`: when truthy, lone surrogates are folded into the BMP
  //     character class instead of being emitted with the guards below.
  //
  // Otherwise the data is split with `splitAtBMP` and up to four
  // alternatives are joined with `|`, in this order: the BMP class, the
  // surrogate pair classes for the astral part, lone high surrogates, lone
  // low surrogates.
  var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) {
    if (hasUnicodeFlag) {
      return createUnicodeCharacterClasses(data);
    }
    var result = [];
    var parts = splitAtBMP(data);
    var loneHighSurrogates = parts.loneHighSurrogates;
    var loneLowSurrogates = parts.loneLowSurrogates;
    var bmp = parts.bmp;
    var astral = parts.astral;
    var hasLoneHighSurrogates = !dataIsEmpty(loneHighSurrogates);
    var hasLoneLowSurrogates = !dataIsEmpty(loneLowSurrogates);
    var surrogateMappings = surrogateSet(astral);
    if (bmpOnly) {
      bmp = dataAddData(bmp, loneHighSurrogates);
      hasLoneHighSurrogates = false;
      bmp = dataAddData(bmp, loneLowSurrogates);
      hasLoneLowSurrogates = false;
    }
    if (!dataIsEmpty(bmp)) {
      // The data set contains BMP code points that are not high surrogates
      // needed for astral code points in the set.
      result.push(createBMPCharacterClasses(bmp));
    }
    if (surrogateMappings.length) {
      // The data set contains astral code points; append character classes
      // based on their surrogate pairs.
      result.push(createSurrogateCharacterClasses(surrogateMappings));
    }
    // https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
    if (hasLoneHighSurrogates) {
      result.push(
        createBMPCharacterClasses(loneHighSurrogates) +
        // Make sure the high surrogates aren't part of a surrogate pair.
        '(?![\\uDC00-\\uDFFF])'
      );
    }
    if (hasLoneLowSurrogates) {
      result.push(
        // No lookbehind is used here, so this cannot accurately assert that
        // the low surrogates aren't part of a surrogate pair. Instead, the
        // pattern requires (and consumes) a preceding symbol that is not a
        // high surrogate, or the start of the input.
        '(?:[^\\uD800-\\uDBFF]|^)' +
        createBMPCharacterClasses(loneLowSurrogates)
      );
    }
    return result.join('|');
  };
  952. /*--------------------------------------------------------------------------*/
  // `regenerate` can be used as a constructor (and new methods can be added to
  // its prototype) but also as a regular function, the latter of which is the
  // documented and most common usage. For that reason, it’s not capitalized.
  //
  // When more than one argument is passed, the arguments are collected into
  // an array and handed to `add()` as a single value. Both call styles return
  // a Regenerate instance.
  var regenerate = function(value) {
    if (arguments.length > 1) {
      value = slice.call(arguments);
    }
    if (this instanceof regenerate) {
      this.data = [];
      // `0` is falsy but is a valid code point (U+0000), so it must be added
      // just like `regenerate(0)` does. Every other falsy value keeps the
      // previous behavior and yields an empty set.
      return (value || value === 0) ? this.add(value) : this;
    }
    return (new regenerate).add(value);
  };
  regenerate.version = '1.4.2';
  var proto = regenerate.prototype;
  extend(proto, {
    // Adds values to the set and returns the set (chainable).
    // `value` may be `null`/`undefined` (ignored), another Regenerate
    // instance, an array (each item is added recursively), a number (used as
    // a code point) or any other value, which is converted with
    // `symbolToCodePoint()`. Several values may be passed as separate
    // arguments.
    'add': function(value) {
      var $this = this;
      // Collect multiple arguments *before* the early returns below.
      // Otherwise `add(null, x)` and `add(otherSet, x)` would silently drop
      // every argument after the first one.
      if (arguments.length > 1) {
        value = slice.call(arguments);
      }
      if (value == null) {
        return $this;
      }
      if (value instanceof regenerate) {
        // Allow passing other Regenerate instances.
        $this.data = dataAddData($this.data, value.data);
        return $this;
      }
      if (isArray(value)) {
        forEach(value, function(item) {
          $this.add(item);
        });
        return $this;
      }
      $this.data = dataAdd(
        $this.data,
        isNumber(value) ? value : symbolToCodePoint(value)
      );
      return $this;
    },
    // Removes values from the set and returns the set (chainable). Accepts
    // the same kinds of input as `add()`.
    'remove': function(value) {
      var $this = this;
      // As in `add()`, gather multiple arguments first so that none of them
      // is lost when the first one is nullish or a Regenerate instance.
      if (arguments.length > 1) {
        value = slice.call(arguments);
      }
      if (value == null) {
        return $this;
      }
      if (value instanceof regenerate) {
        // Allow passing other Regenerate instances.
        $this.data = dataRemoveData($this.data, value.data);
        return $this;
      }
      if (isArray(value)) {
        forEach(value, function(item) {
          $this.remove(item);
        });
        return $this;
      }
      $this.data = dataRemove(
        $this.data,
        isNumber(value) ? value : symbolToCodePoint(value)
      );
      return $this;
    },
    // Adds the range delimited by `start` and `end` through `dataAddRange()`
    // and returns the set. Non-numeric bounds are converted with
    // `symbolToCodePoint()`.
    'addRange': function(start, end) {
      var $this = this;
      $this.data = dataAddRange($this.data,
        isNumber(start) ? start : symbolToCodePoint(start),
        isNumber(end) ? end : symbolToCodePoint(end)
      );
      return $this;
    },
    // Removes the range delimited by `start` and `end` through
    // `dataRemoveRange()` and returns the set. Non-numeric bounds are
    // converted with `symbolToCodePoint()`.
    'removeRange': function(start, end) {
      var $this = this;
      var startCodePoint = isNumber(start) ? start : symbolToCodePoint(start);
      var endCodePoint = isNumber(end) ? end : symbolToCodePoint(end);
      $this.data = dataRemoveRange(
        $this.data,
        startCodePoint,
        endCodePoint
      );
      return $this;
    },
    // Replaces the set's data with the result of `dataIntersection()` for the
    // given argument and returns the set. A Regenerate instance is first
    // expanded with `dataToArray()`; anything else is passed through as-is.
    'intersection': function(argument) {
      var $this = this;
      // Allow passing other Regenerate instances.
      // TODO: Optimize this by writing and using `dataIntersectionData()`.
      var array = argument instanceof regenerate ?
        dataToArray(argument.data) :
        argument;
      $this.data = dataIntersection($this.data, array);
      return $this;
    },
    // Returns the result of `dataContains()` for the given code point;
    // non-numeric input is converted with `symbolToCodePoint()`.
    'contains': function(codePoint) {
      return dataContains(
        this.data,
        isNumber(codePoint) ? codePoint : symbolToCodePoint(codePoint)
      );
    },
    // Returns a new Regenerate instance holding a shallow copy of this set's
    // data array.
    'clone': function() {
      var set = new regenerate;
      set.data = this.data.slice(0);
      return set;
    },
    // Returns the regular expression source for the set. `options` is
    // optional; its `bmpOnly` and `hasUnicodeFlag` properties are forwarded
    // to `createCharacterClassesFromData()`.
    'toString': function(options) {
      var result = createCharacterClassesFromData(
        this.data,
        options ? options.bmpOnly : false,
        options ? options.hasUnicodeFlag : false
      );
      if (!result) {
        // For an empty set, return something that can be inserted `/here/` to
        // form a valid regular expression. Avoid `(?:)` since that matches the
        // empty string.
        return '[]';
      }
      // Use `\0` instead of `\x00` where possible.
      return result.replace(regexNull, '\\0$1');
    },
    // Returns a `RegExp` built from `toString()`. `flags` is an optional
    // flags string; when it contains `u`, the pattern is generated with
    // `hasUnicodeFlag` enabled.
    'toRegExp': function(flags) {
      var pattern = this.toString(
        flags && flags.indexOf('u') != -1 ?
          { 'hasUnicodeFlag': true } :
          null
      );
      return RegExp(pattern, flags || '');
    },
    // Returns the set's contents as produced by `dataToArray()`.
    'valueOf': function() { // Note: `valueOf` is aliased as `toArray`.
      return dataToArray(this.data);
    }
  });
  proto.toArray = proto.valueOf;
  // Expose `regenerate` to whichever module system is present.
  // Some AMD build optimizers, like r.js, check for specific condition patterns
  // like the following, so the AMD test is kept exactly in this form:
  if (
    typeof define == 'function' &&
    typeof define.amd == 'object' &&
    define.amd
  ) {
    define(function() {
      return regenerate;
    });
  } else if (!freeExports || freeExports.nodeType) {
    // No usable `exports` object: in Rhino or a web browser.
    root.regenerate = regenerate;
  } else if (freeModule) {
    // in Node.js, io.js, or RingoJS v0.8.0+
    freeModule.exports = regenerate;
  } else {
    // in Narwhal or RingoJS v0.7.0-
    freeExports.regenerate = regenerate;
  }
  1106. }(this));