// rewrite-pattern.js
  1. 'use strict';
  2. const generate = require('regjsgen').generate;
  3. const parse = require('regjsparser').parse;
  4. const regenerate = require('regenerate');
  5. const unicodeMatchProperty = require('unicode-match-property-ecmascript');
  6. const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
  7. const iuMappings = require('./data/iu-mappings.js');
  8. const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
  /**
   * Maps every element of `array` through `callback` and flattens the results
   * by one level: array results are spliced in, anything else is appended
   * as-is.
   *
   * @param {Array} array - Items to map.
   * @param {Function} callback - Invoked with each item; may return an array.
   * @returns {Array} The flattened list of results.
   */
  function flatMap(array, callback) {
    const flattened = [];
    array.forEach((element) => {
      const mapped = callback(element);
      flattened.push(...(Array.isArray(mapped) ? mapped : [mapped]));
    });
    return flattened;
  }
  // Characters that get a backslash prefix when they start a property-of-strings
  // entry (see getUnicodePropertyEscapeSet).
  const SPECIAL_CHARS = new Set([...'\\^$.*+?()[]{}|']);
  // Every code point (U+0000..U+10FFFF). Serves as the universe that negated
  // character classes are subtracted from.
  const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);

  // What `/./u` matches (https://mths.be/es6#sec-atom): every code point except
  // the `LineTerminator`s (https://mths.be/es6#sec-line-terminators), namely
  // <LF> U+000A, <CR> U+000D, <LS> U+2028 and <PS> U+2029.
  const DOT_SET_UNICODE = UNICODE_SET.clone().remove(0x000A, 0x000D, 0x2028, 0x2029);
  /**
   * Looks up the precomputed set for a character class escape.
   *
   * @param {string} character - Key into the escape-set table.
   * @param {boolean} unicode - Select the Unicode-mode tables.
   * @param {boolean} ignoreCase - With `unicode`, select the ignore-case table.
   * @returns {*} Whatever the selected `ESCAPE_SETS` table holds for `character`.
   */
  const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {
    let table;
    if (!unicode) {
      table = ESCAPE_SETS.REGULAR;
    } else if (ignoreCase) {
      table = ESCAPE_SETS.UNICODE_IGNORE_CASE;
    } else {
      table = ESCAPE_SETS.UNICODE;
    }
    return table.get(character);
  };
  /**
   * Picks the set a Unicode-mode `.` stands for.
   *
   * @param {boolean} dotAll - Whether the `s` flag is in effect.
   * @returns {*} `UNICODE_SET` when `dotAll` is truthy, else `DOT_SET_UNICODE`.
   */
  const getUnicodeDotSet = (dotAll) => {
    if (dotAll) {
      return UNICODE_SET;
    }
    return DOT_SET_UNICODE;
  };
  /**
   * Loads the data module for a Unicode property (and optional value) from
   * `regenerate-unicode-properties`.
   *
   * @param {string} property - Property name; used as the directory when a
   *   value is given, otherwise as the file name under `Binary_Property/`.
   * @param {string} [value] - Property value, if any.
   * @returns {*} The export of the required data module.
   * @throws {Error} When the module cannot be loaded.
   */
  const getUnicodePropertyValueSet = (property, value) => {
    let path;
    if (value) {
      path = `${ property }/${ value }`;
    } else {
      path = `Binary_Property/${ property }`;
    }
    try {
      return require(`regenerate-unicode-properties/${ path }.js`);
    } catch (exception) {
      const message = `Failed to recognize value \`${ value }\` for property \`${ property }\`.`;
      throw new Error(message);
    }
  };
  /**
   * Resolves a `\p{…}` body that has no `=` sign. Three interpretations are
   * tried in order: a `General_Category` value, a property of strings, and
   * finally a binary property of single code points.
   *
   * @param {string} value - The text between the braces.
   * @returns {*} The data set returned by `getUnicodePropertyValueSet`.
   * @throws Whatever the last attempt throws when none of the three matches.
   */
  const handleLoneUnicodePropertyNameOrValue = (value) => {
    // 1. `General_Category` value. `unicodeMatchPropertyValue` throws on
    //    invalid values, which simply moves us on to the next attempt.
    try {
      const category = unicodeMatchPropertyValue('General_Category', value);
      return getUnicodePropertyValueSet('General_Category', category);
    } catch (exception) {}

    // 2. Property of strings.
    try {
      return getUnicodePropertyValueSet('Property_of_Strings', value);
    } catch (exception) {}

    // 3. Binary property. `unicodeMatchProperty` throws on invalid properties,
    //    and that error is allowed to propagate.
    return getUnicodePropertyValueSet(unicodeMatchProperty(value));
  };
  /**
   * Computes the code points and strings matched by a `\p{…}` / `\P{…}` escape.
   *
   * @param {string} value - Escape body, either `Name` or `Property=Value`.
   * @param {boolean} isNegative - True for `\P{…}`.
   * @returns {{characters: *, strings: Set<string>}} A fresh `characters` set
   *   and a `Set` of strings (empty when the property has none).
   * @throws {Error} When asked to negate a property that has strings.
   */
  const getUnicodePropertyEscapeSet = (value, isNegative) => {
    const parts = value.split('=');
    const firstPart = parts[0];
    let set;
    if (parts.length !== 1) {
      // `Property=Value` form; anything after a second `=` is not looked at.
      const property = unicodeMatchProperty(firstPart);
      const propertyValue = unicodeMatchPropertyValue(property, parts[1]);
      set = getUnicodePropertyValueSet(property, propertyValue);
    } else {
      set = handleLoneUnicodePropertyNameOrValue(firstPart);
    }

    if (!isNegative) {
      const characters = set.characters.clone();
      if (!set.strings) {
        return { characters: characters, strings: new Set() };
      }
      // Strings such as *️⃣ start with a regex metacharacter; prefix those with
      // a backslash so they stay literal when joined into an alternation.
      const escaped = set.strings.map((str) => {
        return SPECIAL_CHARS.has(str[0]) ? `\\${str}` : str;
      });
      return { characters: characters, strings: new Set(escaped) };
    }

    if (set.strings) {
      throw new Error('Cannot negate Unicode property of strings');
    }
    return {
      characters: UNICODE_SET.clone().remove(set.characters),
      strings: new Set()
    };
  };
  /**
   * Wraps the result of `getUnicodePropertyEscapeSet` in a character-class
   * data record.
   *
   * @param {string} property - Escape body (`Name` or `Property=Value`).
   * @param {boolean} isNegative - True for `\P{…}`.
   * @returns {object} Class data whose `singleChars` are the property's code
   *   points; `longStrings`/`maybeIncludesStrings` are set only when the
   *   property has strings.
   */
  const getUnicodePropertyEscapeCharacterClassData = (property, isNegative) => {
    const resolved = getUnicodePropertyEscapeSet(property, isNegative);
    const data = getCharacterClassEmptyData();
    data.singleChars = resolved.characters;
    if (resolved.strings.size === 0) {
      return data;
    }
    data.longStrings = resolved.strings;
    data.maybeIncludesStrings = true;
    return data;
  };
  // For each code point in `min`..`max`, look up its case-folded counterpart
  // and, when there is one, add it to this set. `min` itself is always
  // processed, even when `max` is smaller. Returns the set for chaining.
  regenerate.prototype.iuAddRange = function(min, max) {
    let codePoint = min;
    do {
      const counterpart = caseFold(codePoint);
      if (counterpart) {
        this.add(counterpart);
      }
    } while (++codePoint <= max);
    return this;
  };
  // Counterpart of `iuAddRange`: for each code point in `min`..`max`, remove
  // its case-folded counterpart (when there is one) from this set. `min`
  // itself is always processed. Returns the set for chaining.
  regenerate.prototype.iuRemoveRange = function(min, max) {
    let codePoint = min;
    do {
      const counterpart = caseFold(codePoint);
      if (counterpart) {
        this.remove(counterpart);
      }
    } while (++codePoint <= max);
    return this;
  };
  /**
   * Replaces the contents of an AST node, in place, with the parse of
   * `pattern`.
   *
   * @param {object} item - Node to overwrite (mutated via `Object.assign`).
   * @param {string} pattern - Replacement pattern source; parsed with the `u`
   *   flag when `config.useUnicodeFlag` is set.
   */
  const update = (item, pattern) => {
    const parsed = parse(pattern, config.useUnicodeFlag ? 'u' : '');
    // Character classes, groups and single values are used as they are; any
    // other node type is wrapped in a non-capturing group first.
    const isSelfContained =
      parsed.type === 'characterClass' ||
      parsed.type === 'group' ||
      parsed.type === 'value';
    Object.assign(item, isSelfContained ? parsed : wrap(parsed, pattern));
  };
  /**
   * Builds a non-capturing group node around `tree`.
   *
   * @param {object} tree - Node that becomes the group's only child.
   * @param {string} pattern - Source text of `tree`, used for the group's `raw`.
   * @returns {object} A `group` node with `behavior: 'ignore'`.
   */
  const wrap = (tree, pattern) => {
    const raw = `(?:${ pattern })`;
    return { type: 'group', behavior: 'ignore', body: [tree], raw: raw };
  };
  /**
   * Looks up a code point in the `iuMappings` table.
   *
   * @param {number} codePoint
   * @returns {*} The mapped value, or `false` when the table has no truthy
   *   entry for `codePoint`.
   */
  const caseFold = (codePoint) => {
    const mapped = iuMappings.get(codePoint);
    return mapped ? mapped : false;
  };
  /**
   * Builds the callbacks that fold one class-set operand into the accumulated
   * character-class data (`data.singleChars`, `data.longStrings`,
   * `data.maybeIncludesStrings`) for a given set operation.
   *
   * Every handler exposes the same five callbacks, all of which mutate `data`:
   *   - `single(data, cp)`          one code point
   *   - `regSet(data, set2)`        a ready-made set of code points
   *   - `range(data, start, end)`   an inclusive code point range
   *   - `iuRange(data, start, end)` the case-folded counterparts of a range
   *   - `nested(data, nestedData)`  the computed data of a nested operand
   *
   * For 'intersection' and 'subtraction', `data.first` marks the first operand,
   * which seeds the accumulator instead of being combined with it.
   *
   * @param {string} action - 'union', 'union-negative', 'intersection' or
   *   'subtraction'.
   * @returns {object} The handler for `action`.
   * @throws {Error} On any other `action`.
   */
  const buildHandler = (action) => {
    switch (action) {
      case 'union':
        return {
          single: (data, cp) => {
            data.singleChars.add(cp);
          },
          regSet: (data, set2) => {
            data.singleChars.add(set2);
          },
          range: (data, start, end) => {
            data.singleChars.addRange(start, end);
          },
          iuRange: (data, start, end) => {
            data.singleChars.iuAddRange(start, end);
          },
          nested: (data, nestedData) => {
            data.singleChars.add(nestedData.singleChars);
            for (const str of nestedData.longStrings) data.longStrings.add(str);
            if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
          }
        };
      case 'union-negative': {
        // Union with the complement of the operand.
        const regSet = (data, set2) => {
          data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
        };
        return {
          single: (data, cp) => {
            const unicode = UNICODE_SET.clone();
            // If `cp` is already matched, adding "everything but `cp`" yields
            // the full set; otherwise the existing members are all contained
            // in "everything but `cp`" anyway.
            data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
          },
          regSet: regSet,
          range: (data, start, end) => {
            data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
          },
          iuRange: (data, start, end) => {
            data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end).add(data.singleChars);
          },
          nested: (data, nestedData) => {
            regSet(data, nestedData.singleChars);
            if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
          }
        };
      }
      case 'intersection': {
        const regSet = (data, set2) => {
          // Seed with a copy, never the operand itself: `set2` can be a shared
          // escape set handed over directly by the caller, and the following
          // operands are applied to `data.singleChars` with `.intersection()`,
          // which would otherwise modify that shared set.
          if (data.first) data.singleChars = set2.clone();
          else data.singleChars.intersection(set2);
        };
        return {
          // Operands that consist of code points only can never contribute
          // strings, so any strings collected so far drop out of the result.
          single: (data, cp) => {
            data.singleChars = data.first || data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
            data.longStrings.clear();
            data.maybeIncludesStrings = false;
          },
          regSet: (data, set) => {
            regSet(data, set);
            data.longStrings.clear();
            data.maybeIncludesStrings = false;
          },
          range: (data, start, end) => {
            if (data.first) data.singleChars.addRange(start, end);
            else data.singleChars.intersection(regenerate().addRange(start, end));
            data.longStrings.clear();
            data.maybeIncludesStrings = false;
          },
          iuRange: (data, start, end) => {
            if (data.first) data.singleChars.iuAddRange(start, end);
            else data.singleChars.intersection(regenerate().iuAddRange(start, end));
            data.longStrings.clear();
            data.maybeIncludesStrings = false;
          },
          nested: (data, nestedData) => {
            regSet(data, nestedData.singleChars);
            if (data.first) {
              data.longStrings = nestedData.longStrings;
              data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
            } else {
              // Keep only the strings present on both sides.
              for (const str of data.longStrings) {
                if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
              }
              if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
            }
          }
        };
      }
      case 'subtraction': {
        const regSet = (data, set2) => {
          if (data.first) data.singleChars.add(set2);
          else data.singleChars.remove(set2);
        };
        return {
          single: (data, cp) => {
            if (data.first) data.singleChars.add(cp);
            else data.singleChars.remove(cp);
          },
          regSet: regSet,
          range: (data, start, end) => {
            if (data.first) data.singleChars.addRange(start, end);
            else data.singleChars.removeRange(start, end);
          },
          iuRange: (data, start, end) => {
            if (data.first) data.singleChars.iuAddRange(start, end);
            else data.singleChars.iuRemoveRange(start, end);
          },
          nested: (data, nestedData) => {
            regSet(data, nestedData.singleChars);
            if (data.first) {
              data.longStrings = nestedData.longStrings;
              data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
            } else {
              // Drop the strings that the subtracted operand also contains.
              for (const str of data.longStrings) {
                if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
              }
            }
          }
        };
      }
      // The `default` clause is only here as a safeguard; it should never be
      // reached. Code coverage tools should ignore it.
      /* istanbul ignore next */
      default:
        // Report the offending `action`; no character class item is in scope
        // in this function.
        throw new Error(`Unknown set action: ${ action }`);
    }
  };
  /**
   * Creates a fresh accumulator for computing a character class.
   *
   * @returns {object} A record with an empty `singleChars` set, an empty
   *   `longStrings` Set, `first: true`, and `transformed` initialised from
   *   `config.transform.unicodeFlag`.
   */
  const getCharacterClassEmptyData = () => {
    const emptyData = {
      transformed: config.transform.unicodeFlag,
      singleChars: regenerate(),
      longStrings: new Set(),
      hasEmptyString: false,
      first: true,
      maybeIncludesStrings: false
    };
    return emptyData;
  };
  /**
   * Returns the code points a single code point should expand to.
   *
   * @param {number} codePoint
   * @returns {Array} `[codePoint]`, followed by its case-folded counterpart
   *   when the `i` flag is set, the Unicode flag is being transformed, and
   *   such a counterpart exists.
   */
  const maybeFold = (codePoint) => {
    const variants = [codePoint];
    if (config.flags.ignoreCase && config.transform.unicodeFlag) {
      const counterpart = caseFold(codePoint);
      if (counterpart) {
        variants.push(counterpart);
      }
    }
    return variants;
  };
  /**
   * Computes character-class data for a class-strings node.
   *
   * Strings of exactly one character go into `singleChars` (case-folded via
   * `maybeFold`); every other string, including the empty one, is stringified
   * and stored in `longStrings`.
   *
   * @param {object} classStrings - Node whose `strings` each carry a
   *   `characters` array.
   * @param {object} regenerateOptions - Passed to `toString` of the
   *   per-character sets built for case-insensitive matching.
   * @returns {object} The populated class data.
   */
  const computeClassStrings = (classStrings, regenerateOptions) => {
    const data = getCharacterClassEmptyData();
    for (const { characters } of classStrings.strings) {
      if (characters.length === 1) {
        for (const cp of maybeFold(characters[0].codePoint)) {
          data.singleChars.add(cp);
        }
        continue;
      }

      let stringified;
      if (config.flags.ignoreCase && config.transform.unicodeFlag) {
        // Each character becomes a tiny set holding itself and its
        // case-folded counterpart, serialized one after the other.
        stringified = characters.map((ch) => {
          const alternatives = regenerate(ch.codePoint);
          const counterpart = caseFold(ch.codePoint);
          if (counterpart) alternatives.add(counterpart);
          return alternatives.toString(regenerateOptions);
        }).join('');
      } else {
        stringified = characters.map((ch) => generate(ch)).join('');
      }
      data.longStrings.add(stringified);
      data.maybeIncludesStrings = true;
    }
    return data;
  };
  /**
   * Computes the code points and strings matched by a character class node,
   * recursing into nested classes.
   *
   * @param {object} characterClassItem - Class node with `kind` ('union',
   *   'intersection' or 'subtraction'), `body` and `negative`.
   * @param {object} regenerateOptions - Forwarded to nested computations.
   * @returns {object} The accumulated class data.
   * @throws {Error} On an unknown class kind or body item type.
   * @throws {SyntaxError} When a negated class may contain strings.
   */
  const computeCharacterClass = (characterClassItem, regenerateOptions) => {
    const data = getCharacterClassEmptyData();

    // `handlePositive` folds in ordinary operands; `handleNegative` is used
    // for nested classes that are themselves negated.
    let handlePositive;
    let handleNegative;
    const classKind = characterClassItem.kind;
    if (classKind === 'union') {
      handlePositive = buildHandler('union');
      handleNegative = buildHandler('union-negative');
    } else if (classKind === 'intersection') {
      handlePositive = buildHandler('intersection');
      handleNegative = buildHandler('subtraction');
    } else if (classKind === 'subtraction') {
      handlePositive = buildHandler('subtraction');
      handleNegative = buildHandler('intersection');
    } else {
      // Safeguard only; it should never be reached.
      /* istanbul ignore next */
      throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
    }

    const foldRanges = () => config.flags.ignoreCase && config.transform.unicodeFlag;

    for (const item of characterClassItem.body) {
      switch (item.type) {
        case 'value': {
          for (const cp of maybeFold(item.codePoint)) {
            handlePositive.single(data, cp);
          }
          break;
        }
        case 'characterClassRange': {
          const start = item.min.codePoint;
          const end = item.max.codePoint;
          handlePositive.range(data, start, end);
          if (foldRanges()) {
            handlePositive.iuRange(data, start, end);
          }
          break;
        }
        case 'characterClassEscape': {
          const escapeSet = getCharacterClassEscapeSet(
            item.value,
            config.flags.unicode,
            config.flags.ignoreCase
          );
          handlePositive.regSet(data, escapeSet);
          break;
        }
        case 'unicodePropertyEscape': {
          const propertyData = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative);
          handlePositive.nested(data, propertyData);
          data.transformed =
            data.transformed ||
            config.transform.unicodePropertyEscapes ||
            (config.transform.unicodeSetsFlag && propertyData.maybeIncludesStrings);
          break;
        }
        case 'characterClass': {
          const nestedHandler = item.negative ? handleNegative : handlePositive;
          const nestedClassData = computeCharacterClass(item, regenerateOptions);
          nestedHandler.nested(data, nestedClassData);
          data.transformed = true;
          break;
        }
        case 'classStrings': {
          handlePositive.nested(data, computeClassStrings(item, regenerateOptions));
          data.transformed = true;
          break;
        }
        // The `default` clause is only here as a safeguard; it should never be
        // reached. Code coverage tools should ignore it.
        /* istanbul ignore next */
        default:
          throw new Error(`Unknown term type: ${ item.type }`);
      }
      // Everything after the first operand is combined rather than seeded.
      data.first = false;
    }

    if (characterClassItem.negative && data.maybeIncludesStrings) {
      throw new SyntaxError('Cannot negate set containing strings');
    }
    return data;
  };
  /**
   * Rewrites a character class node in place when its computed data is marked
   * as transformed; otherwise leaves it untouched.
   *
   * @param {object} characterClassItem - Node to rewrite.
   * @param {object} regenerateOptions - Options for serializing the set.
   * @param {object} [computed] - Precomputed class data; computed from the
   *   node when omitted.
   * @returns {object} `characterClassItem`.
   */
  const processCharacterClass = (
    characterClassItem,
    regenerateOptions,
    computed = computeCharacterClass(characterClassItem, regenerateOptions)
  ) => {
    const { negative } = characterClassItem;
    const { singleChars, transformed, longStrings } = computed;
    if (!transformed) {
      return characterClassItem;
    }

    const setStr = singleChars.toString(regenerateOptions);

    if (negative) {
      if (config.useUnicodeFlag) {
        // Reuse the class contents without their surrounding brackets.
        const inner = setStr[0] === '[' ? setStr.slice(1, -1) : setStr;
        update(characterClassItem, `[^${inner}]`);
      } else {
        update(characterClassItem, `(?!${setStr})[\\s\\S]`);
      }
      return characterClassItem;
    }

    // Longest strings first, so a longer alternative is tried before any of
    // its prefixes.
    const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length);
    const includeSet = setStr !== '[]' || longStrings.size === 0;
    if (includeSet) {
      // The single-character set goes last, but before the empty string.
      const insertAt = pieces.length - (longStrings.has('') ? 1 : 0);
      pieces.splice(insertAt, 0, setStr);
    }
    update(characterClassItem, pieces.join('|'));
    return characterClassItem;
  };
  /**
   * Throws when any named reference recorded in `groups.unmatchedReferences`
   * is still outstanding.
   *
   * @param {object} groups - Group bookkeeping with an `unmatchedReferences`
   *   object keyed by group name.
   * @throws {Error} Listing the unknown group names.
   */
  const assertNoUnmatchedReferences = (groups) => {
    const unmatchedReferencesNames = Object.keys(groups.unmatchedReferences);
    if (unmatchedReferencesNames.length === 0) {
      return;
    }
    throw new Error(`Unknown group names: ${unmatchedReferencesNames}`);
  };
  /**
   * Recursively rewrites one node of the parsed pattern according to `config`.
   *
   * Most nodes are updated in place and returned; a named reference may be
   * replaced by a different node, so callers must use the return value.
   *
   * @param {object} item - AST node to process.
   * @param {object} regenerateOptions - Options used when serializing sets.
   * @param {object} groups - Mutable group bookkeeping: `lastIndex`, `names`,
   *   `namesConflicts`, `unmatchedReferences` and optional `onNamedGroup`.
   * @returns {object} The processed node.
   * @throws {Error} On duplicate group names in one context, on properties of
   *   strings without the `v` flag, or on an unknown node type.
   */
  const processTerm = (item, regenerateOptions, groups) => {
    switch (item.type) {
      case 'dot':
        if (config.transform.unicodeFlag) {
          update(
            item,
            getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions)
          );
        } else if (config.transform.dotAllFlag) {
          // TODO: consider changing this at the regenerate level.
          update(item, '[\\s\\S]');
        }
        break;
      case 'characterClass':
        item = processCharacterClass(item, regenerateOptions);
        break;
      case 'unicodePropertyEscape': {
        const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative);
        if (data.maybeIncludesStrings) {
          if (!config.flags.unicodeSets) {
            throw new Error(
              'Properties of strings are only supported when using the unicodeSets (v) flag.'
            );
          }
          if (config.transform.unicodeSetsFlag) {
            data.transformed = true;
            item = processCharacterClass(item, regenerateOptions, data);
          }
        } else if (config.transform.unicodePropertyEscapes) {
          update(
            item,
            data.singleChars.toString(regenerateOptions)
          );
        }
        break;
      }
      case 'characterClassEscape':
        if (config.transform.unicodeFlag) {
          update(
            item,
            getCharacterClassEscapeSet(
              item.value,
              /* config.transform.unicodeFlag implies config.flags.unicode */ true,
              config.flags.ignoreCase
            ).toString(regenerateOptions)
          );
        }
        break;
      case 'group': {
        if (item.behavior === 'normal') {
          groups.lastIndex++;
        }
        if (item.name && config.transform.namedGroups) {
          const name = item.name.value;
          if (groups.namesConflicts[name]) {
            throw new Error(
              `Group '${ name }' has already been defined in this context.`
            );
          }
          groups.namesConflicts[name] = true;
          const index = groups.lastIndex;
          // Turn the named group into a plain capturing group and remember
          // which index its name maps to.
          delete item.name;
          if (!groups.names[name]) {
            groups.names[name] = [];
          }
          groups.names[name].push(index);
          if (groups.onNamedGroup) {
            groups.onNamedGroup.call(null, name, index);
          }
          if (groups.unmatchedReferences[name]) {
            delete groups.unmatchedReferences[name];
          }
        }
      }
      /* falls through */
      case 'quantifier':
        item.body = item.body.map(term => {
          return processTerm(term, regenerateOptions, groups);
        });
        break;
      case 'disjunction': {
        // Each alternative gets its own conflict scope inheriting from the
        // enclosing one, so the same name may appear in different branches.
        const outerNamesConflicts = groups.namesConflicts;
        item.body = item.body.map(term => {
          groups.namesConflicts = Object.create(outerNamesConflicts);
          return processTerm(term, regenerateOptions, groups);
        });
        break;
      }
      case 'alternative':
        item.body = flatMap(item.body, term => {
          const res = processTerm(term, regenerateOptions, groups);
          // Alternatives cannot contain alternatives; flatten them.
          return res.type === 'alternative' ? res.body : res;
        });
        break;
      case 'value': {
        const codePoint = item.codePoint;
        const set = regenerate(codePoint);
        if (config.flags.ignoreCase && config.transform.unicodeFlag) {
          const folded = caseFold(codePoint);
          if (folded) {
            set.add(folded);
          }
        }
        update(item, set.toString(regenerateOptions));
        break;
      }
      case 'reference':
        // Named references are only rewritten when named groups themselves
        // are: `groups.names` is filled in by the `group` case under the same
        // condition. Without this guard every `\k<name>` would look unmatched,
        // be replaced by an empty group and later be reported as unknown.
        if (item.name && config.transform.namedGroups) {
          const name = item.name.value;
          const indexes = groups.names[name];
          if (indexes) {
            const body = indexes.map(index => ({
              'type': 'reference',
              'matchIndex': index,
              'raw': '\\' + index,
            }));
            if (body.length === 1) {
              return body[0];
            }
            // The name is defined in several branches: reference all of them.
            return {
              'type': 'alternative',
              'body': body,
              'raw': body.map(term => term.raw).join(''),
            };
          }
          // This named reference comes before the group where it’s defined,
          // so it’s always an empty match.
          groups.unmatchedReferences[name] = true;
          return {
            'type': 'group',
            'behavior': 'ignore',
            'body': [],
            'raw': '(?:)',
          };
        }
        break;
      case 'anchor':
      case 'empty':
        // Nothing to do here.
        break;
      // The `default` clause is only here as a safeguard; it should never be
      // reached. Code coverage tools should ignore it.
      /* istanbul ignore next */
      default:
        throw new Error(`Unknown term type: ${ item.type }`);
    }
    return item;
  };
  // Module-wide state describing the pattern currently being rewritten.
  //   flags:     which RegExp flags the input pattern carries.
  //   transform: which features have to be rewritten away.
  const config = {
    flags: {
      ignoreCase: false,
      unicode: false,
      unicodeSets: false,
      dotAll: false,
    },
    transform: {
      dotAllFlag: false,
      unicodeFlag: false,
      unicodeSetsFlag: false,
      unicodePropertyEscapes: false,
      namedGroups: false,
    },
    // True when the pattern has the `u` or `v` flag and the Unicode flag is
    // not being transformed.
    get useUnicodeFlag() {
      const { flags, transform } = this;
      return (flags.unicode || flags.unicodeSets) && !transform.unicodeFlag;
    }
  };
  /**
   * Validates the user-supplied options object.
   *
   * @param {object} [options] - Nothing is checked when this is falsy.
   * @throws {Error} On an unknown option name or an unsupported value.
   */
  const validateOptions = (options) => {
    if (!options) return;

    // Options that only accept `false` (or nothing) and `'transform'`.
    const transformOnly = new Set([
      'dotAllFlag',
      'unicodeFlag',
      'unicodePropertyEscapes',
      'namedGroups',
    ]);

    Object.keys(options).forEach((key) => {
      const value = options[key];
      const isUnset = value == null || value === false;

      if (transformOnly.has(key)) {
        if (!isUnset && value !== 'transform') {
          throw new Error(`.${key} must be false (default) or 'transform'.`);
        }
      } else if (key === 'unicodeSetsFlag') {
        if (!isUnset && value !== 'parse' && value !== 'transform') {
          throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
        }
      } else if (key === 'onNamedGroup') {
        if (value != null && typeof value !== 'function') {
          throw new Error('.onNamedGroup must be a function.');
        }
      } else {
        throw new Error(`.${key} is not a valid regexpu-core option.`);
      }
    });
  };
  // Whether `flags` contains `flag`; a missing or empty `flags` counts as "no".
  const hasFlag = (flags, flag) => {
    if (!flags) {
      return false;
    }
    return flags.includes(flag);
  };
  // Whether option `name` is set to the literal string 'transform'.
  const transform = (options, name) => {
    if (!options) {
      return false;
    }
    return options[name] === 'transform';
  };
  /**
   * Rewrites a regular expression pattern according to the requested
   * transforms and returns the new pattern source.
   *
   * @param {string} pattern - Pattern source, without delimiters.
   * @param {string} [flags] - RegExp flags; `u`, `v`, `i` and `s` influence
   *   the rewrite.
   * @param {object} [options] - Checked by `validateOptions`: `dotAllFlag`,
   *   `unicodeFlag`, `unicodePropertyEscapes`, `namedGroups`
   *   (`false` | 'transform'), `unicodeSetsFlag`
   *   (`false` | 'parse' | 'transform') and `onNamedGroup` (function called
   *   with each transformed group's name and index).
   * @returns {string} The rewritten pattern.
   * @throws {Error} On invalid options or unknown named group references;
   *   the parser and the term processing may throw as well.
   */
  const rewritePattern = (pattern, flags, options) => {
    validateOptions(options);

    config.flags.unicode = hasFlag(flags, 'u');
    config.flags.unicodeSets = hasFlag(flags, 'v');
    config.flags.ignoreCase = hasFlag(flags, 'i');
    config.flags.dotAll = hasFlag(flags, 's');

    // Both `u` and `v` put the pattern in Unicode mode. Everything derived
    // from "is this a Unicode pattern?" has to honour either flag, the same
    // way `unicodeFlag` below and `config.useUnicodeFlag` do.
    const isUnicodeMode = config.flags.unicode || config.flags.unicodeSets;

    config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
    config.transform.unicodeFlag = isUnicodeMode && transform(options, 'unicodeFlag');
    config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag');
    // unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
    config.transform.unicodePropertyEscapes = isUnicodeMode && (
      transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
    );
    config.transform.namedGroups = transform(options, 'namedGroups');

    const regjsparserFeatures = {
      'unicodeSet': Boolean(options && options.unicodeSetsFlag),
      // Enable every stable RegExp feature by default
      'unicodePropertyEscape': true,
      'namedGroups': true,
      'lookbehind': true,
    };
    const regenerateOptions = {
      'hasUnicodeFlag': config.useUnicodeFlag,
      // Only patterns outside Unicode mode work on code units alone.
      'bmpOnly': !isUnicodeMode
    };
    const groups = {
      'onNamedGroup': options && options.onNamedGroup,
      'lastIndex': 0,
      'names': Object.create(null), // { [name]: Array<index> }
      'namesConflicts': Object.create(null), // { [name]: true }
      'unmatchedReferences': Object.create(null) // { [name]: true }
    };

    const tree = parse(pattern, flags, regjsparserFeatures);
    // Note: `processTerm` mutates `tree` and `groups`.
    processTerm(tree, regenerateOptions, groups);
    assertNoUnmatchedReferences(groups);
    return generate(tree);
  };
  673. module.exports = rewritePattern;