regjsgen.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /*!
  2. * regjsgen 0.5.2
  3. * Copyright 2014-2020 Benjamin Tan <https://ofcr.se/>
  4. * Available under the MIT license <https://github.com/bnjmnt4n/regjsgen/blob/master/LICENSE-MIT.txt>
  5. */
  6. ;(function() {
  7. 'use strict';
  // `typeof` results that denote a value of the language type `Object`.
  var objectTypes = {
    'function': true,
    'object': true
  };

  // Global object reference: `window` when it is an object, otherwise the
  // `this` binding the wrapper was invoked with.
  var root = objectTypes[typeof window] ? (window || this) : this;

  // Free variable `exports`, rejected when it looks like a DOM node.
  var freeExports = objectTypes[typeof exports] && exports;
  if (freeExports && freeExports.nodeType) {
    freeExports = false;
  }

  // Whether a free variable `module` exists and is not a DOM node.
  var hasFreeModule = objectTypes[typeof module] && module;
  if (hasFreeModule) {
    hasFreeModule = !module.nodeType;
  }

  // Free variable `global` from Node.js or Browserified code; it replaces
  // `root` only when it really is a self-referencing global object.
  var freeGlobal = freeExports && hasFreeModule && typeof global == 'object' && global;
  if (freeGlobal) {
    if (
      freeGlobal.global === freeGlobal ||
      freeGlobal.window === freeGlobal ||
      freeGlobal.self === freeGlobal
    ) {
      root = freeGlobal;
    }
  }

  // Used to check objects for own properties.
  var hasOwnProperty = Object.prototype.hasOwnProperty;
  26. /*--------------------------------------------------------------------------*/
  // Converts a single Unicode code point (first argument, coerced with
  // `Number`) into its string form. Code points above U+FFFF are returned as
  // a surrogate pair. Throws a `RangeError` for anything that is not an
  // integer in the range 0..0x10FFFF.
  // Based on https://mths.be/fromcodepoint by @mathias.
  function fromCodePoint() {
    var value = Number(arguments[0]);
    var isValid =
      isFinite(value) &&            // rules out `NaN` and both infinities
      value >= 0 &&
      value <= 0x10FFFF &&
      Math.floor(value) == value;   // must be an integer
    if (!isValid) {
      throw RangeError('Invalid code point: ' + value);
    }
    if (value > 0xFFFF) {
      // Astral code point: split into high and low surrogate halves.
      // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      var offset = value - 0x10000;
      return String.fromCharCode(
        (offset >> 10) + 0xD800,
        (offset % 0x400) + 0xDC00
      );
    }
    // BMP code point.
    return String.fromCharCode(value);
  }
  51. /*--------------------------------------------------------------------------*/
  // Cache of compiled matchers, keyed by the `|`-separated list of accepted
  // types they were built from.
  var assertTypeRegexMap = {};

  // Verifies that `type` is one of the accepted node types and throws an
  // `Error` otherwise. `expected` is either a single type name or several
  // names joined with `|`.
  function assertType(type, expected) {
    var isSingleType = expected.indexOf('|') == -1;
    if (isSingleType) {
      if (type != expected) {
        throw Error('Invalid node type: ' + type + '; expected type: ' + expected);
      }
      return;
    }

    // Several alternatives: match against an anchored regex, compiled once.
    var matcher;
    if (hasOwnProperty.call(assertTypeRegexMap, expected)) {
      matcher = assertTypeRegexMap[expected];
    } else {
      matcher = assertTypeRegexMap[expected] = RegExp('^(?:' + expected + ')$');
    }

    if (!matcher.test(type)) {
      // The message embeds the compiled matcher, not the raw type list.
      throw Error('Invalid node type: ' + type + '; expected types: ' + matcher);
    }
  }
  69. /*--------------------------------------------------------------------------*/
  // Entry point: turns an AST node into its regular expression source by
  // dispatching on `node.type` through the `generators` table. Throws an
  // `Error` for a type that has no registered generator.
  function generate(node) {
    var type = node.type;

    if (!hasOwnProperty.call(generators, type)) {
      throw Error('Invalid node type: ' + type);
    }

    return generators[type](node);
  }
  // Builds a string by concatenating the output of `generator` for each entry
  // of `terms`, inserting `separator` (when given) between entries.
  function generateSequence(generator, terms, /* optional */ separator) {
    var output = '';

    for (var index = 0, count = terms.length; index < count; index++) {
      var current = terms[index];

      if (index > 0 && separator) {
        output += separator;
      }

      // A `\0` null escape directly followed by a digit symbol would be read
      // back as a longer escape, so it is written as `\000` instead.
      if (index + 1 < count && current.type == 'value' && current.kind == 'null') {
        var following = terms[index + 1];
        if (
          following.type == 'value' &&
          following.kind == 'symbol' &&
          following.codePoint >= 48 &&   // '0'
          following.codePoint <= 57      // '9'
        ) {
          output += '\\000';
          continue;
        }
      }

      output += generator(current);
    }

    return output;
  }
  105. /*--------------------------------------------------------------------------*/
  // Generates an `alternative` node: its terms written one after another.
  function generateAlternative(node) {
    assertType(node.type, 'alternative');

    var terms = node.body;
    return generateSequence(generateTerm, terms);
  }
  // Generates an `anchor` node (`^`, `$`, `\b` or `\B`) from its `kind`.
  // Throws an `Error` for an unrecognised kind.
  function generateAnchor(node) {
    assertType(node.type, 'anchor');

    var kind = node.kind;
    if (kind === 'start') {
      return '^';
    }
    if (kind === 'end') {
      return '$';
    }
    if (kind === 'boundary') {
      return '\\b';
    }
    if (kind === 'not-boundary') {
      return '\\B';
    }
    throw Error('Invalid assertion');
  }
  // Generates a node that may appear as an atom, i.e. as the body of a
  // quantifier. Throws an `Error` if the node type is not an atom type.
  //
  // `unicodePropertyEscape` is accepted here because `generateQuantifier`
  // routes its body through this function; without it a quantified property
  // escape such as `\p{L}+` was rejected even though `generateTerm` accepts
  // the unquantified form.
  function generateAtom(node) {
    assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|group|reference|unicodePropertyEscape|value');

    return generate(node);
  }
  // Generates a `characterClass` node as `[...]`, with a leading `^` when
  // the class is negative. The members are joined with `&&` for an
  // intersection, `--` for a subtraction, and nothing otherwise.
  function generateCharacterClass(node) {
    assertType(node.type, 'characterClass');

    var kind = node.kind;
    var separator = '';
    if (kind === 'intersection') {
      separator = '&&';
    } else if (kind === 'subtraction') {
      separator = '--';
    }

    var prefix = node.negative ? '^' : '';
    var members = generateSequence(generateClassAtom, node.body, separator);

    return '[' + prefix + members + ']';
  }
  // Generates a `characterClassEscape` node: a backslash followed by the
  // node's `value`.
  function generateCharacterClassEscape(node) {
    assertType(node.type, 'characterClassEscape');

    var escapeName = node.value;
    return '\\' + escapeName;
  }
  // Generates a `characterClassRange` node as `min-max`. Throws an `Error`
  // when either endpoint is itself a range.
  function generateCharacterClassRange(node) {
    assertType(node.type, 'characterClassRange');

    var lower = node.min;
    var upper = node.max;

    var hasNestedRange =
      lower.type == 'characterClassRange' ||
      upper.type == 'characterClassRange';
    if (hasNestedRange) {
      throw Error('Invalid character class range');
    }

    var from = generateClassAtom(lower);
    var to = generateClassAtom(upper);
    return from + '-' + to;
  }
  // Generates a node that may appear inside a character class. Throws an
  // `Error` if the node type is not allowed there.
  function generateClassAtom(node) {
    var allowedTypes = 'anchor|characterClass|characterClassEscape|characterClassRange|dot|value|unicodePropertyEscape|classStrings';
    assertType(node.type, allowedTypes);

    return generate(node);
  }
  // Generates a `classStrings` node: its strings joined with `|` and wrapped
  // in parentheses.
  function generateClassStrings(node) {
    assertType(node.type, 'classStrings');

    var alternatives = generateSequence(generateClassString, node.strings, '|');
    return '(' + alternatives + ')';
  }
  // Generates a `classString` node: its characters written one after another.
  function generateClassString(node) {
    assertType(node.type, 'classString');

    var characters = node.characters;
    return generateSequence(generate, characters);
  }
  // Generates a `disjunction` node: its alternatives joined with `|`.
  function generateDisjunction(node) {
    assertType(node.type, 'disjunction');

    var alternatives = node.body;
    return generateSequence(generate, alternatives, '|');
  }
  // Generates a `dot` node, which is always the single character `.`.
  function generateDot(node) {
    var expectedType = 'dot';
    assertType(node.type, expectedType);

    return '.';
  }
  // Generates a `group` node as `(<prefix><body>)`. The prefix depends on
  // `node.behavior`:
  //   'normal'             -> none, or `?<name>` when the group is named
  //   'ignore'             -> `?:`
  //   'lookahead'          -> `?=`
  //   'negativeLookahead'  -> `?!`
  //   'lookbehind'         -> `?<=`
  //   'negativeLookbehind' -> `?<!`
  // Throws an `Error` for any other behavior.
  function generateGroup(node) {
    assertType(node.type, 'group');

    var result = '';

    switch (node.behavior) {
      case 'normal':
        if (node.name) {
          result += '?<' + generateIdentifier(node.name) + '>';
        }
        break;
      case 'ignore':
        result += '?:';
        break;
      case 'lookahead':
        result += '?=';
        break;
      case 'negativeLookahead':
        result += '?!';
        break;
      case 'lookbehind':
        result += '?<=';
        break;
      case 'negativeLookbehind':
        result += '?<!';
        break;
      default:
        // Report the property that was actually inspected (`behavior`); the
        // misspelt `behaviour` lookup always printed `undefined`.
        throw Error('Invalid behaviour: ' + node.behavior);
    }

    result += generateSequence(generate, node.body);

    return '(' + result + ')';
  }
  // Generates an `identifier` node (used for group names): returns its
  // `value` unchanged.
  function generateIdentifier(node) {
    assertType(node.type, 'identifier');

    var name = node.value;
    return name;
  }
  // Generates a `quantifier` node: the quantified atom (`node.body[0]`)
  // followed by the shortest spelling of its repetition range, plus a
  // trailing `?` when the quantifier is not greedy.
  function generateQuantifier(node) {
    assertType(node.type, 'quantifier');

    var min = node.min;
    var max = node.max;
    var range;

    if (max == null) {
      // No upper bound.
      range = min == 0 ? '*' : min == 1 ? '+' : '{' + min + ',}';
    } else if (min == max) {
      range = '{' + min + '}';
    } else if (min == 0 && max == 1) {
      range = '?';
    } else {
      range = '{' + min + ',' + max + '}';
    }

    var laziness = node.greedy ? '' : '?';

    return generateAtom(node.body[0]) + range + laziness;
  }
  // Generates a `reference` node: `\N` for a numbered backreference or
  // `\k<name>` for a named one. Throws an `Error` when the node carries
  // neither a `matchIndex` nor a `name`.
  function generateReference(node) {
    assertType(node.type, 'reference');

    var index = node.matchIndex;
    if (index) {
      return '\\' + index;
    }

    var name = node.name;
    if (name) {
      return '\\k<' + generateIdentifier(name) + '>';
    }

    throw new Error('Unknown reference type');
  }
  // Generates a node that may appear as a term of an alternative. Throws an
  // `Error` if the node type is not a term type.
  function generateTerm(node) {
    var allowedTypes = 'anchor|characterClass|characterClassEscape|empty|group|quantifier|reference|unicodePropertyEscape|value|dot';
    assertType(node.type, allowedTypes);

    return generate(node);
  }
  // Generates a `unicodePropertyEscape` node as `\p{value}`, or `\P{value}`
  // when the node is negative.
  function generateUnicodePropertyEscape(node) {
    assertType(node.type, 'unicodePropertyEscape');

    var letter = node.negative ? 'P' : 'p';
    return '\\' + letter + '{' + node.value + '}';
  }
  // Generates a `value` node (a single character in one of several escape
  // notations) from its `kind` and numeric `codePoint`. Throws an `Error`
  // when the code point is not a number, when the kind is unsupported, or
  // when a `singleEscape` code point has no single-character escape.
  function generateValue(node) {
    assertType(node.type, 'value');

    var kind = node.kind;
    var codePoint = node.codePoint;

    if (typeof codePoint != 'number') {
      throw new Error('Invalid code point: ' + codePoint);
    }

    if (kind == 'singleEscape') {
      // Code points that have a dedicated single-character escape.
      var singleEscapes = {
        8: '\\b',
        9: '\\t',
        10: '\\n',
        11: '\\v',
        12: '\\f',
        13: '\\r',
        45: '\\-'
      };
      if (hasOwnProperty.call(singleEscapes, codePoint)) {
        return singleEscapes[codePoint];
      }
      throw Error('Invalid code point: ' + codePoint);
    }

    var hex;
    switch (kind) {
      case 'controlLetter':
        // Control characters are written as `\c` plus the matching letter.
        return '\\c' + fromCodePoint(codePoint + 64);
      case 'hexadecimalEscape':
        hex = codePoint.toString(16).toUpperCase();
        return '\\x' + ('00' + hex).slice(-2);
      case 'identifier':
        return '\\' + fromCodePoint(codePoint);
      case 'null':
        return '\\' + codePoint;
      case 'octal':
        return '\\' + ('000' + codePoint.toString(8)).slice(-3);
      case 'symbol':
        return fromCodePoint(codePoint);
      case 'unicodeEscape':
        hex = codePoint.toString(16).toUpperCase();
        return '\\u' + ('0000' + hex).slice(-4);
      case 'unicodeCodePointEscape':
        hex = codePoint.toString(16).toUpperCase();
        return '\\u{' + hex + '}';
      default:
        throw Error('Unsupported node kind: ' + kind);
    }
  }
  295. /*--------------------------------------------------------------------------*/
  // Dispatch table consulted by `generate`: maps a node type to the function
  // that produces its string form.
  var generators = {
    alternative: generateAlternative,
    anchor: generateAnchor,
    characterClass: generateCharacterClass,
    characterClassEscape: generateCharacterClassEscape,
    characterClassRange: generateCharacterClassRange,
    classStrings: generateClassStrings,
    disjunction: generateDisjunction,
    dot: generateDot,
    group: generateGroup,
    quantifier: generateQuantifier,
    reference: generateReference,
    unicodePropertyEscape: generateUnicodePropertyEscape,
    value: generateValue
  };
  312. /*--------------------------------------------------------------------------*/
  // Public API object.
  var regjsgen = { generate: generate };

  // AMD loaders. The condition is kept in this exact shape because some AMD
  // build optimizers, like r.js, look for this pattern.
  if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
    // Anonymous module, so it can be aliased through path mapping.
    define(function() {
      return regjsgen;
    });
    // Also expose the API on the global object.
    root.regjsgen = regjsgen;
  }
  // `exports` is checked after `define` in case a build optimizer adds an
  // `exports` object.
  else if (freeExports && hasFreeModule) {
    // CommonJS: attach `generate` directly to `exports`.
    freeExports.generate = generate;
  }
  else {
    // No module system detected: export to the global object.
    root.regjsgen = regjsgen;
  }
  334. }.call(this));