no-misleading-character-class.js 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /**
  2. * @author Toru Nagashima <https://github.com/mysticatea>
  3. */
  4. "use strict";
  5. const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
  6. const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp");
  7. const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
  8. const astUtils = require("./utils/ast-utils.js");
  9. //------------------------------------------------------------------------------
  10. // Helpers
  11. //------------------------------------------------------------------------------
  12. const REGEXPP_LATEST_ECMA_VERSION = 2022; // cap applied to the configured `ecmaVersion` before it is handed to `RegExpValidator` (presumably the newest version the bundled regexpp supports; confirm when upgrading regexpp)
  13. /**
  14. * Iterate character sequences of a given nodes.
  15. *
  16. * CharacterClassRange syntax can steal a part of character sequence,
  17. * so this function reverts CharacterClassRange syntax and restore the sequence.
  18. * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences.
  19. * @returns {IterableIterator<number[]>} The list of character sequences.
  20. */
  21. function *iterateCharacterSequence(nodes) {
  22. let seq = [];
  23. for (const node of nodes) {
  24. switch (node.type) {
  25. case "Character":
  26. seq.push(node.value);
  27. break;
  28. case "CharacterClassRange":
  29. seq.push(node.min.value);
  30. yield seq;
  31. seq = [node.max.value];
  32. break;
  33. case "CharacterSet":
  34. if (seq.length > 0) {
  35. yield seq;
  36. seq = [];
  37. }
  38. break;
  39. // no default
  40. }
  41. }
  42. if (seq.length > 0) {
  43. yield seq;
  44. }
  45. }
  46. const hasCharacterSequence = {
  47. surrogatePairWithoutUFlag(chars) {
  48. return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
  49. },
  50. combiningClass(chars) {
  51. return chars.some((c, i) => (
  52. i !== 0 &&
  53. isCombiningCharacter(c) &&
  54. !isCombiningCharacter(chars[i - 1])
  55. ));
  56. },
  57. emojiModifier(chars) {
  58. return chars.some((c, i) => (
  59. i !== 0 &&
  60. isEmojiModifier(c) &&
  61. !isEmojiModifier(chars[i - 1])
  62. ));
  63. },
  64. regionalIndicatorSymbol(chars) {
  65. return chars.some((c, i) => (
  66. i !== 0 &&
  67. isRegionalIndicatorSymbol(c) &&
  68. isRegionalIndicatorSymbol(chars[i - 1])
  69. ));
  70. },
  71. zwj(chars) {
  72. const lastIndex = chars.length - 1;
  73. return chars.some((c, i) => (
  74. i !== 0 &&
  75. i !== lastIndex &&
  76. c === 0x200d &&
  77. chars[i - 1] !== 0x200d &&
  78. chars[i + 1] !== 0x200d
  79. ));
  80. }
  81. };
  82. const kinds = Object.keys(hasCharacterSequence);
  83. //------------------------------------------------------------------------------
  84. // Rule Definition
  85. //------------------------------------------------------------------------------
  86. /** @type {import('../shared/types').Rule} */
  87. module.exports = {
  88. meta: {
  89. type: "problem",
  90. docs: {
  91. description: "Disallow characters which are made with multiple code points in character class syntax",
  92. recommended: true,
  93. url: "https://eslint.org/docs/rules/no-misleading-character-class"
  94. },
  95. hasSuggestions: true,
  96. schema: [],
  97. messages: {
  98. surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
  99. combiningClass: "Unexpected combined character in character class.",
  100. emojiModifier: "Unexpected modified Emoji in character class.",
  101. regionalIndicatorSymbol: "Unexpected national flag in character class.",
  102. zwj: "Unexpected joined character sequence in character class.",
  103. suggestUnicodeFlag: "Add unicode 'u' flag to regex."
  104. }
  105. },
  106. create(context) {
  107. const sourceCode = context.getSourceCode();
  108. const parser = new RegExpParser();
  109. /**
  110. * Verify a given regular expression.
  111. * @param {Node} node The node to report.
  112. * @param {string} pattern The regular expression pattern to verify.
  113. * @param {string} flags The flags of the regular expression.
  114. * @param {Function} unicodeFixer Fixer for missing "u" flag.
  115. * @returns {void}
  116. */
  117. function verify(node, pattern, flags, unicodeFixer) {
  118. let patternNode;
  119. try {
  120. patternNode = parser.parsePattern(
  121. pattern,
  122. 0,
  123. pattern.length,
  124. flags.includes("u")
  125. );
  126. } catch {
  127. // Ignore regular expressions with syntax errors
  128. return;
  129. }
  130. const foundKinds = new Set();
  131. visitRegExpAST(patternNode, {
  132. onCharacterClassEnter(ccNode) {
  133. for (const chars of iterateCharacterSequence(ccNode.elements)) {
  134. for (const kind of kinds) {
  135. if (hasCharacterSequence[kind](chars)) {
  136. foundKinds.add(kind);
  137. }
  138. }
  139. }
  140. }
  141. });
  142. for (const kind of foundKinds) {
  143. let suggest;
  144. if (kind === "surrogatePairWithoutUFlag") {
  145. suggest = [{
  146. messageId: "suggestUnicodeFlag",
  147. fix: unicodeFixer
  148. }];
  149. }
  150. context.report({
  151. node,
  152. messageId: kind,
  153. suggest
  154. });
  155. }
  156. }
  157. /**
  158. * Checks if the given regular expression pattern would be valid with the `u` flag.
  159. * @param {string} pattern The regular expression pattern to verify.
  160. * @returns {boolean} `true` if the pattern would be valid with the `u` flag.
  161. * `false` if the pattern would be invalid with the `u` flag or the configured
  162. * ecmaVersion doesn't support the `u` flag.
  163. */
  164. function isValidWithUnicodeFlag(pattern) {
  165. const { ecmaVersion } = context.languageOptions;
  166. // ecmaVersion <= 5 doesn't support the 'u' flag
  167. if (ecmaVersion <= 5) {
  168. return false;
  169. }
  170. const validator = new RegExpValidator({
  171. ecmaVersion: Math.min(ecmaVersion, REGEXPP_LATEST_ECMA_VERSION)
  172. });
  173. try {
  174. validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true);
  175. } catch {
  176. return false;
  177. }
  178. return true;
  179. }
  180. return {
  181. "Literal[regex]"(node) {
  182. verify(node, node.regex.pattern, node.regex.flags, fixer => {
  183. if (!isValidWithUnicodeFlag(node.regex.pattern)) {
  184. return null;
  185. }
  186. return fixer.insertTextAfter(node, "u");
  187. });
  188. },
  189. "Program"() {
  190. const scope = context.getScope();
  191. const tracker = new ReferenceTracker(scope);
  192. /*
  193. * Iterate calls of RegExp.
  194. * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
  195. * `const {RegExp: a} = window; new a()`, etc...
  196. */
  197. for (const { node } of tracker.iterateGlobalReferences({
  198. RegExp: { [CALL]: true, [CONSTRUCT]: true }
  199. })) {
  200. const [patternNode, flagsNode] = node.arguments;
  201. const pattern = getStringIfConstant(patternNode, scope);
  202. const flags = getStringIfConstant(flagsNode, scope);
  203. if (typeof pattern === "string") {
  204. verify(node, pattern, flags || "", fixer => {
  205. if (!isValidWithUnicodeFlag(pattern)) {
  206. return null;
  207. }
  208. if (node.arguments.length === 1) {
  209. const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis
  210. return fixer.insertTextAfter(
  211. penultimateToken,
  212. astUtils.isCommaToken(penultimateToken)
  213. ? ' "u",'
  214. : ', "u"'
  215. );
  216. }
  217. if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
  218. const range = [flagsNode.range[0], flagsNode.range[1] - 1];
  219. return fixer.insertTextAfterRange(range, "u");
  220. }
  221. return null;
  222. });
  223. }
  224. }
  225. }
  226. };
  227. }
  228. };