parse.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. import { SelectorType, AttributeAction, } from "./types";
  // Matches a name at the start of a string: an optional leading character
  // that is neither a backslash nor `#`, followed by one or more backslash
  // escapes, word characters, hyphens or characters in U+00B0..U+FFFF.
  const reName = /^[^\\#]?(?:\\(?:[\da-f]{1,6}\s?|.)|[\w\-\u00b0-\uFFFF])+/;
  // Matches every backslash escape. Group 1 is the escaped text, group 2 is
  // set only when the escaped character is whitespace.
  const reEscape = /\\([\da-f]{1,6}\s?|(\s)|.)/gi;
  // Maps the character code that precedes `=` in an attribute selector to the
  // comparison it selects.
  const actionTypes = new Map()
    .set(126 /* Tilde */, AttributeAction.Element)
    .set(94 /* Circumflex */, AttributeAction.Start)
    .set(36 /* Dollar */, AttributeAction.End)
    .set(42 /* Asterisk */, AttributeAction.Any)
    .set(33 /* ExclamationMark */, AttributeAction.Not)
    .set(124 /* Pipe */, AttributeAction.Hyphen);
  // Pseudo-classes whose parenthesised argument is itself parsed as a
  // selector list rather than kept as a plain string.
  const unpackPseudos = new Set([
    "has", "not", "matches", "is", "where", "host", "host-context",
  ]);
  /**
   * Tells whether a selector token is a traversal (combinator) token.
   * Callers can use this, for example, to reorder only the tokens that
   * are not traversals.
   *
   * @param selector Selector token to check.
   * @returns `true` for adjacent, child, descendant, parent, sibling and
   * column-combinator tokens, `false` for everything else.
   */
  export function isTraversal(selector) {
    const { type } = selector;
    return (
      type === SelectorType.Adjacent ||
      type === SelectorType.Child ||
      type === SelectorType.Descendant ||
      type === SelectorType.Parent ||
      type === SelectorType.Sibling ||
      type === SelectorType.ColumnCombinator
    );
  }
  // Pseudo-classes whose argument loses one pair of matching surrounding quotes.
  const stripQuotesFromPseudos = new Set().add("contains").add("icontains");
  // Unescape function adapted from https://github.com/jquery/sizzle/blob/master/src/sizzle.js#L152
  /**
   * Replacement callback for `reEscape`: turns one CSS escape into the text
   * it stands for.
   *
   * @param _ The whole match (unused).
   * @param escaped The text after the backslash: hex digits (optionally
   * followed by one whitespace character) or a single character.
   * @param escapedWhitespace Set when the escaped character is whitespace.
   * @returns The character for a hex escape, or `escaped` itself for a
   * non-hex escape. NUL, surrogate and out-of-range code points yield U+FFFD.
   */
  function funescape(_, escaped, escapedWhitespace) {
    const codePoint = parseInt(escaped, 16);
    // NaN means the escape is not a hex code point (e.g. `\.`); an escaped
    // whitespace character is likewise kept verbatim.
    if (Number.isNaN(codePoint) || escapedWhitespace) {
      return escaped;
    }
    // CSS Syntax: NUL, surrogates and anything above U+10FFFF are replaced
    // with U+FFFD. Previously values above U+10FFFF produced an invalid
    // surrogate pair.
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
      return "\ufffd";
    }
    // Handles both BMP and supplementary-plane code points.
    return String.fromCodePoint(codePoint);
  }
  /**
   * Resolves every backslash escape in `str`.
   *
   * @param str Text that may contain CSS escapes.
   * @returns `str` with each `reEscape` match replaced by `funescape`'s result.
   */
  function unescapeCSS(str) {
    const unescaped = str.replace(reEscape, funescape);
    return unescaped;
  }
  /**
   * Tells whether a character code is a single or double quote.
   *
   * @param c Character code to test.
   */
  function isQuote(c) {
    switch (c) {
      case 39 /* SingleQuote */:
      case 34 /* DoubleQuote */:
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether a character code is a space, tab, newline, form feed or
   * carriage return.
   *
   * @param c Character code to test.
   */
  function isWhitespace(c) {
    switch (c) {
      case 32 /* Space */:
      case 9 /* Tab */:
      case 10 /* NewLine */:
      case 12 /* FormFeed */:
      case 13 /* CarriageReturn */:
        return true;
      default:
        return false;
    }
  }
  /**
   * Parses `selector` into its tokens.
   *
   * @param selector Selector to parse. Non-string values are converted to a
   * string before parsing.
   * @returns Returns a two-dimensional array.
   * The first dimension represents selectors separated by commas (eg. `sub1, sub2`),
   * the second contains the relevant tokens for that selector.
   * @throws {Error} If part of the selector is left over after parsing, or
   * if the selector is malformed.
   */
  export function parse(selector) {
    // Convert once, so the length check and the error message below look at
    // the same string the parser consumed (the raw argument may not have a
    // `length` or `slice` at all).
    const input = `${selector}`;
    const subselects = [];
    const endIndex = parseSelector(subselects, input, 0);
    if (endIndex < input.length) {
      throw new Error(`Unmatched selector: ${input.slice(endIndex)}`);
    }
    return subselects;
  }
  /**
   * Parses a comma-separated selector list out of `selector`, starting at
   * `selectorIndex`, and appends one token array per sub-selector to
   * `subselects`.
   *
   * Parsing stops at the end of the string or at the first character that
   * cannot start a token (for example the `)` closing `:not(...)`); the
   * caller decides whether stopping there is an error.
   *
   * @param subselects Array that receives one token list per sub-selector.
   * @param selector Full selector string being parsed.
   * @param selectorIndex Index in `selector` at which to start parsing.
   * @returns Index of the first character that was not consumed.
   * @throws {Error} On malformed input: missing names, unterminated
   * attribute values, comments or parentheses, successive combinators, or
   * empty sub-selectors.
   */
  function parseSelector(subselects, selector, selectorIndex) {
    let tokens = [];
    /**
     * Reads a name starting `offset` characters after the current position,
     * advances past it and returns it with escapes resolved.
     */
    function getName(offset) {
      const match = selector.slice(selectorIndex + offset).match(reName);
      if (!match) {
        throw new Error(`Expected name, found ${selector.slice(selectorIndex)}`);
      }
      const [name] = match;
      selectorIndex += offset + name.length;
      return unescapeCSS(name);
    }
    /** Skips `offset` characters, then any run of whitespace. */
    function stripWhitespace(offset) {
      selectorIndex += offset;
      while (
        selectorIndex < selector.length &&
        isWhitespace(selector.charCodeAt(selectorIndex))
      ) {
        selectorIndex++;
      }
    }
    /**
     * Reads a parenthesised value. The current position must be on the
     * opening `(`; on return it is just past the matching `)`.
     * Nested, unescaped parentheses are balanced. The returned text has
     * escapes resolved.
     */
    function readValueWithParenthesis() {
      selectorIndex += 1;
      const start = selectorIndex;
      let counter = 1;
      for (; counter > 0 && selectorIndex < selector.length; selectorIndex++) {
        if (
          selector.charCodeAt(selectorIndex) === 40 /* LeftParenthesis */ &&
          !isEscaped(selectorIndex)
        ) {
          counter++;
        } else if (
          selector.charCodeAt(selectorIndex) === 41 /* RightParenthesis */ &&
          !isEscaped(selectorIndex)
        ) {
          counter--;
        }
      }
      if (counter) {
        throw new Error("Parenthesis not matched");
      }
      return unescapeCSS(selector.slice(start, selectorIndex - 1));
    }
    /** A character is escaped when preceded by an odd number of backslashes. */
    function isEscaped(pos) {
      let slashCount = 0;
      while (selector.charCodeAt(--pos) === 92 /* BackSlash */) {
        slashCount++;
      }
      return (slashCount & 1) === 1;
    }
    function ensureNotTraversal() {
      if (tokens.length > 0 && isTraversal(tokens[tokens.length - 1])) {
        throw new Error("Did not expect successive traversals.");
      }
    }
    /**
     * Adds a combinator token. A descendant token produced by whitespace
     * just before the combinator (`a > b`) is upgraded in place.
     */
    function addTraversal(type) {
      if (
        tokens.length > 0 &&
        tokens[tokens.length - 1].type === SelectorType.Descendant
      ) {
        tokens[tokens.length - 1].type = type;
        return;
      }
      ensureNotTraversal();
      tokens.push({ type });
    }
    /** Adds the attribute token for the `.class` / `#id` shorthands. */
    function addSpecialAttribute(name, action) {
      tokens.push({
        type: SelectorType.Attribute,
        name,
        action,
        value: getName(1),
        namespace: null,
        ignoreCase: "quirks",
      });
    }
    /**
     * We have finished parsing the current part of the selector.
     *
     * Removes a trailing descendant token if there is one and stores the
     * token list; an empty list is an error.
     */
    function finalizeSubselector() {
      if (
        tokens.length &&
        tokens[tokens.length - 1].type === SelectorType.Descendant
      ) {
        tokens.pop();
      }
      if (tokens.length === 0) {
        throw new Error("Empty sub-selector");
      }
      subselects.push(tokens);
    }
    stripWhitespace(0);
    // Nothing left to parse: report the position without adding a sub-selector.
    if (selector.length === selectorIndex) {
      return selectorIndex;
    }
    loop: while (selectorIndex < selector.length) {
      const firstChar = selector.charCodeAt(selectorIndex);
      switch (firstChar) {
        // Whitespace
        case 32 /* Space */:
        case 9 /* Tab */:
        case 10 /* NewLine */:
        case 12 /* FormFeed */:
        case 13 /* CarriageReturn */: {
          // Whitespace right after a combinator is insignificant. Every
          // combinator case already skips trailing whitespace, so this is
          // only reached when a comment sits in between (`a /* c */ b`).
          // Look at the LAST token for that; checking the first token made
          // such selectors throw.
          if (
            tokens.length === 0 ||
            !isTraversal(tokens[tokens.length - 1])
          ) {
            tokens.push({ type: SelectorType.Descendant });
          }
          stripWhitespace(1);
          break;
        }
        // Traversals
        case 62 /* GreaterThan */: {
          addTraversal(SelectorType.Child);
          stripWhitespace(1);
          break;
        }
        case 60 /* LessThan */: {
          addTraversal(SelectorType.Parent);
          stripWhitespace(1);
          break;
        }
        case 126 /* Tilde */: {
          addTraversal(SelectorType.Sibling);
          stripWhitespace(1);
          break;
        }
        case 43 /* Plus */: {
          addTraversal(SelectorType.Adjacent);
          stripWhitespace(1);
          break;
        }
        // Special attribute selectors: .class, #id
        case 46 /* Period */: {
          addSpecialAttribute("class", AttributeAction.Element);
          break;
        }
        case 35 /* Hash */: {
          addSpecialAttribute("id", AttributeAction.Equals);
          break;
        }
        case 91 /* LeftSquareBracket */: {
          stripWhitespace(1);
          // Determine attribute name and namespace
          let name;
          let namespace = null;
          if (selector.charCodeAt(selectorIndex) === 124 /* Pipe */) {
            // Equivalent to no namespace
            name = getName(1);
          } else if (selector.startsWith("*|", selectorIndex)) {
            namespace = "*";
            name = getName(2);
          } else {
            name = getName(0);
            // `ns|name`, but not the `name|=value` operator
            if (
              selector.charCodeAt(selectorIndex) === 124 /* Pipe */ &&
              selector.charCodeAt(selectorIndex + 1) !== 61 /* Equal */
            ) {
              namespace = name;
              name = getName(1);
            }
          }
          stripWhitespace(0);
          // Determine comparison operation
          let action = AttributeAction.Exists;
          const possibleAction = actionTypes.get(
            selector.charCodeAt(selectorIndex)
          );
          if (possibleAction) {
            action = possibleAction;
            if (selector.charCodeAt(selectorIndex + 1) !== 61 /* Equal */) {
              throw new Error("Expected `=`");
            }
            stripWhitespace(2);
          } else if (selector.charCodeAt(selectorIndex) === 61 /* Equal */) {
            action = AttributeAction.Equals;
            stripWhitespace(1);
          }
          // Determine value
          let value = "";
          let ignoreCase = null;
          // Compare against the same constant `action` was initialised
          // from, not a string literal.
          if (action !== AttributeAction.Exists) {
            if (isQuote(selector.charCodeAt(selectorIndex))) {
              const quote = selector.charCodeAt(selectorIndex);
              let sectionEnd = selectorIndex + 1;
              // Find the matching, unescaped closing quote
              while (
                sectionEnd < selector.length &&
                (selector.charCodeAt(sectionEnd) !== quote ||
                  isEscaped(sectionEnd))
              ) {
                sectionEnd += 1;
              }
              if (selector.charCodeAt(sectionEnd) !== quote) {
                throw new Error("Attribute value didn't end");
              }
              value = unescapeCSS(selector.slice(selectorIndex + 1, sectionEnd));
              selectorIndex = sectionEnd + 1;
            } else {
              // Unquoted value: runs until unescaped whitespace or `]`
              const valueStart = selectorIndex;
              while (
                selectorIndex < selector.length &&
                ((!isWhitespace(selector.charCodeAt(selectorIndex)) &&
                  selector.charCodeAt(selectorIndex) !==
                    93 /* RightSquareBracket */) ||
                  isEscaped(selectorIndex))
              ) {
                selectorIndex += 1;
              }
              value = unescapeCSS(selector.slice(valueStart, selectorIndex));
            }
            stripWhitespace(0);
            // See if we have a force ignore flag; `| 0x20` lower-cases
            // ASCII letters.
            const forceIgnore = selector.charCodeAt(selectorIndex) | 0x20;
            // If the forceIgnore flag is set (either `i` or `s`), use that value
            if (forceIgnore === 115 /* LowerS */) {
              ignoreCase = false;
              stripWhitespace(1);
            } else if (forceIgnore === 105 /* LowerI */) {
              ignoreCase = true;
              stripWhitespace(1);
            }
          }
          if (selector.charCodeAt(selectorIndex) !== 93 /* RightSquareBracket */) {
            throw new Error("Attribute selector didn't terminate");
          }
          selectorIndex += 1;
          tokens.push({
            type: SelectorType.Attribute,
            name,
            action,
            value,
            namespace,
            ignoreCase,
          });
          break;
        }
        case 58 /* Colon */: {
          if (selector.charCodeAt(selectorIndex + 1) === 58 /* Colon */) {
            // `::name` or `::name(data)`. The name must be read first:
            // reading it moves the position onto a possible `(`.
            const elementName = getName(2).toLowerCase();
            const elementData =
              selector.charCodeAt(selectorIndex) === 40 /* LeftParenthesis */
                ? readValueWithParenthesis()
                : null;
            tokens.push({
              type: SelectorType.PseudoElement,
              name: elementName,
              data: elementData,
            });
            break;
          }
          const name = getName(1).toLowerCase();
          let data = null;
          if (selector.charCodeAt(selectorIndex) === 40 /* LeftParenthesis */) {
            if (unpackPseudos.has(name)) {
              if (isQuote(selector.charCodeAt(selectorIndex + 1))) {
                throw new Error(`Pseudo-selector ${name} cannot be quoted`);
              }
              // The argument is a selector list: parse it recursively
              data = [];
              selectorIndex = parseSelector(data, selector, selectorIndex + 1);
              if (
                selector.charCodeAt(selectorIndex) !== 41 /* RightParenthesis */
              ) {
                throw new Error(
                  `Missing closing parenthesis in :${name} (${selector})`
                );
              }
              selectorIndex += 1;
            } else {
              // Escapes are already resolved by readValueWithParenthesis.
              // Unescaping a second time here corrupted escaped
              // backslashes (`a\\62` became `ab`).
              data = readValueWithParenthesis();
              if (stripQuotesFromPseudos.has(name)) {
                const quot = data.charCodeAt(0);
                if (quot === data.charCodeAt(data.length - 1) && isQuote(quot)) {
                  data = data.slice(1, -1);
                }
              }
            }
          }
          tokens.push({ type: SelectorType.Pseudo, name, data });
          break;
        }
        case 44 /* Comma */: {
          finalizeSubselector();
          tokens = [];
          stripWhitespace(1);
          break;
        }
        default: {
          if (selector.startsWith("/*", selectorIndex)) {
            const endIndex = selector.indexOf("*/", selectorIndex + 2);
            if (endIndex < 0) {
              throw new Error("Comment was not terminated");
            }
            selectorIndex = endIndex + 2;
            // Remove leading whitespace
            if (tokens.length === 0) {
              stripWhitespace(0);
            }
            break;
          }
          let namespace = null;
          let name;
          if (firstChar === 42 /* Asterisk */) {
            selectorIndex += 1;
            name = "*";
          } else if (firstChar === 124 /* Pipe */) {
            // A lone `|` is an empty namespace prefix, `||` the column combinator
            name = "";
            if (selector.charCodeAt(selectorIndex + 1) === 124 /* Pipe */) {
              addTraversal(SelectorType.ColumnCombinator);
              stripWhitespace(2);
              break;
            }
          } else if (reName.test(selector.slice(selectorIndex))) {
            name = getName(0);
          } else {
            // Not something we can parse: hand the position back to the caller
            break loop;
          }
          // What was read so far was a namespace prefix (`ns|tag`, `ns|*`)
          if (
            selector.charCodeAt(selectorIndex) === 124 /* Pipe */ &&
            selector.charCodeAt(selectorIndex + 1) !== 124 /* Pipe */
          ) {
            namespace = name;
            if (selector.charCodeAt(selectorIndex + 1) === 42 /* Asterisk */) {
              name = "*";
              selectorIndex += 2;
            } else {
              name = getName(1);
            }
          }
          tokens.push(
            name === "*"
              ? { type: SelectorType.Universal, namespace }
              : { type: SelectorType.Tag, name, namespace }
          );
        }
      }
    }
    finalizeSubselector();
    return selectorIndex;
  }