url-state-machine.js 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297
  1. "use strict";
  2. const punycode = require("punycode");
  3. const tr46 = require("tr46");
  // Table of "special" schemes (see isSpecialScheme) mapped to the value that
  // defaultPort() returns for them. "file" is listed with null, i.e. it is a
  // special scheme without a numeric default port.
  4. const specialSchemes = {
  5. ftp: 21,
  6. file: null,
  7. gopher: 70,
  8. http: 80,
  9. https: 443,
  10. ws: 80,
  11. wss: 443
  12. };
  // Unique sentinel returned by the parsing helpers and state handlers in this
  // file to report failure.
  13. const failure = Symbol("failure");
  /**
   * Returns how many entries punycode.ucs2.decode produces for `str`,
   * i.e. the length of the string measured in decoded code points.
   */
  function countSymbols(str) {
    const codePoints = punycode.ucs2.decode(str);
    return codePoints.length;
  }
  /**
   * Returns the one-symbol string for the code point stored at `input[idx]`,
   * or undefined when that slot does not hold a number (e.g. out of range).
   */
  function at(input, idx) {
    const codePoint = input[idx];
    if (isNaN(codePoint)) {
      return undefined;
    }
    return String.fromCodePoint(codePoint);
  }
  /**
   * True when code point `c` lies in the range 0x30..0x39 ("0".."9").
   */
  function isASCIIDigit(c) {
    const atLeastZero = c >= 0x30;
    const atMostNine = c <= 0x39;
    return atLeastZero && atMostNine;
  }
  /**
   * True when code point `c` is an ASCII letter: 0x41..0x5A ("A".."Z")
   * or 0x61..0x7A ("a".."z").
   */
  function isASCIIAlpha(c) {
    const isUpper = c >= 0x41 && c <= 0x5A;
    const isLower = c >= 0x61 && c <= 0x7A;
    return isUpper || isLower;
  }
  /**
   * True when code point `c` is an ASCII letter or an ASCII digit.
   */
  function isASCIIAlphanumeric(c) {
    if (isASCIIAlpha(c)) {
      return true;
    }
    return isASCIIDigit(c);
  }
  /**
   * True when code point `c` is an ASCII hex digit: "0".."9",
   * 0x41..0x46 ("A".."F") or 0x61..0x66 ("a".."f").
   */
  function isASCIIHex(c) {
    if (isASCIIDigit(c)) {
      return true;
    }
    const isUpperHex = c >= 0x41 && c <= 0x46;
    const isLowerHex = c >= 0x61 && c <= 0x66;
    return isUpperHex || isLowerHex;
  }
  /**
   * True when `buffer` is a single-dot path segment: a literal "." or its
   * percent-encoded form "%2e" in any letter case.
   */
  function isSingleDot(buffer) {
    if (buffer === ".") {
      return true;
    }
    return buffer.toLowerCase() === "%2e";
  }
  /**
   * True when `buffer` is a double-dot path segment: ".." where either dot
   * may be written as "%2e" (case-insensitively).
   */
  function isDoubleDot(buffer) {
    switch (buffer.toLowerCase()) {
      case "..":
      case "%2e.":
      case ".%2e":
      case "%2e%2e":
        return true;
      default:
        return false;
    }
  }
  /**
   * True when `cp1` is an ASCII letter and `cp2` is 58 (":") or 124 ("|").
   */
  function isWindowsDriveLetterCodePoints(cp1, cp2) {
    if (!isASCIIAlpha(cp1)) {
      return false;
    }
    return cp2 === 58 || cp2 === 124;
  }
  /**
   * True when `string` is exactly two characters long: an ASCII letter
   * followed by ":" or "|".
   */
  function isWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    if (!isASCIIAlpha(string.codePointAt(0))) {
      return false;
    }
    const separator = string[1];
    return separator === ":" || separator === "|";
  }
  /**
   * True when `string` is exactly two characters long: an ASCII letter
   * followed by ":".
   */
  function isNormalizedWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    if (!isASCIIAlpha(string.codePointAt(0))) {
      return false;
    }
    return string[1] === ":";
  }
  /**
   * True when `string` contains any code point that the pattern below lists
   * as forbidden in a host (NUL, tab, LF, CR, space, and the punctuation
   * # % / : ? @ [ \ ]).
   */
  function containsForbiddenHostCodePoint(string) {
    const forbidden = /\u0000|\u0009|\u000A|\u000D|\u0020|#|%|\/|:|\?|@|\[|\\|\]/;
    const firstMatch = string.search(forbidden);
    return firstMatch !== -1;
  }
  /**
   * Same check as containsForbiddenHostCodePoint, except that "%" is not
   * treated as forbidden.
   */
  function containsForbiddenHostCodePointExcludingPercent(string) {
    const forbidden = /\u0000|\u0009|\u000A|\u000D|\u0020|#|\/|:|\?|@|\[|\\|\]/;
    const firstMatch = string.search(forbidden);
    return firstMatch !== -1;
  }
  /**
   * True when `scheme` is one of the keys of the specialSchemes table.
   *
   * Only own properties are consulted: a plain `specialSchemes[scheme]`
   * lookup would also find names inherited from Object.prototype (for
   * example "constructor"), wrongly classifying such schemes as special.
   */
  function isSpecialScheme(scheme) {
    return Object.prototype.hasOwnProperty.call(specialSchemes, scheme);
  }
  /**
   * True when the URL record's scheme is a special scheme.
   */
  function isSpecial(url) {
    const { scheme } = url;
    return isSpecialScheme(scheme);
  }
  /**
   * Looks `scheme` up in the specialSchemes table and returns the stored
   * value (a port number, or null for "file"); schemes that are not in the
   * table yield whatever the plain property lookup produces.
   */
  function defaultPort(scheme) {
    const port = specialSchemes[scheme];
    return port;
  }
  /**
   * Formats the number `c` as "%" followed by its upper-case hexadecimal
   * form, left-padded to at least two digits.
   */
  function percentEncode(c) {
    const digits = c.toString(16).toUpperCase();
    const padded = digits.length === 1 ? "0" + digits : digits;
    return "%" + padded;
  }
  /**
   * Percent-encodes every UTF-8 byte of the string `c` and returns the
   * concatenated "%XX" sequences.
   *
   * Uses Buffer.from instead of the deprecated `new Buffer(...)`
   * constructor, which allocates uninitialised memory if it is ever handed
   * a number instead of a string.
   */
  function utf8PercentEncode(c) {
    const bytes = Buffer.from(c);
    let str = "";
    for (const byte of bytes) {
      str += percentEncode(byte);
    }
    return str;
  }
  /**
   * Decodes "%XX" escapes in `str`: works on the UTF-8 bytes of the input,
   * replaces every "%" followed by two ASCII hex digits with the byte they
   * denote, copies all other bytes unchanged, and decodes the result as
   * UTF-8 again.
   *
   * Uses Buffer.from instead of the deprecated `new Buffer(...)` constructor.
   */
  function utf8PercentDecode(str) {
    const input = Buffer.from(str);
    const output = [];
    for (let i = 0; i < input.length; ++i) {
      // 37 is "%"; it only starts an escape when two hex digits follow.
      const isEscape = input[i] === 37 && isASCIIHex(input[i + 1]) && isASCIIHex(input[i + 2]);
      if (isEscape) {
        output.push(parseInt(input.toString("latin1", i + 1, i + 3), 16));
        i += 2; // skip the two hex digits just consumed
      } else {
        output.push(input[i]);
      }
    }
    return Buffer.from(output).toString();
  }
  /**
   * True when code point `c` is at most 0x1F or greater than 0x7E.
   */
  function isC0ControlPercentEncode(c) {
    const isControl = c <= 0x1F;
    const isAboveTilde = c > 0x7E;
    return isControl || isAboveTilde;
  }
  // Code points encoded in paths in addition to the C0-control set:
  // space, ", #, <, >, ?, `, {, }.
  const extraPathPercentEncodeSet = new Set([
    32, 34, 35, 60, 62, 63, 96, 123, 125
  ]);

  /**
   * True when code point `c` is in the C0-control percent-encode set or in
   * the extra path set above.
   */
  function isPathPercentEncode(c) {
    if (isC0ControlPercentEncode(c)) {
      return true;
    }
    return extraPathPercentEncodeSet.has(c);
  }
  // Code points encoded in userinfo in addition to the path set:
  // /, :, ;, =, @, [, \, ], ^, |.
  const extraUserinfoPercentEncodeSet = new Set([
    47, 58, 59, 61, 64, 91, 92, 93, 94, 124
  ]);

  /**
   * True when code point `c` is in the path percent-encode set or in the
   * extra userinfo set above.
   */
  function isUserinfoPercentEncode(c) {
    if (isPathPercentEncode(c)) {
      return true;
    }
    return extraUserinfoPercentEncodeSet.has(c);
  }
  /**
   * Returns code point `c` as a string, UTF-8 percent-encoded when
   * `encodeSetPredicate(c)` is truthy and verbatim otherwise.
   */
  function percentEncodeChar(c, encodeSetPredicate) {
    const literal = String.fromCodePoint(c);
    return encodeSetPredicate(c) ? utf8PercentEncode(literal) : literal;
  }
  /**
   * Parses one dotted part of an IPv4 address. A "0x"/"0X" prefix selects
   * hexadecimal and any other leading "0" (on input of length >= 2) selects
   * octal; otherwise the part is decimal. An empty digit string after
   * prefix removal yields 0. Returns `failure` when a character is not
   * valid for the chosen radix.
   */
  function parseIPv4Number(input) {
    let radix = 10;
    let digits = input;
    if (input.length >= 2 && input.charAt(0) === "0") {
      if (input.charAt(1).toLowerCase() === "x") {
        digits = input.substring(2);
        radix = 16;
      } else {
        digits = input.substring(1);
        radix = 8;
      }
    }

    if (digits === "") {
      return 0;
    }

    let invalidChar;
    switch (radix) {
      case 16:
        invalidChar = /[^0-9A-Fa-f]/;
        break;
      case 8:
        invalidChar = /[^0-7]/;
        break;
      default:
        invalidChar = /[^0-9]/;
    }
    if (invalidChar.test(digits)) {
      return failure;
    }
    return parseInt(digits, radix);
  }
  /**
   * Tries to interpret `input` as a dotted IPv4 address.
   *
   * Returns the address as a single number on success, the unchanged
   * `input` string when it does not look like an IPv4 address (more than
   * four parts, an empty part, or a part that is not a number), and
   * `failure` when the parts are numbers but out of range.
   */
  function parseIPv4(input) {
    const parts = input.split(".");
    // A single trailing dot is tolerated.
    if (parts.length > 1 && parts[parts.length - 1] === "") {
      parts.pop();
    }
    if (parts.length > 4) {
      return input;
    }

    const numbers = [];
    for (const part of parts) {
      if (part === "") {
        return input;
      }
      const parsed = parseIPv4Number(part);
      if (parsed === failure) {
        return input;
      }
      numbers.push(parsed);
    }

    // Every part except the last must fit in one byte; the last part fills
    // all remaining bytes.
    const lastIndex = numbers.length - 1;
    if (numbers.slice(0, lastIndex).some(n => n > 255)) {
      return failure;
    }
    if (numbers[lastIndex] >= Math.pow(256, 5 - numbers.length)) {
      return failure;
    }

    const tail = numbers.pop();
    return numbers.reduce((sum, n, position) => sum + n * Math.pow(256, 3 - position), tail);
  }
  /**
   * Serialises a numeric IPv4 address as four dot-separated decimal bytes,
   * most significant byte first.
   */
  function serializeIPv4(address) {
    const octets = [];
    let remaining = address;
    for (let i = 0; i < 4; ++i) {
      octets.unshift(String(remaining % 256));
      remaining = Math.floor(remaining / 256);
    }
    return octets.join(".");
  }
  // Parses an IPv6 address string (parseHost passes the text between "[" and
  // "]") into an array of eight numeric pieces. Returns the `failure` symbol
  // for malformed input. Code points tested below: 58 is ":" and 46 is ".".
  180. function parseIPv6(input) {
  181. const address = [0, 0, 0, 0, 0, 0, 0, 0];
  182. let pieceIndex = 0;
  // Piece index at which a "::" compression was seen, or null if none.
  183. let compress = null;
  184. let pointer = 0;
  // From here on `input` is an array of code points.
  185. input = punycode.ucs2.decode(input);
  // A leading ":" is only valid as part of a leading "::".
  186. if (input[pointer] === 58) {
  187. if (input[pointer + 1] !== 58) {
  188. return failure;
  189. }
  190. pointer += 2;
  191. ++pieceIndex;
  192. compress = pieceIndex;
  193. }
  194. while (pointer < input.length) {
  // More than eight pieces is an error.
  195. if (pieceIndex === 8) {
  196. return failure;
  197. }
  // A ":" at the start of a piece marks the compression point; only one
  // compression is allowed.
  198. if (input[pointer] === 58) {
  199. if (compress !== null) {
  200. return failure;
  201. }
  202. ++pointer;
  203. ++pieceIndex;
  204. compress = pieceIndex;
  205. continue;
  206. }
  // Read up to four hex digits into `value`.
  207. let value = 0;
  208. let length = 0;
  209. while (length < 4 && isASCIIHex(input[pointer])) {
  210. value = value * 0x10 + parseInt(at(input, pointer), 16);
  211. ++pointer;
  212. ++length;
  213. }
  // A "." after the digits means the rest is an embedded dotted-decimal
  // IPv4 address: rewind and re-read the digits as decimal numbers.
  214. if (input[pointer] === 46) {
  215. if (length === 0) {
  216. return failure;
  217. }
  218. pointer -= length;
  // The IPv4 part occupies two pieces, so it cannot start after piece 6.
  219. if (pieceIndex > 6) {
  220. return failure;
  221. }
  222. let numbersSeen = 0;
  223. while (input[pointer] !== undefined) {
  224. let ipv4Piece = null;
  // Every number after the first must be preceded by a ".".
  225. if (numbersSeen > 0) {
  226. if (input[pointer] === 46 && numbersSeen < 4) {
  227. ++pointer;
  228. } else {
  229. return failure;
  230. }
  231. }
  232. if (!isASCIIDigit(input[pointer])) {
  233. return failure;
  234. }
  // Accumulate a decimal number; a further digit after a leading 0 and
  // values above 255 are rejected.
  235. while (isASCIIDigit(input[pointer])) {
  236. const number = parseInt(at(input, pointer));
  237. if (ipv4Piece === null) {
  238. ipv4Piece = number;
  239. } else if (ipv4Piece === 0) {
  240. return failure;
  241. } else {
  242. ipv4Piece = ipv4Piece * 10 + number;
  243. }
  244. if (ipv4Piece > 255) {
  245. return failure;
  246. }
  247. ++pointer;
  248. }
  // Two IPv4 numbers are packed into each 16-bit piece.
  249. address[pieceIndex] = address[pieceIndex] * 0x100 + ipv4Piece;
  250. ++numbersSeen;
  251. if (numbersSeen === 2 || numbersSeen === 4) {
  252. ++pieceIndex;
  253. }
  254. }
  255. if (numbersSeen !== 4) {
  256. return failure;
  257. }
  258. break;
  259. } else if (input[pointer] === 58) {
  // A ":" separator must be followed by more input.
  260. ++pointer;
  261. if (input[pointer] === undefined) {
  262. return failure;
  263. }
  264. } else if (input[pointer] !== undefined) {
  // Anything other than ":", "." or end of input after a piece is invalid.
  265. return failure;
  266. }
  267. address[pieceIndex] = value;
  268. ++pieceIndex;
  269. }
  // With a compression, move the pieces parsed after "::" to the end of the
  // array; the slots they vacate receive the zeros from the tail.
  270. if (compress !== null) {
  271. let swaps = pieceIndex - compress;
  272. pieceIndex = 7;
  273. while (pieceIndex !== 0 && swaps > 0) {
  274. const temp = address[compress + swaps - 1];
  275. address[compress + swaps - 1] = address[pieceIndex];
  276. address[pieceIndex] = temp;
  277. --pieceIndex;
  278. --swaps;
  279. }
  280. } else if (compress === null && pieceIndex !== 8) {
  // Without a compression all eight pieces must have been given.
  281. return failure;
  282. }
  283. return address;
  284. }
  /**
   * Serialises an array of eight numeric pieces as colon-separated
   * lower-case hex, replacing the run of zero pieces reported by
   * findLongestZeroSequence with "::".
   */
  function serializeIPv6(address) {
    const { idx: compress } = findLongestZeroSequence(address);
    let output = "";
    let skippingZeros = false;

    for (let pieceIndex = 0; pieceIndex <= 7; ++pieceIndex) {
      const piece = address[pieceIndex];

      // After the compression marker, swallow the remaining zeros of the run.
      if (skippingZeros) {
        if (piece === 0) {
          continue;
        }
        skippingZeros = false;
      }

      if (pieceIndex === compress) {
        // The previous piece already contributed one ":" unless we are at
        // the very start.
        output += pieceIndex === 0 ? "::" : ":";
        skippingZeros = true;
        continue;
      }

      output += piece.toString(16);
      if (pieceIndex !== 7) {
        output += ":";
      }
    }

    return output;
  }
  /**
   * Parses a host string.
   *
   * - Input starting with "[" must end with "]" and is parsed as IPv6.
   * - For non-special URLs (`isSpecialArg` falsy) the input is treated as an
   *   opaque host.
   * - Otherwise the input is percent-decoded, converted with tr46.toASCII,
   *   checked for forbidden code points and, if it parses as IPv4, returned
   *   as a number.
   *
   * Returns `failure` when any step rejects the input.
   */
  function parseHost(input, isSpecialArg) {
    if (input[0] === "[") {
      const closesBracket = input[input.length - 1] === "]";
      return closesBracket ? parseIPv6(input.substring(1, input.length - 1)) : failure;
    }

    if (!isSpecialArg) {
      return parseOpaqueHost(input);
    }

    const decodedDomain = utf8PercentDecode(input);
    const asciiDomain = tr46.toASCII(decodedDomain, false, tr46.PROCESSING_OPTIONS.NONTRANSITIONAL, false);
    if (asciiDomain === null || containsForbiddenHostCodePoint(asciiDomain)) {
      return failure;
    }

    // parseIPv4 hands back the input string when it is not an IPv4 address.
    const ipv4Host = parseIPv4(asciiDomain);
    const isIPv4Result = typeof ipv4Host === "number" || ipv4Host === failure;
    return isIPv4Result ? ipv4Host : asciiDomain;
  }
  /**
   * Parses the host of a non-special URL: rejects forbidden host code
   * points (a "%" is allowed) and percent-encodes every code point in the
   * C0-control percent-encode set. Returns `failure` on rejection.
   */
  function parseOpaqueHost(input) {
    if (containsForbiddenHostCodePointExcludingPercent(input)) {
      return failure;
    }
    return punycode.ucs2.decode(input)
      .map(codePoint => percentEncodeChar(codePoint, isC0ControlPercentEncode))
      .join("");
  }
  /**
   * Finds the first longest run of zeros in `arr` that is at least two
   * elements long.
   *
   * Returns `{ idx, len }` with the run's start index and length, or
   * `{ idx: null, len: 1 }` when no such run exists.
   */
  function findLongestZeroSequence(arr) {
    let best = { idx: null, len: 1 }; // len 1 so that only runs > 1 qualify
    let runStart = null;
    let runLen = 0;

    // One extra iteration past the end closes a trailing run of zeros.
    for (let i = 0; i <= arr.length; ++i) {
      if (i < arr.length && arr[i] === 0) {
        if (runStart === null) {
          runStart = i;
        }
        ++runLen;
        continue;
      }

      if (runLen > best.len) {
        best = { idx: runStart, len: runLen };
      }
      runStart = null;
      runLen = 0;
    }

    return best;
  }
  /**
   * Serialises a parsed host: numbers are IPv4 addresses, arrays are IPv6
   * addresses (wrapped in square brackets), and anything else is returned
   * unchanged.
   *
   * Array.isArray is used rather than `instanceof Array` so that an IPv6
   * piece array created in another realm (e.g. a vm context) is still
   * recognised.
   */
  function serializeHost(host) {
    if (typeof host === "number") {
      return serializeIPv4(host);
    }

    // IPv6 serializer
    if (Array.isArray(host)) {
      return "[" + serializeIPv6(host) + "]";
    }

    return host;
  }
  /**
   * Strips leading and trailing runs of code points U+0000..U+0020
   * (C0 controls and space) from `url`.
   */
  function trimControlChars(url) {
    const leadingOrTrailing = /^[\u0000-\u001F\u0020]+|[\u0000-\u001F\u0020]+$/g;
    return url.replace(leadingOrTrailing, "");
  }
  /**
   * Removes every tab (U+0009), line feed (U+000A) and carriage return
   * (U+000D) from `url`, wherever they occur.
   */
  function trimTabAndNewline(url) {
    const tabOrNewline = /\u0009|\u000A|\u000D/g;
    return url.replace(tabOrNewline, "");
  }
  /**
   * Removes the last segment of `url.path` in place. Nothing happens when
   * the path is empty, or when a "file" URL's path consists solely of a
   * normalized Windows drive letter.
   */
  function shortenPath(url) {
    const segments = url.path;
    if (segments.length === 0) {
      return;
    }

    const isLoneDriveLetter = url.scheme === "file" &&
      segments.length === 1 &&
      isNormalizedWindowsDriveLetter(segments[0]);
    if (!isLoneDriveLetter) {
      segments.pop();
    }
  }
  /**
   * True when the URL record has a non-empty username or password.
   */
  function includesCredentials(url) {
    const hasNoCredentials = url.username === "" && url.password === "";
    return !hasNoCredentials;
  }
  /**
   * Truthy when the URL record has a null or empty host, is flagged
   * cannotBeABaseURL, or uses the "file" scheme.
   */
  function cannotHaveAUsernamePasswordPort(url) {
    const { host } = url;
    if (host === null || host === "") {
      return true;
    }
    return url.cannotBeABaseURL || url.scheme === "file";
  }
  /**
   * True when `string` is exactly an ASCII letter followed by ":".
   */
  function isNormalizedWindowsDriveLetter(string) {
    const driveLetter = /^[A-Za-z]:$/;
    return driveLetter.test(string);
  }
  // Constructor that runs the whole parse. It stores its arguments, prepares
  // the input, and then feeds the input one code point at a time to the
  // handler method named "parse <state>" on the prototype.
  //   input            - string to parse
  //   base             - base URL record, or a falsy value for none
  //   encodingOverride - stored on the instance; defaults to "utf-8"
  //   url              - existing URL record to modify; a fresh one is created
  //                      when this is falsy
  //   stateOverride    - state to start in instead of "scheme start"
  // After construction, this.url holds the result, this.failure says whether
  // parsing failed, and this.parseError whether a parse error was flagged.
  409. function URLStateMachine(input, base, encodingOverride, url, stateOverride) {
  410. this.pointer = 0;
  411. this.input = input;
  412. this.base = base || null;
  413. this.encodingOverride = encodingOverride || "utf-8";
  414. this.stateOverride = stateOverride;
  415. this.url = url;
  416. this.failure = false;
  417. this.parseError = false;
  // No URL record supplied: start from an empty one and strip leading and
  // trailing control characters/spaces, flagging a parse error if any were
  // removed.
  418. if (!this.url) {
  419. this.url = {
  420. scheme: "",
  421. username: "",
  422. password: "",
  423. host: null,
  424. port: null,
  425. path: [],
  426. query: null,
  427. fragment: null,
  428. cannotBeABaseURL: false
  429. };
  430. const res = trimControlChars(this.input);
  431. if (res !== this.input) {
  432. this.parseError = true;
  433. }
  434. this.input = res;
  435. }
  // Tabs and newlines are removed everywhere in the input; their presence is
  // also a parse error.
  436. const res = trimTabAndNewline(this.input);
  437. if (res !== this.input) {
  438. this.parseError = true;
  439. }
  440. this.input = res;
  441. this.state = stateOverride || "scheme start";
  // Scratch state shared by the handlers.
  442. this.buffer = "";
  443. this.atFlag = false;
  444. this.arrFlag = false;
  445. this.passwordTokenSeenFlag = false;
  // From here on this.input is an array of code points.
  446. this.input = punycode.ucs2.decode(this.input);
  // The loop runs one step past the last code point, so handlers see an
  // undefined `c` at end of input. Handlers may move this.pointer themselves.
  447. for (; this.pointer <= this.input.length; ++this.pointer) {
  448. const c = this.input[this.pointer];
  449. const cStr = isNaN(c) ? undefined : String.fromCodePoint(c);
  450. // exec state machine
  451. const ret = this["parse " + this.state](c, cStr);
  // A falsy return stops parsing without failure; the failure symbol stops
  // parsing and marks the result as failed.
  452. if (!ret) {
  453. break; // terminate algorithm
  454. } else if (ret === failure) {
  455. this.failure = true;
  456. break;
  457. }
  458. }
  459. }
  /**
   * "scheme start" state: an ASCII letter begins a scheme. Otherwise the
   * code point is re-processed in the "no scheme" state, unless a state
   * override is active, in which case parsing fails.
   */
  URLStateMachine.prototype["parse scheme start"] = function parseSchemeStart(c, cStr) {
    if (isASCIIAlpha(c)) {
      this.buffer += cStr.toLowerCase();
      this.state = "scheme";
      return true;
    }

    if (this.stateOverride) {
      this.parseError = true;
      return failure;
    }

    this.state = "no scheme";
    --this.pointer; // look at the same code point again in the new state
    return true;
  };
  // "scheme" state: collects the scheme into this.buffer until ":" is seen,
  // then stores it on the URL record and picks the next state.
  // Code points: 43 "+", 45 "-", 46 ".", 58 ":", 47 "/".
  473. URLStateMachine.prototype["parse scheme"] = function parseScheme(c, cStr) {
  474. if (isASCIIAlphanumeric(c) || c === 43 || c === 45 || c === 46) {
  475. this.buffer += cStr.toLowerCase();
  476. } else if (c === 58) {
  // With a state override (changing the scheme of an existing URL record),
  // stop without changing anything in the cases below.
  477. if (this.stateOverride) {
  // Switching between special and non-special schemes is not allowed.
  478. if (isSpecial(this.url) && !isSpecialScheme(this.buffer)) {
  479. return false;
  480. }
  481. if (!isSpecial(this.url) && isSpecialScheme(this.buffer)) {
  482. return false;
  483. }
  // Credentials or a port cannot be carried over to "file".
  484. if ((includesCredentials(this.url) || this.url.port !== null) && this.buffer === "file") {
  485. return false;
  486. }
  // A "file" URL with an empty or null host keeps its scheme.
  487. if (this.url.scheme === "file" && (this.url.host === "" || this.url.host === null)) {
  488. return false;
  489. }
  490. }
  491. this.url.scheme = this.buffer;
  492. this.buffer = "";
  // With a state override only the scheme is replaced; stop here.
  493. if (this.stateOverride) {
  494. return false;
  495. }
  496. if (this.url.scheme === "file") {
  // "file:" not followed by "//" is flagged but still parsed.
  497. if (this.input[this.pointer + 1] !== 47 || this.input[this.pointer + 2] !== 47) {
  498. this.parseError = true;
  499. }
  500. this.state = "file";
  501. } else if (isSpecial(this.url) && this.base !== null && this.base.scheme === this.url.scheme) {
  502. this.state = "special relative or authority";
  503. } else if (isSpecial(this.url)) {
  504. this.state = "special authority slashes";
  505. } else if (this.input[this.pointer + 1] === 47) {
  // Skip the "/" that follows the ":".
  506. this.state = "path or authority";
  507. ++this.pointer;
  508. } else {
  509. this.url.cannotBeABaseURL = true;
  510. this.url.path.push("");
  511. this.state = "cannot-be-a-base-URL path";
  512. }
  513. } else if (!this.stateOverride) {
  // Not a scheme after all: restart from the first code point in the
  // "no scheme" state (the constructor's loop increments -1 to 0).
  514. this.buffer = "";
  515. this.state = "no scheme";
  516. this.pointer = -1;
  517. } else {
  518. this.parseError = true;
  519. return failure;
  520. }
  521. return true;
  522. };
  /**
   * "no scheme" state: the input has no scheme, so it can only be resolved
   * against a base URL. Fails without a base, or when the base cannot be a
   * base URL and the input is not a bare fragment (35 is "#").
   */
  URLStateMachine.prototype["parse no scheme"] = function parseNoScheme(c) {
    const base = this.base;
    if (base === null) {
      return failure;
    }

    if (base.cannotBeABaseURL) {
      if (c !== 35) {
        return failure;
      }
      // Only the fragment may be replaced on such a base.
      const url = this.url;
      url.scheme = base.scheme;
      url.path = base.path.slice();
      url.query = base.query;
      url.fragment = "";
      url.cannotBeABaseURL = true;
      this.state = "fragment";
      return true;
    }

    this.state = base.scheme === "file" ? "file" : "relative";
    --this.pointer; // re-process this code point in the new state
    return true;
  };
  /**
   * "special relative or authority" state: "//" (47 is "/") introduces an
   * authority; anything else is flagged as a parse error and re-processed
   * in the "relative" state.
   */
  URLStateMachine.prototype["parse special relative or authority"] = function parseSpecialRelativeOrAuthority(c) {
    const startsWithDoubleSlash = c === 47 && this.input[this.pointer + 1] === 47;
    if (startsWithDoubleSlash) {
      this.state = "special authority ignore slashes";
      ++this.pointer; // consume the second slash
      return true;
    }

    this.parseError = true;
    this.state = "relative";
    --this.pointer;
    return true;
  };
  /**
   * "path or authority" state: a "/" (47) leads to the "authority" state;
   * any other code point is re-processed in the "path" state.
   */
  URLStateMachine.prototype["parse path or authority"] = function parsePathOrAuthority(c) {
    if (c === 47) {
      this.state = "authority";
      return true;
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "relative" state: resolves a scheme-less input against the base URL.
   * The scheme is always taken from the base; how much more is inherited
   * depends on the current code point (47 "/", 92 "\", 63 "?", 35 "#").
   */
  URLStateMachine.prototype["parse relative"] = function parseRelative(c) {
    const url = this.url;
    const base = this.base;
    url.scheme = base.scheme;

    // A slash (or, for special schemes, a backslash) defers all further
    // decisions to the "relative slash" state.
    if (c === 47 || (c === 92 && isSpecial(url))) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "relative slash";
      return true;
    }

    // Every remaining case inherits the base's credentials, host and port.
    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;

    if (isNaN(c)) {
      // End of input: the result is the base without its fragment.
      url.path = base.path.slice();
      url.query = base.query;
    } else if (c === 63) {
      url.path = base.path.slice();
      url.query = "";
      this.state = "query";
    } else if (c === 35) {
      url.path = base.path.slice();
      url.query = base.query;
      url.fragment = "";
      this.state = "fragment";
    } else {
      // A relative path replaces the last segment of the base path.
      url.path = base.path.slice(0, base.path.length - 1);
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  /**
   * "relative slash" state: a second slash starts an authority (for special
   * schemes a backslash counts too, with a parse error); otherwise the
   * base's credentials, host and port are inherited and the code point is
   * re-processed in the "path" state.
   */
  URLStateMachine.prototype["parse relative slash"] = function parseRelativeSlash(c) {
    if (isSpecial(this.url) && (c === 47 || c === 92)) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "special authority ignore slashes";
      return true;
    }

    if (c === 47) {
      this.state = "authority";
      return true;
    }

    const url = this.url;
    const base = this.base;
    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;
    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "special authority slashes" state: expects "//" (47 is "/"). Either way
   * parsing continues in "special authority ignore slashes"; a missing "//"
   * is flagged as a parse error and the code point is re-processed.
   */
  URLStateMachine.prototype["parse special authority slashes"] = function parseSpecialAuthoritySlashes(c) {
    const hasDoubleSlash = c === 47 && this.input[this.pointer + 1] === 47;
    this.state = "special authority ignore slashes";

    if (hasDoubleSlash) {
      ++this.pointer; // consume the second slash
    } else {
      this.parseError = true;
      --this.pointer;
    }
    return true;
  };
  /**
   * "special authority ignore slashes" state: skips any further "/" (47) or
   * "\" (92), flagging each as a parse error; the first other code point is
   * re-processed in the "authority" state.
   */
  URLStateMachine.prototype["parse special authority ignore slashes"] = function parseSpecialAuthorityIgnoreSlashes(c) {
    const isSlash = c === 47 || c === 92;
    if (isSlash) {
      this.parseError = true;
      return true;
    }

    this.state = "authority";
    --this.pointer;
    return true;
  };
  /**
   * "authority" state: buffers code points until either an "@" (64) is
   * found, in which case the buffer is userinfo, or the authority ends, in
   * which case the buffered text is re-read as a host.
   *
   * Returns `failure` when credentials are followed by an empty host.
   */
  URLStateMachine.prototype["parse authority"] = function parseAuthority(c, cStr) {
    if (c === 64) {
      this.parseError = true;
      // A previous "@" was part of the userinfo; keep it, percent-encoded.
      if (this.atFlag) {
        this.buffer = "%40" + this.buffer;
      }
      this.atFlag = true;

      // Walk the buffer by code point. Indexing the string with
      // codePointAt(0..symbolCount-1) would use UTF-16 code-unit offsets and
      // therefore mis-read everything after a character outside the BMP.
      for (const codePoint of punycode.ucs2.decode(this.buffer)) {
        // The first ":" (58) separates the username from the password.
        if (codePoint === 58 && !this.passwordTokenSeenFlag) {
          this.passwordTokenSeenFlag = true;
          continue;
        }
        const encodedCodePoints = percentEncodeChar(codePoint, isUserinfoPercentEncode);
        if (this.passwordTokenSeenFlag) {
          this.url.password += encodedCodePoints;
        } else {
          this.url.username += encodedCodePoints;
        }
      }
      this.buffer = "";
    } else if (isNaN(c) || c === 47 || c === 63 || c === 35 ||
               (isSpecial(this.url) && c === 92)) {
      // End of the authority: "/", "?", "#", end of input, or "\" for
      // special schemes.
      if (this.atFlag && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      // Rewind to the start of the buffered text so the "host" state
      // processes it from its first code point.
      this.pointer -= countSymbols(this.buffer) + 1;
      this.buffer = "";
      this.state = "host";
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  // "host" and "hostname" states (one shared handler): buffers code points
  // until the host ends, then parses the buffer with parseHost and stores the
  // result in this.url.host.
  // Code points: 58 ":", 47 "/", 63 "?", 35 "#", 92 "\", 91 "[", 93 "]".
  679. URLStateMachine.prototype["parse hostname"] =
  680. URLStateMachine.prototype["parse host"] = function parseHostName(c, cStr) {
  // Setting the host of a "file" URL is handled by the "file host" state.
  681. if (this.stateOverride && this.url.scheme === "file") {
  682. --this.pointer;
  683. this.state = "file host";
  684. } else if (c === 58 && !this.arrFlag) {
  // A ":" outside square brackets ends the host and starts the port.
  685. if (this.buffer === "") {
  686. this.parseError = true;
  687. return failure;
  688. }
  689. const host = parseHost(this.buffer, isSpecial(this.url));
  690. if (host === failure) {
  691. return failure;
  692. }
  693. this.url.host = host;
  694. this.buffer = "";
  695. this.state = "port";
  // When only the hostname is being set, stop before the port.
  696. if (this.stateOverride === "hostname") {
  697. return false;
  698. }
  699. } else if (isNaN(c) || c === 47 || c === 63 || c === 35 ||
  700. (isSpecial(this.url) && c === 92)) {
  // End of the host; the terminating code point is re-processed by the next
  // state.
  701. --this.pointer;
  // Special schemes require a non-empty host.
  702. if (isSpecial(this.url) && this.buffer === "") {
  703. this.parseError = true;
  704. return failure;
  705. } else if (this.stateOverride && this.buffer === "" &&
  706. (includesCredentials(this.url) || this.url.port !== null)) {
  // With a state override, an empty host is not applied to a URL record that
  // has credentials or a port.
  707. this.parseError = true;
  708. return false;
  709. }
  710. const host = parseHost(this.buffer, isSpecial(this.url));
  711. if (host === failure) {
  712. return failure;
  713. }
  714. this.url.host = host;
  715. this.buffer = "";
  716. this.state = "path start";
  717. if (this.stateOverride) {
  718. return false;
  719. }
  720. } else {
  // Track whether we are inside "[...]" so that ":" there is not taken as the
  // port separator.
  721. if (c === 91) {
  722. this.arrFlag = true;
  723. } else if (c === 93) {
  724. this.arrFlag = false;
  725. }
  726. this.buffer += cStr;
  727. }
  728. return true;
  729. };
  /**
   * "port" state: buffers ASCII digits; at the end of the port (end of
   * input, "/", "?", "#", "\" for special schemes, or any code point while
   * a state override is active) the buffered number is stored on the URL
   * record, with the scheme's default port stored as null.
   *
   * Returns `failure` for ports above 65535 and for unexpected code points.
   */
  URLStateMachine.prototype["parse port"] = function parsePort(c, cStr) {
    if (isASCIIDigit(c)) {
      this.buffer += cStr;
      return true;
    }

    const endsPort = isNaN(c) || c === 47 || c === 63 || c === 35 ||
      (isSpecial(this.url) && c === 92) ||
      this.stateOverride;
    if (!endsPort) {
      this.parseError = true;
      return failure;
    }

    if (this.buffer !== "") {
      // The buffer holds ASCII digits only.
      const port = parseInt(this.buffer, 10);
      if (port > Math.pow(2, 16) - 1) {
        this.parseError = true;
        return failure;
      }
      this.url.port = port === defaultPort(this.url.scheme) ? null : port;
      this.buffer = "";
    }

    if (this.stateOverride) {
      return false;
    }

    this.state = "path start";
    --this.pointer;
    return true;
  };
  // Code points "/", "\", "?" and "#": the ones that may follow a Windows
  // drive letter at the start of the remaining input (see "parse file").
  756. const fileOtherwiseCodePoints = new Set([47, 92, 63, 35]);
  // "file" state: start of a "file" URL, either after an explicit "file:"
  // scheme or when resolving against a "file" base.
  // Code points: 47 "/", 92 "\", 63 "?", 35 "#".
  757. URLStateMachine.prototype["parse file"] = function parseFile(c) {
  758. this.url.scheme = "file";
  759. if (c === 47 || c === 92) {
  760. if (c === 92) {
  761. this.parseError = true;
  762. }
  763. this.state = "file slash";
  764. } else if (this.base !== null && this.base.scheme === "file") {
  // Resolve against the "file" base; how much is inherited depends on c.
  765. if (isNaN(c)) {
  766. this.url.host = this.base.host;
  767. this.url.path = this.base.path.slice();
  768. this.url.query = this.base.query;
  769. } else if (c === 63) {
  770. this.url.host = this.base.host;
  771. this.url.path = this.base.path.slice();
  772. this.url.query = "";
  773. this.state = "query";
  774. } else if (c === 35) {
  775. this.url.host = this.base.host;
  776. this.url.path = this.base.path.slice();
  777. this.url.query = this.base.query;
  778. this.url.fragment = "";
  779. this.state = "fragment";
  780. } else {
  // Inherit the base's host and (shortened) path unless the remaining input
  // starts with a Windows drive letter; in that case nothing is inherited and
  // a parse error is flagged.
  781. if (this.input.length - this.pointer - 1 === 0 || // remaining consists of 0 code points
  782. !isWindowsDriveLetterCodePoints(c, this.input[this.pointer + 1]) ||
  783. (this.input.length - this.pointer - 1 >= 2 && // remaining has at least 2 code points
  784. !fileOtherwiseCodePoints.has(this.input[this.pointer + 2]))) {
  785. this.url.host = this.base.host;
  786. this.url.path = this.base.path.slice();
  787. shortenPath(this.url);
  788. } else {
  789. this.parseError = true;
  790. }
  791. this.state = "path";
  792. --this.pointer;
  793. }
  794. } else {
  // No usable base: re-process the code point in the "path" state.
  795. this.state = "path";
  796. --this.pointer;
  797. }
  798. return true;
  799. };
  /**
   * "file slash" state: a second "/" (47) or "\" (92) leads to the
   * "file host" state. Otherwise, with a "file" base, the base's drive
   * letter (if its first path segment is one) or else its host is
   * inherited, and the code point is re-processed in the "path" state.
   */
  URLStateMachine.prototype["parse file slash"] = function parseFileSlash(c) {
    if (c === 47 || c === 92) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "file host";
      return true;
    }

    const base = this.base;
    if (base !== null && base.scheme === "file") {
      const firstSegment = base.path[0];
      if (isNormalizedWindowsDriveLetterString(firstSegment)) {
        this.url.path.push(firstSegment);
      } else {
        this.url.host = base.host;
      }
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "file host" state: buffers code points until end of input, "/", "\",
   * "?" or "#", then interprets the buffer:
   * - a Windows drive letter (without state override) is really the start
   *   of the path, so the buffer is left for the "path" state;
   * - an empty buffer gives an empty host;
   * - anything else is parsed as a host, with "localhost" stored as "".
   */
  URLStateMachine.prototype["parse file host"] = function parseFileHost(c, cStr) {
    const endsHost = isNaN(c) || c === 47 || c === 92 || c === 63 || c === 35;
    if (!endsHost) {
      this.buffer += cStr;
      return true;
    }

    --this.pointer; // the terminator is re-processed by the next state

    if (!this.stateOverride && isWindowsDriveLetterString(this.buffer)) {
      this.parseError = true;
      this.state = "path";
      return true;
    }

    if (this.buffer === "") {
      this.url.host = "";
      if (this.stateOverride) {
        return false;
      }
      this.state = "path start";
      return true;
    }

    const parsedHost = parseHost(this.buffer, isSpecial(this.url));
    if (parsedHost === failure) {
      return failure;
    }
    this.url.host = parsedHost === "localhost" ? "" : parsedHost;
    if (this.stateOverride) {
      return false;
    }
    this.buffer = "";
    this.state = "path start";
    return true;
  };
  /**
   * "path start" state: decides how the path begins.
   * Special schemes always continue in "path", consuming a leading "/"
   * (47) or "\" (92, flagged as a parse error). For other schemes, "?" (63)
   * and "#" (35) start the query/fragment unless a state override is
   * active, and any other code point starts the path.
   */
  URLStateMachine.prototype["parse path start"] = function parsePathStart(c) {
    if (isSpecial(this.url)) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "path";
      const isSlash = c === 47 || c === 92;
      if (!isSlash) {
        --this.pointer;
      }
      return true;
    }

    const mayLeavePath = !this.stateOverride;
    if (mayLeavePath && c === 63) {
      this.url.query = "";
      this.state = "query";
      return true;
    }
    if (mayLeavePath && c === 35) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    if (c !== undefined) {
      this.state = "path";
      if (c !== 47) {
        --this.pointer;
      }
    }
    return true;
  };
  // "path" state: collects one path segment in this.buffer and, when the
  // segment ends, appends it to this.url.path (handling "." and ".."
  // segments and Windows drive letters in "file" URLs).
  // Code points: 47 "/", 92 "\", 63 "?", 35 "#", 37 "%".
  874. URLStateMachine.prototype["parse path"] = function parsePath(c) {
  // A segment ends at end of input, "/", "\" for special schemes, and at "?"
  // or "#" unless a state override is active.
  875. if (isNaN(c) || c === 47 || (isSpecial(this.url) && c === 92) ||
  876. (!this.stateOverride && (c === 63 || c === 35))) {
  877. if (isSpecial(this.url) && c === 92) {
  878. this.parseError = true;
  879. }
  // ".." removes the previous segment; if no slash follows, an empty segment
  // is appended.
  880. if (isDoubleDot(this.buffer)) {
  881. shortenPath(this.url);
  882. if (c !== 47 && !(isSpecial(this.url) && c === 92)) {
  883. this.url.path.push("");
  884. }
  885. } else if (isSingleDot(this.buffer) && c !== 47 &&
  886. !(isSpecial(this.url) && c === 92)) {
  // A final "." contributes an empty segment; a "." followed by a slash
  // contributes nothing.
  887. this.url.path.push("");
  888. } else if (!isSingleDot(this.buffer)) {
  // A drive letter as the first segment of a "file" URL clears any host and
  // is normalized to end in ":".
  889. if (this.url.scheme === "file" && this.url.path.length === 0 && isWindowsDriveLetterString(this.buffer)) {
  890. if (this.url.host !== "" && this.url.host !== null) {
  891. this.parseError = true;
  892. this.url.host = "";
  893. }
  894. this.buffer = this.buffer[0] + ":";
  895. }
  896. this.url.path.push(this.buffer);
  897. }
  898. this.buffer = "";
  // At the end of a "file" path, leading empty segments are dropped (each
  // flagged as a parse error) as long as more than one segment remains.
  899. if (this.url.scheme === "file" && (c === undefined || c === 63 || c === 35)) {
  900. while (this.url.path.length > 1 && this.url.path[0] === "") {
  901. this.parseError = true;
  902. this.url.path.shift();
  903. }
  904. }
  905. if (c === 63) {
  906. this.url.query = "";
  907. this.state = "query";
  908. }
  909. if (c === 35) {
  910. this.url.fragment = "";
  911. this.state = "fragment";
  912. }
  913. } else {
  914. // TODO: If c is not a URL code point and not "%", parse error.
  // A "%" not followed by two hex digits is flagged but kept.
  915. if (c === 37 &&
  916. (!isASCIIHex(this.input[this.pointer + 1]) ||
  917. !isASCIIHex(this.input[this.pointer + 2]))) {
  918. this.parseError = true;
  919. }
  920. this.buffer += percentEncodeChar(c, isPathPercentEncode);
  921. }
  922. return true;
  923. };
  // "cannot-be-a-base-URL path" state: appends the percent-encoded code point
  // to the single path entry this.url.path[0], or switches to the query /
  // fragment state on "?" (63) / "#" (35). Always returns true.
  URLStateMachine.prototype["parse cannot-be-a-base-URL path"] = function parseCannotBeABaseURLPath(c) {
    if (c === 63) {
      this.url.query = "";
      this.state = "query";
      return true;
    }

    if (c === 35) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    // No current code point: nothing to flag or append.
    if (isNaN(c)) {
      return true;
    }

    // TODO: Add: not a URL code point
    // As written, every code point other than "%" (37) is flagged, and "%" is
    // flagged when it is not followed by two hex digits.
    if (c !== 37 ||
        !(isASCIIHex(this.input[this.pointer + 1]) &&
          isASCIIHex(this.input[this.pointer + 2]))) {
      this.parseError = true;
    }

    this.url.path[0] += percentEncodeChar(c, isC0ControlPercentEncode);
    return true;
  };
  // "query" state: accumulates query text in this.buffer and, once the query
  // ends, appends the percent-encoded bytes of that text to this.url.query.
  //
  // c    - current code point. The global isNaN is kept on purpose: elsewhere
  //        in this file c is compared against undefined, and
  //        Number.isNaN(undefined) would be false.
  // cStr - string form of c, appended to the buffer.
  //
  // Always returns true.
  URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
    // The query ends when there is no code point or, unless a state override
    // is active, at "#" (35).
    if (isNaN(c) || (!this.stateOverride && c === 35)) {
      if (!isSpecial(this.url) || this.url.scheme === "ws" || this.url.scheme === "wss") {
        this.encodingOverride = "utf-8";
      }

      // Buffer.from replaces the deprecated `new Buffer(string)` constructor,
      // which emits a runtime deprecation warning; for a string argument both
      // yield the same UTF-8 bytes.
      const bytes = Buffer.from(this.buffer); // TODO: Use encoding override instead
      for (const byte of bytes) {
        // Encode bytes outside 0x21-0x7E, plus '"' (0x22), "#" (0x23),
        // "<" (0x3C) and ">" (0x3E); everything else is copied verbatim.
        if (byte < 0x21 || byte > 0x7E || byte === 0x22 || byte === 0x23 ||
            byte === 0x3C || byte === 0x3E) {
          this.url.query += percentEncode(byte);
        } else {
          this.url.query += String.fromCodePoint(byte);
        }
      }

      this.buffer = "";
      if (c === 35) {
        this.url.fragment = "";
        this.state = "fragment";
      }
    } else {
      // TODO: If c is not a URL code point and not "%", parse error.
      // A "%" (37) that is not followed by two hex digits is a parse error.
      if (c === 37 &&
          (!isASCIIHex(this.input[this.pointer + 1]) ||
           !isASCIIHex(this.input[this.pointer + 2]))) {
        this.parseError = true;
      }

      this.buffer += cStr;
    }

    return true;
  };
  // "fragment" state: appends each percent-encoded code point to
  // this.url.fragment. A U+0000 code point is flagged as a parse error and
  // not appended. Always returns true.
  URLStateMachine.prototype["parse fragment"] = function parseFragment(c) {
    // No current code point: nothing to do.
    if (isNaN(c)) {
      return true;
    }

    if (c === 0x0) {
      this.parseError = true;
      return true;
    }

    // TODO: If c is not a URL code point and not "%", parse error.
    // A "%" (37) that is not followed by two hex digits is a parse error.
    if (c === 37 &&
        !(isASCIIHex(this.input[this.pointer + 1]) &&
          isASCIIHex(this.input[this.pointer + 2]))) {
      this.parseError = true;
    }

    this.url.fragment += percentEncodeChar(c, isC0ControlPercentEncode);
    return true;
  };
  // Builds the string form of a URL record.
  //
  // url             - record with scheme, host, username, password, port,
  //                   path, query, fragment and cannotBeABaseURL fields.
  // excludeFragment - when truthy, the "#fragment" part is left out.
  //
  // Returns the serialized URL string.
  function serializeURL(url, excludeFragment) {
    let serialized = url.scheme + ":";

    if (url.host === null) {
      // A host-less file URL still gets the "//" marker.
      if (url.scheme === "file") {
        serialized += "//";
      }
    } else {
      serialized += "//";

      const hasCredentials = url.username !== "" || url.password !== "";
      if (hasCredentials) {
        serialized += url.username;
        if (url.password !== "") {
          serialized += ":" + url.password;
        }
        serialized += "@";
      }

      serialized += serializeHost(url.host);

      if (url.port !== null) {
        serialized += ":" + url.port;
      }
    }

    if (url.cannotBeABaseURL) {
      // Such URLs keep their whole path in the first entry.
      serialized += url.path[0];
    } else {
      for (let i = 0; i < url.path.length; ++i) {
        serialized += "/" + url.path[i];
      }
    }

    if (url.query !== null) {
      serialized += "?" + url.query;
    }

    if (!excludeFragment && url.fragment !== null) {
      serialized += "#" + url.fragment;
    }

    return serialized;
  }
  // Serializes an origin tuple ({ scheme, host, port }) as
  // "scheme://host", followed by ":port" when the port is not null.
  function serializeOrigin(tuple) {
    const schemeAndHost = tuple.scheme + "://" + serializeHost(tuple.host);

    if (tuple.port === null) {
      return schemeAndHost;
    }
    return schemeAndHost + ":" + tuple.port;
  }
  module.exports.serializeURL = serializeURL;

  // Returns the serialized origin of a URL record, or "null" for an opaque
  // origin.
  module.exports.serializeURLOrigin = function (url) {
    // https://url.spec.whatwg.org/#concept-url-origin
    const scheme = url.scheme;

    if (scheme === "blob") {
      // The origin of a blob URL is the origin of the URL held in its path.
      try {
        return module.exports.serializeURLOrigin(module.exports.parseURL(url.path[0]));
      } catch (e) {
        // serializing an opaque origin returns "null"
        return "null";
      }
    }

    if (scheme === "file") {
      // spec says "exercise to the reader", chrome says "file://"
      return "file://";
    }

    // Schemes whose origin is the (scheme, host, port) tuple.
    const tupleOriginSchemes = ["ftp", "gopher", "http", "https", "ws", "wss"];
    if (tupleOriginSchemes.indexOf(scheme) !== -1) {
      return serializeOrigin({
        scheme: scheme,
        host: url.host,
        port: url.port
      });
    }

    // serializing an opaque origin returns "null"
    return "null";
  };
  // Runs the URL state machine over `input`.
  //
  // options - optional object; its baseURL, encodingOverride, url and
  //           stateOverride properties are passed to URLStateMachine.
  //
  // Returns the string "failure" when the machine reports failure, otherwise
  // the machine's url record.
  module.exports.basicURLParse = function (input, options) {
    const settings = options === undefined ? {} : options;

    const machine = new URLStateMachine(
      input,
      settings.baseURL,
      settings.encodingOverride,
      settings.url,
      settings.stateOverride
    );

    return machine.failure ? "failure" : machine.url;
  };
  // Replaces url.username with `username`, percent-encoding each decoded
  // element with the userinfo encode predicate. Mutates `url`; returns
  // nothing.
  module.exports.setTheUsername = function (url, username) {
    url.username = "";

    for (const codePoint of punycode.ucs2.decode(username)) {
      url.username += percentEncodeChar(codePoint, isUserinfoPercentEncode);
    }
  };
  // Replaces url.password with `password`, percent-encoding each decoded
  // element with the userinfo encode predicate. Mutates `url`; returns
  // nothing.
  module.exports.setThePassword = function (url, password) {
    url.password = "";

    for (const codePoint of punycode.ucs2.decode(password)) {
      url.password += percentEncodeChar(codePoint, isUserinfoPercentEncode);
    }
  };
  // Re-export helpers defined elsewhere in this file.
  module.exports.serializeHost = serializeHost;
  module.exports.cannotHaveAUsernamePasswordPort = cannotHaveAUsernamePasswordPort;

  // Returns the string form of `integer`, as produced by String().
  module.exports.serializeInteger = function (integer) {
    const text = String(integer);
    return text;
  };
  // Parses `input` by delegating to basicURLParse, forwarding only the
  // baseURL and encodingOverride properties of the optional `options` object.
  module.exports.parseURL = function (input, options) {
    const settings = options === undefined ? {} : options;

    // We don't handle blobs, so this just delegates:
    return module.exports.basicURLParse(input, {
      baseURL: settings.baseURL,
      encodingOverride: settings.encodingOverride
    });
  };