utils.js 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. // Utilities
  2. //
  3. 'use strict';
  4. function _class(obj) { return Object.prototype.toString.call(obj); }
  5. function isString(obj) { return _class(obj) === '[object String]'; }
  6. var _hasOwnProperty = Object.prototype.hasOwnProperty;
  7. function has(object, key) {
  8. return _hasOwnProperty.call(object, key);
  9. }
  10. // Merge objects
  11. //
  12. function assign(obj /*from1, from2, from3, ...*/) {
  13. var sources = Array.prototype.slice.call(arguments, 1);
  14. sources.forEach(function (source) {
  15. if (!source) { return; }
  16. if (typeof source !== 'object') {
  17. throw new TypeError(source + 'must be object');
  18. }
  19. Object.keys(source).forEach(function (key) {
  20. obj[key] = source[key];
  21. });
  22. });
  23. return obj;
  24. }
  25. // Remove element from array and put another array at those position.
  26. // Useful for some operations with tokens
  27. function arrayReplaceAt(src, pos, newElements) {
  28. return [].concat(src.slice(0, pos), newElements, src.slice(pos + 1));
  29. }
  30. ////////////////////////////////////////////////////////////////////////////////
  31. function isValidEntityCode(c) {
  32. /*eslint no-bitwise:0*/
  33. // broken sequence
  34. if (c >= 0xD800 && c <= 0xDFFF) { return false; }
  35. // never used
  36. if (c >= 0xFDD0 && c <= 0xFDEF) { return false; }
  37. if ((c & 0xFFFF) === 0xFFFF || (c & 0xFFFF) === 0xFFFE) { return false; }
  38. // control codes
  39. if (c >= 0x00 && c <= 0x08) { return false; }
  40. if (c === 0x0B) { return false; }
  41. if (c >= 0x0E && c <= 0x1F) { return false; }
  42. if (c >= 0x7F && c <= 0x9F) { return false; }
  43. // out of range
  44. if (c > 0x10FFFF) { return false; }
  45. return true;
  46. }
  47. function fromCodePoint(c) {
  48. /*eslint no-bitwise:0*/
  49. if (c > 0xffff) {
  50. c -= 0x10000;
  51. var surrogate1 = 0xd800 + (c >> 10),
  52. surrogate2 = 0xdc00 + (c & 0x3ff);
  53. return String.fromCharCode(surrogate1, surrogate2);
  54. }
  55. return String.fromCharCode(c);
  56. }
  57. var UNESCAPE_MD_RE = /\\([!"#$%&'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])/g;
  58. var ENTITY_RE = /&([a-z#][a-z0-9]{1,31});/gi;
  59. var UNESCAPE_ALL_RE = new RegExp(UNESCAPE_MD_RE.source + '|' + ENTITY_RE.source, 'gi');
  60. var DIGITAL_ENTITY_TEST_RE = /^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))/i;
  61. var entities = require('./entities');
  62. function replaceEntityPattern(match, name) {
  63. var code = 0;
  64. if (has(entities, name)) {
  65. return entities[name];
  66. }
  67. if (name.charCodeAt(0) === 0x23/* # */ && DIGITAL_ENTITY_TEST_RE.test(name)) {
  68. code = name[1].toLowerCase() === 'x' ?
  69. parseInt(name.slice(2), 16) : parseInt(name.slice(1), 10);
  70. if (isValidEntityCode(code)) {
  71. return fromCodePoint(code);
  72. }
  73. }
  74. return match;
  75. }
  76. /*function replaceEntities(str) {
  77. if (str.indexOf('&') < 0) { return str; }
  78. return str.replace(ENTITY_RE, replaceEntityPattern);
  79. }*/
  80. function unescapeMd(str) {
  81. if (str.indexOf('\\') < 0) { return str; }
  82. return str.replace(UNESCAPE_MD_RE, '$1');
  83. }
  84. function unescapeAll(str) {
  85. if (str.indexOf('\\') < 0 && str.indexOf('&') < 0) { return str; }
  86. return str.replace(UNESCAPE_ALL_RE, function (match, escaped, entity) {
  87. if (escaped) { return escaped; }
  88. return replaceEntityPattern(match, entity);
  89. });
  90. }
  91. ////////////////////////////////////////////////////////////////////////////////
  92. var HTML_ESCAPE_TEST_RE = /[&<>"]/;
  93. var HTML_ESCAPE_REPLACE_RE = /[&<>"]/g;
  94. var HTML_REPLACEMENTS = {
  95. '&': '&amp;',
  96. '<': '&lt;',
  97. '>': '&gt;',
  98. '"': '&quot;'
  99. };
  100. function replaceUnsafeChar(ch) {
  101. return HTML_REPLACEMENTS[ch];
  102. }
  103. function escapeHtml(str) {
  104. if (HTML_ESCAPE_TEST_RE.test(str)) {
  105. return str.replace(HTML_ESCAPE_REPLACE_RE, replaceUnsafeChar);
  106. }
  107. return str;
  108. }
  109. ////////////////////////////////////////////////////////////////////////////////
  110. var REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;
  111. function escapeRE(str) {
  112. return str.replace(REGEXP_ESCAPE_RE, '\\$&');
  113. }
  114. ////////////////////////////////////////////////////////////////////////////////
  115. function isSpace(code) {
  116. switch (code) {
  117. case 0x09:
  118. case 0x20:
  119. return true;
  120. }
  121. return false;
  122. }
  123. // Zs (unicode class) || [\t\f\v\r\n]
  124. function isWhiteSpace(code) {
  125. if (code >= 0x2000 && code <= 0x200A) { return true; }
  126. switch (code) {
  127. case 0x09: // \t
  128. case 0x0A: // \n
  129. case 0x0B: // \v
  130. case 0x0C: // \f
  131. case 0x0D: // \r
  132. case 0x20:
  133. case 0xA0:
  134. case 0x1680:
  135. case 0x202F:
  136. case 0x205F:
  137. case 0x3000:
  138. return true;
  139. }
  140. return false;
  141. }
  142. ////////////////////////////////////////////////////////////////////////////////
  143. /*eslint-disable max-len*/
  144. var UNICODE_PUNCT_RE = require('uc.micro/categories/P/regex');
  145. // Currently without astral characters support.
  146. function isPunctChar(ch) {
  147. return UNICODE_PUNCT_RE.test(ch);
  148. }
  149. // Markdown ASCII punctuation characters.
  150. //
  151. // !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
  152. // http://spec.commonmark.org/0.15/#ascii-punctuation-character
  153. //
  154. // Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
  155. //
  156. function isMdAsciiPunct(ch) {
  157. switch (ch) {
  158. case 0x21/* ! */:
  159. case 0x22/* " */:
  160. case 0x23/* # */:
  161. case 0x24/* $ */:
  162. case 0x25/* % */:
  163. case 0x26/* & */:
  164. case 0x27/* ' */:
  165. case 0x28/* ( */:
  166. case 0x29/* ) */:
  167. case 0x2A/* * */:
  168. case 0x2B/* + */:
  169. case 0x2C/* , */:
  170. case 0x2D/* - */:
  171. case 0x2E/* . */:
  172. case 0x2F/* / */:
  173. case 0x3A/* : */:
  174. case 0x3B/* ; */:
  175. case 0x3C/* < */:
  176. case 0x3D/* = */:
  177. case 0x3E/* > */:
  178. case 0x3F/* ? */:
  179. case 0x40/* @ */:
  180. case 0x5B/* [ */:
  181. case 0x5C/* \ */:
  182. case 0x5D/* ] */:
  183. case 0x5E/* ^ */:
  184. case 0x5F/* _ */:
  185. case 0x60/* ` */:
  186. case 0x7B/* { */:
  187. case 0x7C/* | */:
  188. case 0x7D/* } */:
  189. case 0x7E/* ~ */:
  190. return true;
  191. default:
  192. return false;
  193. }
  194. }
  195. // Hepler to unify [reference labels].
  196. //
  197. function normalizeReference(str) {
  198. // Trim and collapse whitespace
  199. //
  200. str = str.trim().replace(/\s+/g, ' ');
  201. // In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
  202. // fixed in v12 (couldn't find any details).
  203. //
  204. // So treat this one as a special case
  205. // (remove this when node v10 is no longer supported).
  206. //
  207. if ('ẞ'.toLowerCase() === 'Ṿ') {
  208. str = str.replace(/ẞ/g, 'ß');
  209. }
  210. // .toLowerCase().toUpperCase() should get rid of all differences
  211. // between letter variants.
  212. //
  213. // Simple .toLowerCase() doesn't normalize 125 code points correctly,
  214. // and .toUpperCase doesn't normalize 6 of them (list of exceptions:
  215. // İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently
  216. // uppercased versions).
  217. //
  218. // Here's an example showing how it happens. Lets take greek letter omega:
  219. // uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ)
  220. //
  221. // Unicode entries:
  222. // 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8;
  223. // 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
  224. // 03D1;GREEK THETA SYMBOL;Ll;0;L;<compat> 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398
  225. // 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L;<compat> 0398;;;;N;;;;03B8;
  226. //
  227. // Case-insensitive comparison should treat all of them as equivalent.
  228. //
  229. // But .toLowerCase() doesn't change ϑ (it's already lowercase),
  230. // and .toUpperCase() doesn't change ϴ (already uppercase).
  231. //
  232. // Applying first lower then upper case normalizes any character:
  233. // '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
  234. //
  235. // Note: this is equivalent to unicode case folding; unicode normalization
  236. // is a different step that is not required here.
  237. //
  238. // Final result should be uppercased, because it's later stored in an object
  239. // (this avoid a conflict with Object.prototype members,
  240. // most notably, `__proto__`)
  241. //
  242. return str.toLowerCase().toUpperCase();
  243. }
  244. ////////////////////////////////////////////////////////////////////////////////
// Re-export libraries commonly used in both markdown-it and its plugins,
// so plugins won't have to depend on them explicitly, which reduces their
// bundled size (e.g. a browser build).
//
exports.lib = {};
exports.lib.mdurl = require('mdurl');
exports.lib.ucmicro = require('uc.micro');

// Public helpers defined in this file. Each one is attached with a plain
// `exports.name = value` assignment.
exports.assign = assign;
exports.isString = isString;
exports.has = has;
exports.unescapeMd = unescapeMd;
exports.unescapeAll = unescapeAll;
exports.isValidEntityCode = isValidEntityCode;
exports.fromCodePoint = fromCodePoint;
// Not exported: the `replaceEntities` implementation is commented out in this file.
// exports.replaceEntities = replaceEntities;
exports.escapeHtml = escapeHtml;
exports.arrayReplaceAt = arrayReplaceAt;
exports.isSpace = isSpace;
exports.isWhiteSpace = isWhiteSpace;
exports.isMdAsciiPunct = isMdAsciiPunct;
exports.isPunctChar = isPunctChar;
exports.escapeRE = escapeRE;
exports.normalizeReference = normalizeReference;