index.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. // Main parser class
  2. 'use strict';
  3. var utils = require('./common/utils');
  4. var helpers = require('./helpers');
  5. var Renderer = require('./renderer');
  6. var ParserCore = require('./parser_core');
  7. var ParserBlock = require('./parser_block');
  8. var ParserInline = require('./parser_inline');
  9. var LinkifyIt = require('linkify-it');
  10. var mdurl = require('mdurl');
  11. var punycode = require('punycode');
  12. var config = {
  13. default: require('./presets/default'),
  14. zero: require('./presets/zero'),
  15. commonmark: require('./presets/commonmark')
  16. };
  17. ////////////////////////////////////////////////////////////////////////////////
  18. //
  19. // This validator can prohibit more than really needed to prevent XSS. It's a
  20. // tradeoff to keep code simple and to be secure by default.
  21. //
  22. // If you need different setup - override validator method as you wish. Or
  23. // replace it with dummy function and use external sanitizer.
  24. //
  25. var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/;
  26. var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;
  27. function validateLink(url) {
  28. // url should be normalized at this point, and existing entities are decoded
  29. var str = url.trim().toLowerCase();
  30. return BAD_PROTO_RE.test(str) ? (GOOD_DATA_RE.test(str) ? true : false) : true;
  31. }
  32. ////////////////////////////////////////////////////////////////////////////////
  33. var RECODE_HOSTNAME_FOR = [ 'http:', 'https:', 'mailto:' ];
  34. function normalizeLink(url) {
  35. var parsed = mdurl.parse(url, true);
  36. if (parsed.hostname) {
  37. // Encode hostnames in urls like:
  38. // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
  39. //
  40. // We don't encode unknown schemas, because it's likely that we encode
  41. // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
  42. //
  43. if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
  44. try {
  45. parsed.hostname = punycode.toASCII(parsed.hostname);
  46. } catch (er) { /**/ }
  47. }
  48. }
  49. return mdurl.encode(mdurl.format(parsed));
  50. }
  51. function normalizeLinkText(url) {
  52. var parsed = mdurl.parse(url, true);
  53. if (parsed.hostname) {
  54. // Encode hostnames in urls like:
  55. // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
  56. //
  57. // We don't encode unknown schemas, because it's likely that we encode
  58. // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
  59. //
  60. if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
  61. try {
  62. parsed.hostname = punycode.toUnicode(parsed.hostname);
  63. } catch (er) { /**/ }
  64. }
  65. }
  66. // add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
  67. return mdurl.decode(mdurl.format(parsed), mdurl.decode.defaultChars + '%');
  68. }
  69. /**
  70. * class MarkdownIt
  71. *
  72. * Main parser/renderer class.
  73. *
  74. * ##### Usage
  75. *
  76. * ```javascript
  77. * // node.js, "classic" way:
  78. * var MarkdownIt = require('markdown-it'),
  79. * md = new MarkdownIt();
  80. * var result = md.render('# markdown-it rulezz!');
  81. *
  82. * // node.js, the same, but with sugar:
  83. * var md = require('markdown-it')();
  84. * var result = md.render('# markdown-it rulezz!');
  85. *
  86. * // browser without AMD, added to "window" on script load
  87. * // Note, there is no dash.
  88. * var md = window.markdownit();
  89. * var result = md.render('# markdown-it rulezz!');
  90. * ```
  91. *
  92. * Single line rendering, without paragraph wrap:
  93. *
  94. * ```javascript
  95. * var md = require('markdown-it')();
  96. * var result = md.renderInline('__markdown-it__ rulezz!');
  97. * ```
  98. **/
  99. /**
  100. * new MarkdownIt([presetName, options])
  101. * - presetName (String): optional, `commonmark` / `zero`
  102. * - options (Object)
  103. *
  104. * Creates parser instance with given config. Can be called without `new`.
  105. *
  106. * ##### presetName
  107. *
  108. * MarkdownIt provides named presets as a convenience to quickly
  109. * enable/disable active syntax rules and options for common use cases.
  110. *
  111. * - ["commonmark"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/commonmark.js) -
  112. * configures parser to strict [CommonMark](http://commonmark.org/) mode.
  113. * - [default](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/default.js) -
  114. * similar to GFM, used when no preset name given. Enables all available rules,
  115. * but still without html, typographer & autolinker.
  116. * - ["zero"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/zero.js) -
  117. * all rules disabled. Useful to quickly setup your config via `.enable()`.
  118. * For example, when you need only `bold` and `italic` markup and nothing else.
  119. *
  120. * ##### options:
  121. *
  122. * - __html__ - `false`. Set `true` to enable HTML tags in source. Be careful!
  123. * That's not safe! You may need external sanitizer to protect output from XSS.
  124. * It's better to extend features via plugins, instead of enabling HTML.
  125. * - __xhtmlOut__ - `false`. Set `true` to add '/' when closing single tags
  126. * (`<br />`). This is needed only for full CommonMark compatibility. In real
  127. * world you will need HTML output.
  128. * - __breaks__ - `false`. Set `true` to convert `\n` in paragraphs into `<br>`.
  129. * - __langPrefix__ - `language-`. CSS language class prefix for fenced blocks.
  130. * Can be useful for external highlighters.
  131. * - __linkify__ - `false`. Set `true` to autoconvert URL-like text to links.
  132. * - __typographer__ - `false`. Set `true` to enable [some language-neutral
  133. * replacement](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/replacements.js) +
  134. * quotes beautification (smartquotes).
  135. * - __quotes__ - `“”‘’`, String or Array. Double + single quotes replacement
  136. * pairs, when typographer enabled and smartquotes on. For example, you can
  137. * use `'«»„“'` for Russian, `'„“‚‘'` for German, and
  138. * `['«\xA0', '\xA0»', '‹\xA0', '\xA0›']` for French (including nbsp).
  139. * - __highlight__ - `null`. Highlighter function for fenced code blocks.
  140. * Highlighter `function (str, lang)` should return escaped HTML. It can also
  141. * return empty string if the source was not changed and should be escaped
  142. * externally. If result starts with <pre... internal wrapper is skipped.
  143. *
  144. * ##### Example
  145. *
  146. * ```javascript
  147. * // commonmark mode
  148. * var md = require('markdown-it')('commonmark');
  149. *
  150. * // default mode
  151. * var md = require('markdown-it')();
  152. *
  153. * // enable everything
  154. * var md = require('markdown-it')({
  155. * html: true,
  156. * linkify: true,
  157. * typographer: true
  158. * });
  159. * ```
  160. *
  161. * ##### Syntax highlighting
  162. *
  163. * ```js
  164. * var hljs = require('highlight.js') // https://highlightjs.org/
  165. *
  166. * var md = require('markdown-it')({
  167. * highlight: function (str, lang) {
  168. * if (lang && hljs.getLanguage(lang)) {
  169. * try {
  170. * return hljs.highlight(str, { language: lang, ignoreIllegals: true }).value;
  171. * } catch (__) {}
  172. * }
  173. *
  174. * return ''; // use external default escaping
  175. * }
  176. * });
  177. * ```
  178. *
  179. * Or with full wrapper override (if you need assign class to `<pre>`):
  180. *
  181. * ```javascript
  182. * var hljs = require('highlight.js') // https://highlightjs.org/
  183. *
  184. * // Actual default values
  185. * var md = require('markdown-it')({
  186. * highlight: function (str, lang) {
  187. * if (lang && hljs.getLanguage(lang)) {
  188. * try {
  189. * return '<pre class="hljs"><code>' +
  190. * hljs.highlight(str, { language: lang, ignoreIllegals: true }).value +
  191. * '</code></pre>';
  192. * } catch (__) {}
  193. * }
  194. *
  195. * return '<pre class="hljs"><code>' + md.utils.escapeHtml(str) + '</code></pre>';
  196. * }
  197. * });
  198. * ```
  199. *
  200. **/
  201. function MarkdownIt(presetName, options) {
  202. if (!(this instanceof MarkdownIt)) {
  203. return new MarkdownIt(presetName, options);
  204. }
  205. if (!options) {
  206. if (!utils.isString(presetName)) {
  207. options = presetName || {};
  208. presetName = 'default';
  209. }
  210. }
  211. /**
  212. * MarkdownIt#inline -> ParserInline
  213. *
  214. * Instance of [[ParserInline]]. You may need it to add new rules when
  215. * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
  216. * [[MarkdownIt.enable]].
  217. **/
  218. this.inline = new ParserInline();
  219. /**
  220. * MarkdownIt#block -> ParserBlock
  221. *
  222. * Instance of [[ParserBlock]]. You may need it to add new rules when
  223. * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
  224. * [[MarkdownIt.enable]].
  225. **/
  226. this.block = new ParserBlock();
  227. /**
  228. * MarkdownIt#core -> Core
  229. *
  230. * Instance of [[Core]] chain executor. You may need it to add new rules when
  231. * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
  232. * [[MarkdownIt.enable]].
  233. **/
  234. this.core = new ParserCore();
  235. /**
  236. * MarkdownIt#renderer -> Renderer
  237. *
  238. * Instance of [[Renderer]]. Use it to modify output look. Or to add rendering
  239. * rules for new token types, generated by plugins.
  240. *
  241. * ##### Example
  242. *
  243. * ```javascript
  244. * var md = require('markdown-it')();
  245. *
  246. * function myToken(tokens, idx, options, env, self) {
  247. * //...
  248. * return result;
  249. * };
  250. *
  251. * md.renderer.rules['my_token'] = myToken
  252. * ```
  253. *
  254. * See [[Renderer]] docs and [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js).
  255. **/
  256. this.renderer = new Renderer();
  257. /**
  258. * MarkdownIt#linkify -> LinkifyIt
  259. *
  260. * [linkify-it](https://github.com/markdown-it/linkify-it) instance.
  261. * Used by [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js)
  262. * rule.
  263. **/
  264. this.linkify = new LinkifyIt();
  265. /**
  266. * MarkdownIt#validateLink(url) -> Boolean
  267. *
  268. * Link validation function. CommonMark allows too much in links. By default
  269. * we disable `javascript:`, `vbscript:`, `file:` schemas, and almost all `data:...` schemas
  270. * except some embedded image types.
  271. *
  272. * You can change this behaviour:
  273. *
  274. * ```javascript
  275. * var md = require('markdown-it')();
  276. * // enable everything
  277. * md.validateLink = function () { return true; }
  278. * ```
  279. **/
  280. this.validateLink = validateLink;
  281. /**
  282. * MarkdownIt#normalizeLink(url) -> String
  283. *
  284. * Function used to encode link url to a machine-readable format,
  285. * which includes url-encoding, punycode, etc.
  286. **/
  287. this.normalizeLink = normalizeLink;
  288. /**
  289. * MarkdownIt#normalizeLinkText(url) -> String
  290. *
  291. * Function used to decode link url to a human-readable format`
  292. **/
  293. this.normalizeLinkText = normalizeLinkText;
  294. // Expose utils & helpers for easy acces from plugins
  295. /**
  296. * MarkdownIt#utils -> utils
  297. *
  298. * Assorted utility functions, useful to write plugins. See details
  299. * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/common/utils.js).
  300. **/
  301. this.utils = utils;
  302. /**
  303. * MarkdownIt#helpers -> helpers
  304. *
  305. * Link components parser functions, useful to write plugins. See details
  306. * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/helpers).
  307. **/
  308. this.helpers = utils.assign({}, helpers);
  309. this.options = {};
  310. this.configure(presetName);
  311. if (options) { this.set(options); }
  312. }
  313. /** chainable
  314. * MarkdownIt.set(options)
  315. *
  316. * Set parser options (in the same format as in constructor). Probably, you
  317. * will never need it, but you can change options after constructor call.
  318. *
  319. * ##### Example
  320. *
  321. * ```javascript
  322. * var md = require('markdown-it')()
  323. * .set({ html: true, breaks: true })
  324. * .set({ typographer: true });
  325. * ```
  326. *
  327. * __Note:__ To achieve the best possible performance, don't modify a
  328. * `markdown-it` instance options on the fly. If you need multiple configurations
  329. * it's best to create multiple instances and initialize each with separate
  330. * config.
  331. **/
  332. MarkdownIt.prototype.set = function (options) {
  333. utils.assign(this.options, options);
  334. return this;
  335. };
  336. /** chainable, internal
  337. * MarkdownIt.configure(presets)
  338. *
  339. * Batch load of all options and component settings. This is an internal method,
  340. * and you probably will not need it. But if you will - see available presets
  341. * and data structure [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
  342. *
  343. * We strongly recommend to use presets instead of direct config loads. That
  344. * will give better compatibility with next versions.
  345. **/
  346. MarkdownIt.prototype.configure = function (presets) {
  347. var self = this, presetName;
  348. if (utils.isString(presets)) {
  349. presetName = presets;
  350. presets = config[presetName];
  351. if (!presets) { throw new Error('Wrong `markdown-it` preset "' + presetName + '", check name'); }
  352. }
  353. if (!presets) { throw new Error('Wrong `markdown-it` preset, can\'t be empty'); }
  354. if (presets.options) { self.set(presets.options); }
  355. if (presets.components) {
  356. Object.keys(presets.components).forEach(function (name) {
  357. if (presets.components[name].rules) {
  358. self[name].ruler.enableOnly(presets.components[name].rules);
  359. }
  360. if (presets.components[name].rules2) {
  361. self[name].ruler2.enableOnly(presets.components[name].rules2);
  362. }
  363. });
  364. }
  365. return this;
  366. };
  367. /** chainable
  368. * MarkdownIt.enable(list, ignoreInvalid)
  369. * - list (String|Array): rule name or list of rule names to enable
  370. * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
  371. *
  372. * Enable a list of rules. It will automatically find appropriate components,
  373. * containing rules with given names. If rule not found, and `ignoreInvalid`
  374. * not set - throws exception.
  375. *
  376. * ##### Example
  377. *
  378. * ```javascript
  379. * var md = require('markdown-it')()
  380. * .enable(['sub', 'sup'])
  381. * .disable('smartquotes');
  382. * ```
  383. **/
  384. MarkdownIt.prototype.enable = function (list, ignoreInvalid) {
  385. var result = [];
  386. if (!Array.isArray(list)) { list = [ list ]; }
  387. [ 'core', 'block', 'inline' ].forEach(function (chain) {
  388. result = result.concat(this[chain].ruler.enable(list, true));
  389. }, this);
  390. result = result.concat(this.inline.ruler2.enable(list, true));
  391. var missed = list.filter(function (name) { return result.indexOf(name) < 0; });
  392. if (missed.length && !ignoreInvalid) {
  393. throw new Error('MarkdownIt. Failed to enable unknown rule(s): ' + missed);
  394. }
  395. return this;
  396. };
  397. /** chainable
  398. * MarkdownIt.disable(list, ignoreInvalid)
  399. * - list (String|Array): rule name or list of rule names to disable.
  400. * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
  401. *
  402. * The same as [[MarkdownIt.enable]], but turn specified rules off.
  403. **/
  404. MarkdownIt.prototype.disable = function (list, ignoreInvalid) {
  405. var result = [];
  406. if (!Array.isArray(list)) { list = [ list ]; }
  407. [ 'core', 'block', 'inline' ].forEach(function (chain) {
  408. result = result.concat(this[chain].ruler.disable(list, true));
  409. }, this);
  410. result = result.concat(this.inline.ruler2.disable(list, true));
  411. var missed = list.filter(function (name) { return result.indexOf(name) < 0; });
  412. if (missed.length && !ignoreInvalid) {
  413. throw new Error('MarkdownIt. Failed to disable unknown rule(s): ' + missed);
  414. }
  415. return this;
  416. };
  417. /** chainable
  418. * MarkdownIt.use(plugin, params)
  419. *
  420. * Load specified plugin with given params into current parser instance.
  421. * It's just a sugar to call `plugin(md, params)` with currying.
  422. *
  423. * ##### Example
  424. *
  425. * ```javascript
  426. * var iterator = require('markdown-it-for-inline');
  427. * var md = require('markdown-it')()
  428. * .use(iterator, 'foo_replace', 'text', function (tokens, idx) {
  429. * tokens[idx].content = tokens[idx].content.replace(/foo/g, 'bar');
  430. * });
  431. * ```
  432. **/
  433. MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
  434. var args = [ this ].concat(Array.prototype.slice.call(arguments, 1));
  435. plugin.apply(plugin, args);
  436. return this;
  437. };
  438. /** internal
  439. * MarkdownIt.parse(src, env) -> Array
  440. * - src (String): source string
  441. * - env (Object): environment sandbox
  442. *
  443. * Parse input string and return list of block tokens (special token type
  444. * "inline" will contain list of inline tokens). You should not call this
  445. * method directly, unless you are writing a custom renderer (for example, to produce
  446. * AST).
  447. *
  448. * `env` is used to pass data between "distributed" rules and return additional
  449. * metadata like reference info, needed for the renderer. It also can be used to
  450. * inject data in specific cases. Usually, you will be ok to pass `{}`,
  451. * and then pass updated object to renderer.
  452. **/
  453. MarkdownIt.prototype.parse = function (src, env) {
  454. if (typeof src !== 'string') {
  455. throw new Error('Input data should be a String');
  456. }
  457. var state = new this.core.State(src, this, env);
  458. this.core.process(state);
  459. return state.tokens;
  460. };
  461. /**
  462. * MarkdownIt.render(src [, env]) -> String
  463. * - src (String): source string
  464. * - env (Object): environment sandbox
  465. *
  466. * Render markdown string into html. It does all magic for you :).
  467. *
  468. * `env` can be used to inject additional metadata (`{}` by default).
  469. * But you will not need it with high probability. See also comment
  470. * in [[MarkdownIt.parse]].
  471. **/
  472. MarkdownIt.prototype.render = function (src, env) {
  473. env = env || {};
  474. return this.renderer.render(this.parse(src, env), this.options, env);
  475. };
  476. /** internal
  477. * MarkdownIt.parseInline(src, env) -> Array
  478. * - src (String): source string
  479. * - env (Object): environment sandbox
  480. *
  481. * The same as [[MarkdownIt.parse]] but skip all block rules. It returns the
  482. * block tokens list with the single `inline` element, containing parsed inline
  483. * tokens in `children` property. Also updates `env` object.
  484. **/
  485. MarkdownIt.prototype.parseInline = function (src, env) {
  486. var state = new this.core.State(src, this, env);
  487. state.inlineMode = true;
  488. this.core.process(state);
  489. return state.tokens;
  490. };
  491. /**
  492. * MarkdownIt.renderInline(src [, env]) -> String
  493. * - src (String): source string
  494. * - env (Object): environment sandbox
  495. *
  496. * Similar to [[MarkdownIt.render]] but for single paragraph content. Result
  497. * will NOT be wrapped into `<p>` tags.
  498. **/
  499. MarkdownIt.prototype.renderInline = function (src, env) {
  500. env = env || {};
  501. return this.renderer.render(this.parseInline(src, env), this.options, env);
  502. };
// The constructor is the module's only export; it can be called with or
// without `new`.
module.exports = MarkdownIt;