// rules.js (12 KB)
  1. import {
  2. noopTest,
  3. edit,
  4. merge
  5. } from './helpers.js';
  /**
   * Block-Level Grammar
   *
   * Rule table for block-level constructs. Each entry is a RegExp literal, a
   * pattern string (`html`), or `noopTest` (`table`). Bare words inside a
   * pattern such as `bull`, `label`, `title`, `comment`, `tag`, `attribute`
   * and `paragraph` are placeholders: the statements that follow this object
   * run the raw patterns through `edit(...).replace(...).getRegex()` to fill
   * them in.
   */
  export const block = {
    newline: /^(?: *(?:\n|$))+/,
    code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
    fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
    hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
    heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
    blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
    list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
    // Raw pattern string; `comment`, `tag` and `attribute` are placeholders.
    html: '^ {0,3}(?:' // optional indentation
      + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
      + '|comment[^\\n]*(\\n+|$)' // (2)
      + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
      + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
      + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
      + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
      + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
      + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
      + ')',
    def: /^ {0,3}\[(label)\]: *(?:\n *)?<?([^\s>]+)>?(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
    // No table rule in the base grammar; the GFM grammar supplies one.
    table: noopTest,
    lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
    // regex template, placeholders will be replaced according to different paragraph
    // interruption rules of commonmark and the original markdown spec:
    _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
    text: /^[^\n]+/
  };
  // Sub-patterns substituted into `block.def`:
  //   _label - the text between the square brackets (must not be blank)
  //   _title - a title wrapped in "...", '...' or (...)
  block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
  block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
  // Fill the `label` and `title` placeholders of the raw `def` pattern.
  block.def = edit(block.def)
    .replace('label', block._label)
    .replace('title', block._title)
    .getRegex();
  // A list marker: one of `*`, `+`, `-`, or 1-9 digits followed by `.` or `)`.
  block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
  // Leading spaces (group 1) and the marker (group 2) that open a list item.
  block.listItemStart = edit(/^( *)(bull) */)
    .replace('bull', block.bullet)
    .getRegex();
  // Fill the `bull` placeholder of the raw `list` pattern.
  // NOTE(review): the raw `list` pattern declared in `block` contains neither
  // an `hr` nor a `def` placeholder, so the last two substitutions look like
  // no-ops. Confirm against `edit` in ./helpers.js before removing them.
  block.list = edit(block.list)
    .replace(/bull/g, block.bullet)
    .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
    .replace('def', '\\n+(?=' + block.def.source + ')')
    .getRegex();
  // Alternation of element names substituted for the `tag` placeholder, both
  // in `block.html` (alternative 6) and in the paragraph/table interruption
  // patterns further down.
  block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
    + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
    + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
    + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
    + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
    + '|track|ul';
  // An HTML comment; at block level it may also run to the end of input
  // without a closing `-->`.
  block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
  // Build the final HTML block rule from the raw pattern string. 'i' is passed
  // as the second argument to `edit` (presumably the RegExp flags).
  block.html = edit(block.html, 'i')
    .replace('comment', block._comment)
    .replace('tag', block._tag)
    .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
    .getRegex();
  // Paragraph rule of the base grammar: every placeholder in the negative
  // lookahead of `_paragraph` is either filled with the pattern of a construct
  // that may interrupt a paragraph, or removed together with its leading `|`.
  block.paragraph = edit(block._paragraph)
    .replace('hr', block.hr)
    .replace('heading', ' {0,3}#{1,6} ')
    .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
    .replace('|table', '')
    .replace('blockquote', ' {0,3}>')
    .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
    .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
    .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
    .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
    .getRegex();
  // The blockquote rule embeds the finished paragraph rule, so it has to be
  // built after `block.paragraph`.
  block.blockquote = edit(block.blockquote)
    .replace('paragraph', block.paragraph)
    .getRegex();
  /**
   * Normal Block Grammar
   *
   * Built by merging `block` into a fresh object; the GFM and pedantic block
   * grammars below are both derived from it.
   */
  block.normal = merge({}, block);
  /**
   * GFM Block Grammar
   *
   * The normal grammar plus a `table` rule, and a paragraph rule that a table
   * can interrupt.
   */
  block.gfm = merge({}, block.normal, {
    // Raw pattern string; the placeholders in the cell lookahead are filled in
    // by the statement right after this object.
    table: '^ *([^\\n ].*\\|.*)\\n' // Header
      + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align
      + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  });
  // Fill the placeholders that name the constructs able to end the cell rows.
  block.gfm.table = edit(block.gfm.table)
    .replace('hr', block.hr)
    .replace('heading', ' {0,3}#{1,6} ')
    .replace('blockquote', ' {0,3}>')
    .replace('code', ' {4}[^\\n]')
    .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
    .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
    .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
    .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
    .getRegex();
  // Same construction as the base paragraph rule, except that the `table`
  // placeholder is filled with the GFM table rule instead of being removed.
  block.gfm.paragraph = edit(block._paragraph)
    .replace('hr', block.hr)
    .replace('heading', ' {0,3}#{1,6} ')
    .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
    .replace('table', block.gfm.table) // interrupt paragraphs with table
    .replace('blockquote', ' {0,3}>')
    .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
    .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
    .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
    .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
    .getRegex();
  /**
   * Pedantic grammar (original John Gruber's loose markdown specification)
   *
   * The normal grammar with its `html`, `def`, `heading`, `fences` and
   * `paragraph` rules overridden.
   */
  block.pedantic = merge({}, block.normal, {
    // `tag` is replaced everywhere by "any word that is not one of the listed
    // inline element names"; `comment` by the block comment pattern.
    html: edit(
      '^ *(?:comment *(?:\\n|\\s*$)'
      + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
      + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
      .replace('comment', block._comment)
      .replace(/tag/g, '(?!(?:'
        + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
        + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
        + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
      .getRegex(),
    def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
    heading: /^(#{1,6})(.*)(?:\n+|$)/,
    fences: noopTest, // fences not supported
    paragraph: edit(block.normal._paragraph)
      .replace('hr', block.hr)
      .replace('heading', ' *#{1,6} *[^\n]')
      .replace('lheading', block.lheading)
      // `_paragraph` also carries a `table` placeholder. Drop it, as the base
      // paragraph rule does, so that no unreplaced placeholder word is left in
      // the lookahead (this grammar inherits the no-op `table` rule).
      .replace('|table', '')
      .replace('blockquote', ' {0,3}>')
      .replace('|fences', '')
      .replace('|list', '')
      .replace('|html', '')
      .getRegex()
  });
  /**
   * Inline-Level Grammar
   *
   * Rule table for inline constructs. As in the block grammar, bare words in
   * a pattern (`scheme`, `email`, `comment`, `attribute`, `label`, `href`,
   * `title`, `ref`, `punct`, `punctuation`, `reflink`, `nolink`) are
   * placeholders that the statements after this object fill in through
   * `edit(...).replace(...).getRegex()`.
   */
  export const inline = {
    escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
    autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
    // No bare-URL rule in the base grammar; the GFM grammar supplies one.
    url: noopTest,
    tag: '^comment'
      + '|^</[a-zA-Z][\\w:-]*\\s*>' // closing tag
      + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
      + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
      + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
      + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
    link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
    reflink: /^!?\[(label)\]\[(ref)\]/,
    nolink: /^!?\[(ref)\](?:\[\])?/,
    reflinkSearch: 'reflink|nolink(?!\\()',
    emStrong: {
      lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
      //        (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left.  (5) and (6) can be either Left or Right.
      //        () Skip orphan delim inside strong    (1) #***                (2) a***#, a***                   (3) #***a, ***a                 (4) ***#              (5) #***#                 (6) a***a
      rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
      rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
    },
    code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
    br: /^( {2,}|\\)\n(?!\s*$)/,
    // No strikethrough rule in the base grammar; the GFM grammar supplies one.
    del: noopTest,
    text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
    punctuation: /^([\spunctuation])/
  };
  // list of punctuation marks from CommonMark spec
  // without * and _ to handle the different emphasis markers * and _
  inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
  inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();

  // sequences em should skip over [title](link), `code`, <html>
  inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
  // Backslash-escaped `*` and `_`.
  inline.escapedEmSt = /\\\*|\\_/g;

  // Inline comments must be closed: the `(?:-->|$)` tail of the block-level
  // comment pattern is swapped for a plain `-->`.
  inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();

  // Expand the `punct` placeholder inside the emphasis delimiter patterns.
  // The two right-delimiter patterns pass 'g' as the second argument to `edit`
  // (presumably the RegExp flags).
  inline.emStrong.lDelim = edit(inline.emStrong.lDelim)
    .replace(/punct/g, inline._punctuation)
    .getRegex();
  inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g')
    .replace(/punct/g, inline._punctuation)
    .getRegex();
  inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g')
    .replace(/punct/g, inline._punctuation)
    .getRegex();
  // Same character set as `inline.escape`, but unanchored and global.
  inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
  // Sub-patterns for `inline.autolink`: a 2-32 character scheme starting with
  // a letter, and an e-mail address whose `@` is captured in a group.
  inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
  inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
  // Fill the `scheme` and `email` placeholders of the raw autolink pattern.
  inline.autolink = edit(inline.autolink)
    .replace('scheme', inline._scheme)
    .replace('email', inline._email)
    .getRegex();
  // One HTML attribute: whitespace, a name, and an optional double-quoted,
  // single-quoted or unquoted value.
  inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
  // Build the inline tag rule from its raw pattern string.
  inline.tag = edit(inline.tag)
    .replace('comment', inline._comment)
    .replace('attribute', inline._attribute)
    .getRegex();
  // Sub-patterns for the link rules:
  //   _label - link text; may contain one level of [nested] brackets,
  //            backslash escapes and `code` spans
  //   _href  - either <...> or a run of non-space, non-control characters
  //   _title - a title wrapped in "...", '...' or (...)
  inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
  inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
  inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

  inline.link = edit(inline.link)
    .replace('label', inline._label)
    .replace('href', inline._href)
    .replace('title', inline._title)
    .getRegex();

  // The `ref` part reuses the block-level label pattern.
  inline.reflink = edit(inline.reflink)
    .replace('label', inline._label)
    .replace('ref', block._label)
    .getRegex();

  inline.nolink = edit(inline.nolink)
    .replace('ref', block._label)
    .getRegex();

  // Combine the finished `reflink` and `nolink` rules; 'g' is passed as the
  // second argument to `edit` (presumably the RegExp flags).
  inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
    .replace('reflink', inline.reflink)
    .replace('nolink', inline.nolink)
    .getRegex();
  /**
   * Normal Inline Grammar
   *
   * Built by merging `inline` into a fresh object; the pedantic and GFM
   * inline grammars below are both derived from it.
   */
  inline.normal = merge({}, inline);
  /**
   * Pedantic Inline Grammar
   *
   * The normal inline grammar plus `strong` / `em` rule groups and looser
   * `link` / `reflink` rules.
   */
  inline.pedantic = merge({}, inline.normal, {
    strong: {
      // NOTE(review): `^` anchors only the `__` alternative; `\*\*` can match
      // anywhere. Confirm whether that is intended.
      start: /^__|\*\*/,
      middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
      endAst: /\*\*(?!\*)/g,
      endUnd: /__(?!_)/g
    },
    em: {
      // NOTE(review): as above, `^` anchors only the `_` alternative.
      start: /^_|\*/,
      // The empty group `()` in the first alternative shifts the numbering of
      // the capture groups that follow it.
      middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
      endAst: /\*(?!\*)/g,
      endUnd: /_(?!_)/g
    },
    // Anything up to the first `)` is accepted as the link destination.
    link: edit(/^!?\[(label)\]\((.*?)\)/)
      .replace('label', inline._label)
      .getRegex(),
    // Whitespace is allowed between the two bracket pairs.
    reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
      .replace('label', inline._label)
      .getRegex()
  });
  /**
   * GFM Inline Grammar
   *
   * The normal inline grammar with `escape`, `url`, `del` and `text`
   * overridden, plus the helper patterns `_extended_email` and `_backpedal`.
   */
  inline.gfm = merge({}, inline.normal, {
    // NOTE(review): the character class of `inline.escape` already lists `~`
    // and `|`, so this substitution does not appear to widen the set - confirm.
    escape: edit(inline.escape).replace('])', '~|])').getRegex(),
    _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
    // `email` is a placeholder, filled in right after this object.
    url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
    _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
    // One or two `~` on each side; the closing run must equal the opening run.
    del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
    text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
  });
  // Fill the `email` placeholder; 'i' is passed as the second argument to
  // `edit` (presumably the RegExp flags).
  inline.gfm.url = edit(inline.gfm.url, 'i')
    .replace('email', inline.gfm._extended_email)
    .getRegex();
  /**
   * GFM + Line Breaks Inline Grammar
   *
   * The GFM inline grammar with `br` and `text` rewritten so that the
   * `{2,}` quantifier on leading spaces becomes `*`.
   */
  inline.breaks = merge({}, inline.gfm, {
    br: edit(inline.br).replace('{2,}', '*').getRegex(),
    // Order matters: the first substitution inserts a new ` {2,}\n`
    // alternative, and the global second one then rewrites every `{2,}`,
    // including the one just inserted.
    text: edit(inline.gfm.text)
      .replace('\\b_', '\\b_| {2,}\\n')
      .replace(/\{2,\}/g, '*')
      .getRegex()
  });
  264. });