123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302 |
- import {
- noopTest,
- edit,
- merge
- } from './helpers.js';
/**
 * Block-Level Grammar
 *
 * Raw rule table for block-level constructs. Several entries are templates:
 * bare words such as `bull`, `label`, `title`, `comment`, `tag`, `attribute`
 * and `paragraph` are placeholders that are substituted through `edit()`
 * by the statements that follow this table.
 */
export const block = {
  // one or more lines that hold nothing but spaces
  newline: /^(?: *(?:\n|$))+/,
  // lines that start with four spaces, plus the blank lines trailing each of them
  code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
  // group 1: opening fence, group 2: rest of the opening line, group 3: fenced body
  fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
  // three or more `-`, `_` or `*`, each optionally followed by spaces
  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  // group 1: run of one to six `#`, group 2: remainder of the line
  heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
  // kept as a string template; every alternative sits inside one group
  // that follows the optional indentation
  html: '^ {0,3}(?:' + [
    '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)', // (1)
    'comment[^\\n]*(\\n+|$)', // (2)
    '<\\?[\\s\\S]*?(?:\\?>\\n*|$)', // (3)
    '<![A-Z][\\s\\S]*?(?:>\\n*|$)', // (4)
    '<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)', // (5)
    '</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)', // (6)
    '<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)', // (7) open tag
    '</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
  ].join('|') + ')',
  def: /^ {0,3}\[(label)\]: *(?:\n *)?<?([^\s>]+)>?(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
  // stand-in rule; the GFM grammar supplies its own table pattern
  table: noopTest,
  // text line followed by an underline made of `=` or `-`
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // regex template, placeholders will be replaced according to different paragraph
  // interruption rules of commonmark and the original markdown spec:
  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
  // everything up to the end of the current line
  text: /^[^\n]+/
};
// Building blocks for link reference definitions: the bracketed label and
// the optional title (double-quoted, single-quoted or parenthesised).
block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;

// Fill the `label` and `title` placeholders of the definition template,
// applying the substitutions in the listed order.
block.def = [
  ['label', block._label],
  ['title', block._title]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(block.def)
).getRegex();
// A list marker: `*`, `+`, `-`, or one to nine digits followed by `.` or `)`.
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;

// Leading spaces (group 1) and the marker (group 2) that open a list item.
block.listItemStart = edit(/^( *)(bull) */).replace('bull', block.bullet).getRegex();

// Finalise the list rule. The substitutions run in the listed order.
// NOTE(review): the list template contains neither `hr` nor `def`, so the
// last two substitutions look like no-ops if `edit().replace` behaves like
// String.prototype.replace; confirm before removing them.
block.list = [
  [/bull/g, block.bullet],
  ['hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))'],
  ['def', '\\n+(?=' + block.def.source + ')']
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(block.list)
).getRegex();
// Tag names accepted by alternative (6) of the html template, stored as a
// `|`-separated alternation string.
block._tag = [
  'address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body', 'caption',
  'center', 'col', 'colgroup', 'dd', 'details', 'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  'figure', 'footer', 'form', 'frame', 'frameset', 'h[1-6]', 'head', 'header', 'hr', 'html', 'iframe',
  'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav', 'noframes', 'ol', 'optgroup', 'option',
  'p', 'param', 'section', 'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr',
  'track', 'ul'
].join('|');

// An HTML comment; it may run to the end of input when `-->` is missing.
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;

// Compile the html template (with the `i` flag) once its `comment`, `tag`
// and `attribute` placeholders are filled in, in that order.
block.html = [
  ['comment', block._comment],
  ['tag', block._tag],
  ['attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(block.html, 'i')
).getRegex();
// Default paragraph rule: each placeholder of the `_paragraph` template is
// replaced, in the listed order, by the pattern that may interrupt a paragraph.
block.paragraph = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['|lheading', ''], // setex headings don't interrupt commonmark paragraphs
  ['|table', ''],
  ['blockquote', ' {0,3}>'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)'],
  ['tag', block._tag] // pars can be interrupted by type (6) html blocks
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(block._paragraph)
).getRegex();
// Blockquote lines may hold a full paragraph: fill the `paragraph`
// placeholder with the finished paragraph rule.
block.blockquote = edit(block.blockquote).replace('paragraph', block.paragraph).getRegex();

/**
 * Normal Block Grammar
 *
 * Built by merging the rules defined so far into a fresh object.
 */
block.normal = merge({}, block);
/**
 * GFM Block Grammar
 *
 * The normal grammar plus a table rule. The table template is assembled
 * from three consecutive pieces: header row, alignment row and cell rows.
 */
block.gfm = merge({}, block.normal, {
  table: [
    '^ *([^\\n ].*\\|.*)\\n', // Header
    ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?', // Align
    '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  ].join('')
});

// Fill in, in the listed order, the constructs that end the run of cell rows.
block.gfm.table = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['blockquote', ' {0,3}>'],
  ['code', ' {4}[^\\n]'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)'],
  ['tag', block._tag] // tables can be interrupted by type (6) html blocks
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(block.gfm.table)
).getRegex();
// GFM paragraph rule: same template as the default paragraph, except that
// the `table` placeholder is filled with the GFM table rule instead of
// being dropped.
block.gfm.paragraph = [
  ['hr', block.hr],
  ['heading', ' {0,3}#{1,6} '],
  ['|lheading', ''], // setex headings don't interrupt commonmark paragraphs
  ['table', block.gfm.table], // interrupt paragraphs with table
  ['blockquote', ' {0,3}>'],
  ['fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n'],
  ['list', ' {0,3}(?:[*+-]|1[.)]) '], // only lists starting from 1 can interrupt
  ['html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)'],
  ['tag', block._tag] // pars can be interrupted by type (6) html blocks
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(block._paragraph)
).getRegex();
/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 *
 * Overrides html, def, heading, fences and paragraph on top of the normal
 * grammar. The two compiled rules are prepared first, then merged.
 */
{
  // Names excluded by the `tag` placeholder of the pedantic html rule.
  const excludedTagNames = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data', 'time', 'code', 'var', 'samp', 'kbd', 'sub',
    'sup', 'i', 'b', 'u', 'mark', 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del', 'img'
  ].join('|');

  const pedanticHtml = edit(
    [
      '^ *(?:comment *(?:\\n|\\s*$)',
      '<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)', // closed tag
      '<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))'
    ].join('|'))
    .replace('comment', block._comment)
    .replace(/tag/g, '(?!(?:' + excludedTagNames + ')' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
    .getRegex();

  const pedanticParagraph = [
    ['hr', block.hr],
    // NOTE(review): unlike the sibling patterns, `\n` is not double-escaped
    // here, so this string holds a literal line feed inside the character class.
    ['heading', ' *#{1,6} *[^\n]'],
    ['lheading', block.lheading],
    ['blockquote', ' {0,3}>'],
    ['|fences', ''],
    ['|list', ''],
    ['|html', '']
  ].reduce(
    (rule, [placeholder, value]) => rule.replace(placeholder, value),
    edit(block.normal._paragraph)
  ).getRegex();

  block.pedantic = merge({}, block.normal, {
    html: pedanticHtml,
    def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
    heading: /^(#{1,6})(.*)(?:\n+|$)/,
    fences: noopTest, // fences not supported
    paragraph: pedanticParagraph
  });
}
/**
 * Inline-Level Grammar
 *
 * Raw rule table for inline constructs. Bare words such as `scheme`,
 * `email`, `comment`, `attribute`, `label`, `href`, `title`, `ref`,
 * `punct` and `punctuation` are placeholders substituted through `edit()`
 * by the statements that follow this table.
 */
export const inline = {
  // backslash followed by one escapable punctuation character
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  // stand-in rule; the GFM grammar supplies its own url pattern
  url: noopTest,
  // kept as a string template; every alternative is anchored on its own
  tag: [
    '^comment',
    '^</[a-zA-Z][\\w:-]*\\s*>', // closing tag
    '^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>', // open tag
    '^<\\?[\\s\\S]*?\\?>', // processing instruction, e.g. <?php ?>
    '^<![a-zA-Z]+\\s[\\s\\S]*?>', // declaration, e.g. <!DOCTYPE html>
    '^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' // CDATA section
  ].join('|'),
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  reflink: /^!?\[(label)\]\[(ref)\]/,
  nolink: /^!?\[(ref)\](?:\[\])?/,
  reflinkSearch: 'reflink|nolink(?!\\()',
  emStrong: {
    lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
    // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
    // () Skip orphan delim inside strong (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
    rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
    rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
  },
  // code span: group 1 is the backtick run, which must close the span unchanged
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  // two or more spaces, or a backslash, right before a newline
  br: /^( {2,}|\\)\n(?!\s*$)/,
  // stand-in rule; the GFM grammar supplies its own del pattern
  del: noopTest,
  text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
  punctuation: /^([\spunctuation])/
};
// list of punctuation marks from CommonMark spec
// without * and _ to handle the different emphasis markers * and _
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';

// Expand the `punctuation` placeholder inside the character class.
inline.punctuation = edit(inline.punctuation)
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

// sequences em should skip over [title](link), `code`, <html>
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;

// backslash-escaped `*` and `_`
inline.escapedEmSt = /\\\*|\\_/g;

// Inline comments reuse the block comment pattern but require the closing
// `-->`: the end-of-input alternative is removed from the source.
inline._comment = edit(block._comment)
  .replace('(?:-->|$)', '-->')
  .getRegex();
// Expand the `punct` placeholder in the three emphasis delimiter rules.
// The right-delimiter rules are compiled with the `g` flag; the left one
// is compiled without flags.
{
  const { emStrong } = inline;

  emStrong.lDelim = edit(emStrong.lDelim)
    .replace(/punct/g, inline._punctuation)
    .getRegex();

  emStrong.rDelimAst = edit(emStrong.rDelimAst, 'g')
    .replace(/punct/g, inline._punctuation)
    .getRegex();

  emStrong.rDelimUnd = edit(emStrong.rDelimUnd, 'g')
    .replace(/punct/g, inline._punctuation)
    .getRegex();
}
// Global variant of the escape pattern (not anchored to the start of input).
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

// URI scheme: a letter followed by 1 to 31 letters, digits, `+`, `.` or `-`.
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;

// Email address; the `@` is captured in its own group.
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

// Fill the `scheme` and `email` placeholders of the autolink template.
inline.autolink = [
  ['scheme', inline._scheme],
  ['email', inline._email]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(inline.autolink)
).getRegex();
// One HTML attribute: whitespace, a name, and an optional value that is
// double-quoted, single-quoted or unquoted.
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

// Fill the `comment` and `attribute` placeholders of the tag template.
inline.tag = [
  ['comment', inline._comment],
  ['attribute', inline._attribute]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(inline.tag)
).getRegex();
// Link text: may contain one level of bracket pairs, backslash escapes and
// backtick-delimited spans.
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;

// Link destination: either wrapped in `<...>` or a run without whitespace
// and control characters.
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;

// Link title: double-quoted, single-quoted or parenthesised.
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

// Fill the `label`, `href` and `title` placeholders of the link template.
inline.link = [
  ['label', inline._label],
  ['href', inline._href],
  ['title', inline._title]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(inline.link)
).getRegex();
// Reference links: `[label][ref]`. The reference part reuses the block
// definition label pattern.
inline.reflink = [
  ['label', inline._label],
  ['ref', block._label]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(inline.reflink)
).getRegex();

// Shortcut and collapsed reference links: `[ref]` or `[ref][]`.
inline.nolink = edit(inline.nolink).replace('ref', block._label).getRegex();

// Global search pattern built from the two finished rules above.
inline.reflinkSearch = [
  ['reflink', inline.reflink],
  ['nolink', inline.nolink]
].reduce(
  (rule, [placeholder, value]) => rule.replace(placeholder, value),
  edit(inline.reflinkSearch, 'g')
).getRegex();

/**
 * Normal Inline Grammar
 *
 * Built by merging the rules defined so far into a fresh object.
 */
inline.normal = merge({}, inline);
/**
 * Pedantic Inline Grammar
 *
 * Adds dedicated `strong` and `em` rule groups and replaces the `link` and
 * `reflink` rules of the normal grammar.
 */
{
  const pedanticLink = edit(/^!?\[(label)\]\((.*?)\)/)
    .replace('label', inline._label)
    .getRegex();

  const pedanticReflink = edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
    .replace('label', inline._label)
    .getRegex();

  inline.pedantic = merge({}, inline.normal, {
    strong: {
      start: /^__|\*\*/,
      middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
      endAst: /\*\*(?!\*)/g,
      endUnd: /__(?!_)/g
    },
    em: {
      start: /^_|\*/,
      middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
      endAst: /\*(?!\*)/g,
      endUnd: /_(?!_)/g
    },
    link: pedanticLink,
    reflink: pedanticReflink
  });
}
/**
 * GFM Inline Grammar
 *
 * Extends the normal grammar with url, del and extended email rules and
 * replaces the escape and text rules.
 */
{
  // The escape rule is rebuilt from the normal one with its source edited
  // at the first `])`.
  const gfmEscape = edit(inline.escape).replace('])', '~|])').getRegex();

  inline.gfm = merge({}, inline.normal, {
    escape: gfmEscape,
    _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
    url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
    _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
    del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
    text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
  });
}

// Compile the url rule with the `i` flag once its `email` placeholder is
// filled with the extended email pattern.
inline.gfm.url = edit(inline.gfm.url, 'i').replace('email', inline.gfm._extended_email).getRegex();
/**
 * GFM + Line Breaks Inline Grammar
 *
 * Derives `br` and `text` from existing rules by rewriting their sources:
 * every `{2,}` quantifier becomes `*`, and the text rule additionally gets
 * a ` {2,}\n` alternative inserted after `\b_` before that rewrite runs.
 */
{
  const breaksBr = edit(inline.br).replace('{2,}', '*').getRegex();

  const breaksText = [
    ['\\b_', '\\b_| {2,}\\n'],
    [/\{2,\}/g, '*']
  ].reduce(
    (rule, [pattern, value]) => rule.replace(pattern, value),
    edit(inline.gfm.text)
  ).getRegex();

  inline.breaks = merge({}, inline.gfm, {
    br: breaksBr,
    text: breaksText
  });
}
|