12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768 |
- import { defaults } from './defaults.js';
- import {
- rtrim,
- splitCells,
- escape,
- findClosingBracket
- } from './helpers.js';
/**
 * Build a `link` or `image` token from a regex capture and resolved link data.
 *
 * @param {Array<string>} cap - Capture array; `cap[0]` is the full match
 *   (a leading `!` marks an image) and `cap[1]` is the bracketed label.
 * @param {{href: string, title: ?string}} link - Destination and optional title.
 * @param {string} raw - Raw source text stored on the token.
 * @param {object} lexer - Lexer exposing `state.inLink` and `inlineTokens()`.
 * @returns {object} A `link` token (with child `tokens`) or an `image` token.
 */
function outputLink(cap, link, raw, lexer) {
  const { href } = link;
  const title = link.title ? escape(link.title) : null;
  // Unescape `\[` and `\]` inside the label.
  const text = cap[1].replace(/\\([\[\]])/g, '$1');
  const isImage = cap[0].charAt(0) === '!';

  if (isImage) {
    return { type: 'image', raw, href, title, text: escape(text) };
  }

  // The label is tokenized while the lexer is flagged as being inside a link;
  // the flag is cleared again afterwards.
  lexer.state.inLink = true;
  const tokens = lexer.inlineTokens(text, []);
  const token = { type: 'link', raw, href, title, text, tokens };
  lexer.state.inLink = false;
  return token;
}
/**
 * Strip the opening fence's indentation from the lines of a fenced code block.
 *
 * If `raw` does not start with whitespace followed by three backticks, `text`
 * is returned untouched. Otherwise every line of `text` whose leading
 * whitespace is at least as long as the fence's indentation loses that many
 * leading characters; shorter-indented lines are kept as they are.
 *
 * @param {string} raw - Raw source of the fenced block, including the fence.
 * @param {string} text - Code content found between the fences.
 * @returns {string} The content with the fence indentation removed.
 */
function indentCodeCompensation(raw, text) {
  const fenceIndent = /^(\s+)(?:```)/.exec(raw);
  if (fenceIndent === null) {
    return text;
  }

  const width = fenceIndent[1].length;
  const lines = text.split('\n');
  for (let i = 0; i < lines.length; i++) {
    const leading = /^\s+/.exec(lines[i]);
    if (leading !== null && leading[0].length >= width) {
      lines[i] = lines[i].slice(width);
    }
  }
  return lines.join('\n');
}
/**
 * Tokenizer
 *
 * Each method runs one rule from `this.rules` against the start of `src` and,
 * on a match, returns a token object whose `raw` is the consumed source text.
 * On no match the method returns `undefined`.
 *
 * `this.rules` and `this.lexer` are read by the methods but are not assigned
 * in this class; they must be set on the instance before tokenizing.
 */
export class Tokenizer {
  /**
   * @param {object} [options] - Tokenizer options; falls back to `defaults`.
   */
  constructor(options) {
    this.options = options || defaults;
  }

  /**
   * Match blank lines.
   * @param {string} src
   * @returns {{type: 'space', raw: string}|undefined}
   */
  space(src) {
    const cap = this.rules.block.newline.exec(src);
    if (cap && cap[0].length > 0) {
      return {
        type: 'space',
        raw: cap[0]
      };
    }
  }

  /**
   * Match an indented code block and strip up to four leading spaces per line.
   * Trailing newlines are trimmed unless `options.pedantic` is set.
   * @param {string} src
   * @returns {object|undefined} A `code` token with `codeBlockStyle: 'indented'`.
   */
  code(src) {
    const cap = this.rules.block.code.exec(src);
    if (cap) {
      const text = cap[0].replace(/^ {1,4}/gm, '');
      return {
        type: 'code',
        raw: cap[0],
        codeBlockStyle: 'indented',
        text: !this.options.pedantic
          ? rtrim(text, '\n')
          : text
      };
    }
  }

  /**
   * Match a fenced code block. `cap[2]` is used as the language and `cap[3]`
   * as the content, de-indented relative to the opening fence.
   * @param {string} src
   * @returns {object|undefined} A `code` token with `lang` and `text`.
   */
  fences(src) {
    const cap = this.rules.block.fences.exec(src);
    if (cap) {
      const raw = cap[0];
      const text = indentCodeCompensation(raw, cap[3] || '');

      return {
        type: 'code',
        raw,
        lang: cap[2] ? cap[2].trim() : cap[2],
        text
      };
    }
  }

  /**
   * Match an ATX heading (`# text`). The depth is the length of `cap[1]`.
   * @param {string} src
   * @returns {object|undefined} A `heading` token with inline child tokens.
   */
  heading(src) {
    const cap = this.rules.block.heading.exec(src);
    if (cap) {
      let text = cap[2].trim();

      // remove trailing #s
      if (/#$/.test(text)) {
        const trimmed = rtrim(text, '#');
        // Pedantic mode always strips them; otherwise CommonMark requires a
        // space before the trailing #s (or nothing but #s).
        if (this.options.pedantic || !trimmed || / $/.test(trimmed)) {
          text = trimmed.trim();
        }
      }

      const token = {
        type: 'heading',
        raw: cap[0],
        depth: cap[1].length,
        text: text,
        tokens: []
      };
      this.lexer.inline(token.text, token.tokens);
      return token;
    }
  }

  /**
   * Match a horizontal rule.
   * @param {string} src
   * @returns {{type: 'hr', raw: string}|undefined}
   */
  hr(src) {
    const cap = this.rules.block.hr.exec(src);
    if (cap) {
      return {
        type: 'hr',
        raw: cap[0]
      };
    }
  }

  /**
   * Match a blockquote, strip the leading `>` markers from every line and
   * block-tokenize the remaining text.
   * @param {string} src
   * @returns {object|undefined} A `blockquote` token with child `tokens`.
   */
  blockquote(src) {
    const cap = this.rules.block.blockquote.exec(src);
    if (cap) {
      const text = cap[0].replace(/^ *> ?/gm, '');

      return {
        type: 'blockquote',
        raw: cap[0],
        tokens: this.lexer.blockTokens(text, []),
        text
      };
    }
  }

  /**
   * Match a list and split it into `list_item` entries.
   *
   * Items are collected line by line; their child tokens are produced at the
   * end, after the final item has had its trailing whitespace trimmed.
   *
   * @param {string} src
   * @returns {object|undefined} A `list` token, or `undefined` when the list
   *   rule does not match or no item could be read.
   */
  list(src) {
    let cap = this.rules.block.list.exec(src);
    if (!cap) {
      return;
    }

    let bull = cap[1].trim();
    const isordered = bull.length > 1;

    const list = {
      type: 'list',
      raw: '',
      ordered: isordered,
      start: isordered ? +bull.slice(0, -1) : '',
      loose: false,
      items: []
    };

    bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;

    if (this.options.pedantic) {
      bull = isordered ? bull : '[*+-]';
    }

    // Get next list item
    const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`);

    // These survive the loop: the last item's values are trimmed afterwards.
    let raw;
    let itemContents;
    let endsWithBlankLine = false;

    // Check if current bullet point can start a new List Item
    while (src) {
      let endEarly = false;
      if (!(cap = itemRegex.exec(src))) {
        break;
      }

      if (this.rules.block.hr.test(src)) { // End list if bullet was actually HR (possibly move into itemRegex?)
        break;
      }

      raw = cap[0];
      src = src.substring(raw.length);

      let line = cap[2].split('\n', 1)[0];
      const nextLine = src.split('\n', 1)[0];

      let indent;
      if (this.options.pedantic) {
        indent = 2;
        itemContents = line.trimStart();
      } else {
        indent = cap[2].search(/[^ ]/); // Find first non-space char
        indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
        itemContents = line.slice(indent);
        indent += cap[1].length;
      }

      let blankLine = false;

      if (!line && /^ *$/.test(nextLine)) { // Items begin with at most one blank line
        raw += nextLine + '\n';
        src = src.substring(nextLine.length + 1);
        endEarly = true;
      }

      if (!endEarly) {
        const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])`);

        // Check if following lines should be included in List Item
        while (src) {
          const rawLine = src.split('\n', 1)[0];
          line = rawLine;

          // Re-align to follow commonmark nesting rules
          if (this.options.pedantic) {
            line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
          }

          // End list item if found start of new bullet
          if (nextBulletRegex.test(line)) {
            break;
          }

          if (line.search(/[^ ]/) >= indent || !line.trim()) { // Dedent if possible
            itemContents += '\n' + line.slice(indent);
          } else if (!blankLine) { // Until blank line, item doesn't need indentation
            itemContents += '\n' + line;
          } else { // Otherwise, improper indentation ends this item
            break;
          }

          if (!blankLine && !line.trim()) { // Check if current line is blank
            blankLine = true;
          }

          raw += rawLine + '\n';
          src = src.substring(rawLine.length + 1);
        }
      }

      if (!list.loose) {
        // If the previous item ended with a blank line, the list is loose
        if (endsWithBlankLine) {
          list.loose = true;
        } else if (/\n *\n *$/.test(raw)) {
          endsWithBlankLine = true;
        }
      }

      // Task state is reset for every item so that a plain item following a
      // task item does not inherit the previous item's `checked` value.
      let istask = null;
      let ischecked;

      // Check for task list items
      if (this.options.gfm) {
        istask = /^\[[ xX]\] /.exec(itemContents);
        if (istask) {
          ischecked = istask[0] !== '[ ] ';
          itemContents = itemContents.replace(/^\[[ xX]\] +/, '');
        }
      }

      list.items.push({
        type: 'list_item',
        raw: raw,
        task: !!istask,
        checked: ischecked,
        loose: false,
        text: itemContents
      });

      list.raw += raw;
    }

    // The loop can stop before any item is read (e.g. the first line is really
    // a horizontal rule); report "no token" instead of failing below.
    if (list.items.length === 0) {
      return;
    }

    // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
    const lastItem = list.items[list.items.length - 1];
    lastItem.raw = raw.trimEnd();
    lastItem.text = itemContents.trimEnd();
    list.raw = list.raw.trimEnd();

    const l = list.items.length;

    // Item child tokens handled here at end because we needed to have the final item to trim it first
    for (let i = 0; i < l; i++) {
      this.lexer.state.top = false;
      list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
      const spacers = list.items[i].tokens.filter(t => t.type === 'space');
      // True when every spacer holds at least two line breaks.
      const hasMultipleLineBreaks = spacers.every(t => /\n[^\n]*\n/.test(t.raw));

      if (!list.loose && spacers.length && hasMultipleLineBreaks) {
        // Having a single line break doesn't mean a list is loose. A single line break is terminating the last list item
        list.loose = true;
        list.items[i].loose = true;
      }
    }

    return list;
  }

  /**
   * Match a block of raw HTML. With `options.sanitize` the token becomes a
   * `paragraph` whose text is passed through `options.sanitizer` (if given)
   * or `escape`, and is then inline-tokenized.
   * @param {string} src
   * @returns {object|undefined} An `html` or `paragraph` token.
   */
  html(src) {
    const cap = this.rules.block.html.exec(src);
    if (cap) {
      const token = {
        type: 'html',
        raw: cap[0],
        pre: !this.options.sanitizer
          && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
        text: cap[0]
      };
      if (this.options.sanitize) {
        token.type = 'paragraph';
        token.text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]);
        token.tokens = [];
        this.lexer.inline(token.text, token.tokens);
      }
      return token;
    }
  }

  /**
   * Match a link reference definition. The tag is lower-cased with runs of
   * whitespace collapsed; the first and last characters of the title
   * capture are dropped.
   * @param {string} src
   * @returns {object|undefined} A `def` token with `tag`, `href` and `title`.
   */
  def(src) {
    const cap = this.rules.block.def.exec(src);
    if (cap) {
      if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
      const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
      return {
        type: 'def',
        tag,
        raw: cap[0],
        href: cap[2],
        title: cap[3]
      };
    }
  }

  /**
   * Match a table. A token is only returned when the header row and the
   * alignment row have the same number of cells.
   * @param {string} src
   * @returns {object|undefined} A `table` token with `header`, `align`
   *   (`'left'`, `'right'`, `'center'` or `null` per column) and `rows`.
   */
  table(src) {
    const cap = this.rules.block.table.exec(src);
    if (cap) {
      const item = {
        type: 'table',
        header: splitCells(cap[1]).map(c => { return { text: c }; }),
        align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
        rows: cap[3] ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
      };

      if (item.header.length === item.align.length) {
        item.raw = cap[0];

        let l = item.align.length;
        let i, j, k, row;
        for (i = 0; i < l; i++) {
          if (/^ *-+: *$/.test(item.align[i])) {
            item.align[i] = 'right';
          } else if (/^ *:-+: *$/.test(item.align[i])) {
            item.align[i] = 'center';
          } else if (/^ *:-+ *$/.test(item.align[i])) {
            item.align[i] = 'left';
          } else {
            item.align[i] = null;
          }
        }

        l = item.rows.length;
        for (i = 0; i < l; i++) {
          item.rows[i] = splitCells(item.rows[i], item.header.length).map(c => { return { text: c }; });
        }

        // parse child tokens inside headers and cells

        // header child tokens
        l = item.header.length;
        for (j = 0; j < l; j++) {
          item.header[j].tokens = [];
          this.lexer.inlineTokens(item.header[j].text, item.header[j].tokens);
        }

        // cell child tokens
        l = item.rows.length;
        for (j = 0; j < l; j++) {
          row = item.rows[j];
          for (k = 0; k < row.length; k++) {
            row[k].tokens = [];
            this.lexer.inlineTokens(row[k].text, row[k].tokens);
          }
        }

        return item;
      }
    }
  }

  /**
   * Match a setext heading (text underlined with `=` or `-`). Depth is 1 for
   * an `=` underline and 2 otherwise.
   * @param {string} src
   * @returns {object|undefined} A `heading` token with inline child tokens.
   */
  lheading(src) {
    const cap = this.rules.block.lheading.exec(src);
    if (cap) {
      const token = {
        type: 'heading',
        raw: cap[0],
        depth: cap[2].charAt(0) === '=' ? 1 : 2,
        text: cap[1],
        tokens: []
      };
      this.lexer.inline(token.text, token.tokens);
      return token;
    }
  }

  /**
   * Match a paragraph. A single trailing newline is removed from the text.
   * @param {string} src
   * @returns {object|undefined} A `paragraph` token with inline child tokens.
   */
  paragraph(src) {
    const cap = this.rules.block.paragraph.exec(src);
    if (cap) {
      const token = {
        type: 'paragraph',
        raw: cap[0],
        text: cap[1].charAt(cap[1].length - 1) === '\n'
          ? cap[1].slice(0, -1)
          : cap[1],
        tokens: []
      };
      this.lexer.inline(token.text, token.tokens);
      return token;
    }
  }

  /**
   * Match block-level text.
   * @param {string} src
   * @returns {object|undefined} A `text` token with inline child tokens.
   */
  text(src) {
    const cap = this.rules.block.text.exec(src);
    if (cap) {
      const token = {
        type: 'text',
        raw: cap[0],
        text: cap[0],
        tokens: []
      };
      this.lexer.inline(token.text, token.tokens);
      return token;
    }
  }

  /**
   * Match an inline escape sequence; the captured character is passed
   * through the `escape` helper.
   * @param {string} src
   * @returns {object|undefined} An `escape` token.
   */
  escape(src) {
    const cap = this.rules.inline.escape.exec(src);
    if (cap) {
      return {
        type: 'escape',
        raw: cap[0],
        text: escape(cap[1])
      };
    }
  }

  /**
   * Match an inline HTML tag and update the lexer's `inLink` / `inRawBlock`
   * state when the tag opens or closes `<a>` or one of
   * `pre|code|kbd|script`.
   * @param {string} src
   * @returns {object|undefined} An `html` token, or a `text` token when
   *   `options.sanitize` is set.
   */
  tag(src) {
    const cap = this.rules.inline.tag.exec(src);
    if (cap) {
      if (!this.lexer.state.inLink && /^<a /i.test(cap[0])) {
        this.lexer.state.inLink = true;
      } else if (this.lexer.state.inLink && /^<\/a>/i.test(cap[0])) {
        this.lexer.state.inLink = false;
      }
      if (!this.lexer.state.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
        this.lexer.state.inRawBlock = true;
      } else if (this.lexer.state.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
        this.lexer.state.inRawBlock = false;
      }

      return {
        type: this.options.sanitize
          ? 'text'
          : 'html',
        raw: cap[0],
        inLink: this.lexer.state.inLink,
        inRawBlock: this.lexer.state.inRawBlock,
        text: this.options.sanitize
          ? (this.options.sanitizer
            ? this.options.sanitizer(cap[0])
            : escape(cap[0]))
          : cap[0]
      };
    }
  }

  /**
   * Match an inline link or image (`[text](href "title")`).
   * @param {string} src
   * @returns {object|undefined} A `link` or `image` token; `undefined` when
   *   the rule does not match or an angle-bracketed destination is malformed.
   */
  link(src) {
    const cap = this.rules.inline.link.exec(src);
    if (cap) {
      const trimmedUrl = cap[2].trim();
      if (!this.options.pedantic && /^</.test(trimmedUrl)) {
        // commonmark requires matching angle brackets
        if (!(/>$/.test(trimmedUrl))) {
          return;
        }

        // ending angle bracket cannot be escaped
        const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
        if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
          return;
        }
      } else {
        // find closing parenthesis
        const lastParenIndex = findClosingBracket(cap[2], '()');
        if (lastParenIndex > -1) {
          const start = cap[0].indexOf('!') === 0 ? 5 : 4;
          const linkLen = start + cap[1].length + lastParenIndex;
          cap[2] = cap[2].substring(0, lastParenIndex);
          cap[0] = cap[0].substring(0, linkLen).trim();
          cap[3] = '';
        }
      }
      let href = cap[2];
      let title = '';
      if (this.options.pedantic) {
        // split pedantic href and title
        const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);

        if (link) {
          href = link[1];
          title = link[3];
        }
      } else {
        title = cap[3] ? cap[3].slice(1, -1) : '';
      }

      href = href.trim();
      if (/^</.test(href)) {
        if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
          // pedantic allows starting angle bracket without ending angle bracket
          href = href.slice(1);
        } else {
          href = href.slice(1, -1);
        }
      }
      return outputLink(cap, {
        href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
        title: title ? title.replace(this.rules.inline._escapes, '$1') : title
      }, cap[0], this.lexer);
    }
  }

  /**
   * Match a reference-style link (`[text][ref]`, or the `nolink` form) and
   * resolve it against `links`. When the reference is unknown, only the first
   * character of the match is returned as a `text` token.
   * @param {string} src
   * @param {object} links - Map from lower-cased reference label to link data.
   * @returns {object|undefined} A `link`, `image` or `text` token.
   */
  reflink(src, links) {
    let cap;
    if ((cap = this.rules.inline.reflink.exec(src))
        || (cap = this.rules.inline.nolink.exec(src))) {
      let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
      link = links[link.toLowerCase()];
      if (!link || !link.href) {
        const text = cap[0].charAt(0);
        return {
          type: 'text',
          raw: text,
          text
        };
      }
      return outputLink(cap, link, cap[0], this.lexer);
    }
  }

  /**
   * Match emphasis (`em`) or strong emphasis (`strong`) starting at `src`.
   * @param {string} src
   * @param {string} maskedSrc - Source in which closing delimiters are
   *   searched; it is clipped to the same section of the string as `src`.
   * @param {string} [prevChar=''] - Character preceding `src`.
   * @returns {object|undefined} An `em` or `strong` token with child tokens.
   */
  emStrong(src, maskedSrc, prevChar = '') {
    let match = this.rules.inline.emStrong.lDelim.exec(src);
    if (!match) return;

    // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
    if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return;

    const nextChar = match[1] || match[2] || '';

    if (!nextChar || (nextChar && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
      const lLength = match[0].length - 1;
      let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;

      const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
      // The closing-delimiter regex is scanned repeatedly with exec(), so
      // start from the beginning.
      endReg.lastIndex = 0;

      // Clip maskedSrc to same section of string as src (move to lexer?)
      maskedSrc = maskedSrc.slice(-1 * src.length + lLength);

      while ((match = endReg.exec(maskedSrc)) != null) {
        rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6];

        if (!rDelim) continue; // skip single * in __abc*abc__

        rLength = rDelim.length;

        if (match[3] || match[4]) { // found another Left Delim
          delimTotal += rLength;
          continue;
        } else if (match[5] || match[6]) { // either Left or Right Delim
          if (lLength % 3 && !((lLength + rLength) % 3)) {
            midDelimTotal += rLength;
            continue; // CommonMark Emphasis Rules 9-10
          }
        }

        delimTotal -= rLength;

        if (delimTotal > 0) continue; // Haven't found enough closing delimiters

        // Remove extra characters. *a*** -> *a*
        rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);

        // Create `em` if smallest delimiter has odd char count. *a***
        if (Math.min(lLength, rLength) % 2) {
          const text = src.slice(1, lLength + match.index + rLength);
          return {
            type: 'em',
            raw: src.slice(0, lLength + match.index + rLength + 1),
            text,
            tokens: this.lexer.inlineTokens(text, [])
          };
        }

        // Create 'strong' if smallest delimiter has even char count. **a***
        const text = src.slice(2, lLength + match.index + rLength - 1);
        return {
          type: 'strong',
          raw: src.slice(0, lLength + match.index + rLength + 1),
          text,
          tokens: this.lexer.inlineTokens(text, [])
        };
      }
    }
  }

  /**
   * Match an inline code span. Newlines become spaces, and one space is
   * stripped from each end when both ends have one and the content is not
   * all spaces.
   * @param {string} src
   * @returns {object|undefined} A `codespan` token with escaped text.
   */
  codespan(src) {
    const cap = this.rules.inline.code.exec(src);
    if (cap) {
      let text = cap[2].replace(/\n/g, ' ');
      const hasNonSpaceChars = /[^ ]/.test(text);
      const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
      if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
        text = text.substring(1, text.length - 1);
      }
      text = escape(text, true);
      return {
        type: 'codespan',
        raw: cap[0],
        text
      };
    }
  }

  /**
   * Match a hard line break.
   * @param {string} src
   * @returns {{type: 'br', raw: string}|undefined}
   */
  br(src) {
    const cap = this.rules.inline.br.exec(src);
    if (cap) {
      return {
        type: 'br',
        raw: cap[0]
      };
    }
  }

  /**
   * Match strikethrough text and inline-tokenize its content (`cap[2]`).
   * @param {string} src
   * @returns {object|undefined} A `del` token with child tokens.
   */
  del(src) {
    const cap = this.rules.inline.del.exec(src);
    if (cap) {
      return {
        type: 'del',
        raw: cap[0],
        text: cap[2],
        tokens: this.lexer.inlineTokens(cap[2], [])
      };
    }
  }

  /**
   * Match an autolink. When `cap[2]` is `@` the target is treated as an
   * e-mail address: the href gets a `mailto:` prefix and the text is passed
   * through `mangle` if `options.mangle` is set.
   * @param {string} src
   * @param {function(string): string} mangle - Obfuscator for e-mail text.
   * @returns {object|undefined} A `link` token with one `text` child.
   */
  autolink(src, mangle) {
    const cap = this.rules.inline.autolink.exec(src);
    if (cap) {
      let text, href;
      if (cap[2] === '@') {
        text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
        href = 'mailto:' + text;
      } else {
        text = escape(cap[1]);
        href = text;
      }

      return {
        type: 'link',
        raw: cap[0],
        text,
        href,
        tokens: [
          {
            type: 'text',
            raw: text,
            text
          }
        ]
      };
    }
  }

  /**
   * Match a bare URL or e-mail address. Non-e-mail matches are repeatedly
   * trimmed with the `_backpedal` rule until the match stops changing, and
   * matches starting with `www.` get an `http://` href prefix.
   * @param {string} src
   * @param {function(string): string} mangle - Obfuscator for e-mail text.
   * @returns {object|undefined} A `link` token with one `text` child.
   */
  url(src, mangle) {
    const cap = this.rules.inline.url.exec(src);
    if (!cap) {
      return;
    }

    let text, href;
    if (cap[2] === '@') {
      text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
      href = 'mailto:' + text;
    } else {
      // do extended autolink path validation
      let prevCapZero;
      do {
        prevCapZero = cap[0];
        cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
      } while (prevCapZero !== cap[0]);
      text = escape(cap[0]);
      if (cap[1] === 'www.') {
        href = 'http://' + text;
      } else {
        href = text;
      }
    }

    return {
      type: 'link',
      raw: cap[0],
      text,
      href,
      tokens: [
        {
          type: 'text',
          raw: text,
          text
        }
      ]
    };
  }

  /**
   * Match plain inline text. Inside a raw block the text is only sanitized
   * when `options.sanitize` is set; otherwise it is escaped, after
   * `smartypants` has been applied if `options.smartypants` is set.
   * @param {string} src
   * @param {function(string): string} smartypants - Typographic replacer.
   * @returns {object|undefined} A `text` token.
   */
  inlineText(src, smartypants) {
    const cap = this.rules.inline.text.exec(src);
    if (cap) {
      let text;
      if (this.lexer.state.inRawBlock) {
        text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
      } else {
        text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
      }
      return {
        type: 'text',
        raw: cap[0],
        text
      };
    }
  }
}
|