// FIXME: Nested blockquotes require blank line
// TODO: HTML tags probably need better handling. Consider whether interior of matched tags should be interpreted as markdown.
// TODO: Test broken/incomplete syntax thoroughly
// TODO: Sanity checks on loops/recursion?
// TODO: Tolerate whitespace between tokens (e.g. [click here] [urlref])
// TODO: Spreadsheet functions in tables
// TODO: Support document differentiators for CSS identifiers

/**
 * Enumeration-like class whose static members name every kind of inline
 * token. Members are singleton instances, so they can be compared by
 * identity.
 *
 * The `META_*` members never appear on an actual token; they are wildcards
 * for use in search patterns passed to `MDToken.findFirstTokens`.
 */
class MDTokenType {
	// Plain content
	static Text = new MDTokenType('Text');
	static Whitespace = new MDTokenType('Whitespace');

	// Single-character delimiters
	static Underscore = new MDTokenType('Underscore');
	static Asterisk = new MDTokenType('Asterisk');
	static Slash = new MDTokenType('Slash');
	static Tilde = new MDTokenType('Tilde');
	static Bang = new MDTokenType('Bang');
	static Backtick = new MDTokenType('Backtick');

	// Structured tokens
	static Label = new MDTokenType('Label'); // content=label
	static URL = new MDTokenType('URL'); // content=URL, extra=title
	static Email = new MDTokenType('Email'); // content=email address, extra=title
	static SimpleLink = new MDTokenType('SimpleLink'); // content=URL
	static SimpleEmail = new MDTokenType('SimpleEmail'); // content=email address
	static Footnote = new MDTokenType('Footnote'); // content=symbol
	static Modifier = new MDTokenType('Modifier'); // content
	static HTMLTag = new MDTokenType('HTMLTag'); // content=tag string, tag=MDHTMLTag

	// Pattern-only wildcards
	static META_AnyNonWhitespace = new MDTokenType('METAAnyNonWhitespace');
	static META_OptionalWhitespace = new MDTokenType('METAOptionalWhitespace');

	/**
	 * Short identifier of this token type, used by `toString`.
	 * @type {string}
	 */
	name;

	/**
	 * @param {string} name - identifier for the new token type
	 */
	constructor(name) {
		this.name = name;
	}

	/**
	 * @returns {string} the constructor's class name and this type's name
	 * joined by a dot, e.g. `MDTokenType.Text`
	 */
	toString() {
		const className = this.constructor.name;
		return `${className}.${this.name}`;
	}
}
/**
 * One lexical unit of inline markdown, plus static helpers for searching
 * arrays of tokens for patterns of `MDTokenType`.
 */
class MDToken {
	/**
	 * The original token string.
	 * @type {string}
	 */
	original;

	/** @type {MDTokenType} */
	type;

	/** @type {string|null} */
	content;

	/** @type {string|null} */
	extra;

	/** @type {MDHTMLTag|null} */
	tag;

	/** @type {MDTagModifier|null} */
	modifier;

	/**
	 * If `content` is an `MDTagModifier` it is stored in `modifier` and
	 * `content` is set to `null`; otherwise it is stored in `content` and
	 * `modifier` is set to `null`.
	 *
	 * @param {string} original
	 * @param {MDTokenType} type
	 * @param {string|MDTagModifier|null} content
	 * @param {string|null} extra
	 * @param {MDHTMLTag|null} tag
	 */
	constructor(original, type, content=null, extra=null, tag=null) {
		this.original = original;
		this.type = type;
		if (content instanceof MDTagModifier) {
			this.content = null;
			this.modifier = content;
		} else {
			this.content = content;
			this.modifier = null;
		}
		this.extra = extra;
		this.tag = tag;
	}

	/**
	 * Searches an array of MDToken for the given pattern of MDTokenTypes.
	 * If found, returns an object with the given keys.
	 * - `tokens: MDToken[]` - the subarray of `tokensToSearch` that match the pattern
	 * - `index: number` - index into `tokensToSearch` of first matching token
	 *
	 * Pattern elements may include `MDTokenType.META_OptionalWhitespace`
	 * (matches zero or one whitespace token) and
	 * `MDTokenType.META_AnyNonWhitespace` (matches any one element that is
	 * not a whitespace token, including non-`MDToken` elements).
	 *
	 * @param {MDToken[]|MDSpan[]} tokensToSearch
	 * @param {MDTokenType[]} pattern
	 * @param {number} startIndex
	 * @returns {object|null} match, or `null` if the pattern does not occur
	 */
	static findFirstTokens(tokensToSearch, pattern, startIndex=0) {
		for (let t = startIndex; t < tokensToSearch.length; t++) {
			const matched = [];
			let matchedAll = true;
			// Index of the next element of `tokensToSearch` to consume.
			let cursor = t;
			for (const elem of pattern) {
				const exhausted = cursor >= tokensToSearch.length;
				const token = exhausted ? null : tokensToSearch[cursor];
				const isWhitespace = token instanceof MDToken && token.type === MDTokenType.Whitespace;
				if (elem === MDTokenType.META_OptionalWhitespace) {
					// Optional: consume a whitespace token if one is here,
					// otherwise match nothing. Running off the end of the
					// input is fine for an optional element.
					if (isWhitespace) {
						matched.push(token);
						cursor++;
					}
					continue;
				}
				if (exhausted) {
					matchedAll = false;
					break;
				}
				if (elem === MDTokenType.META_AnyNonWhitespace) {
					if (isWhitespace) {
						matchedAll = false;
						break;
					}
				} else if (!(token instanceof MDToken) || token.type !== elem) {
					matchedAll = false;
					break;
				}
				matched.push(token);
				cursor++;
			}
			if (matchedAll) {
				return {
					'tokens': matched,
					'index': t,
				};
			}
		}
		return null;
	}

	/**
	 * Searches an array of MDToken for a given starting pattern and ending
	 * pattern and returns match info about both and the tokens in between.
	 *
	 * If `contentValidator` is specified, it will be called with the content
	 * tokens of a potential match. If the validator returns `true`, the result
	 * will be accepted and returned by this method. If the validator returns
	 * `false`, this method will keep looking for another matching pair. If no
	 * validator is given the first match with non-empty content is returned.
	 *
	 * If a match is found, returns an object with the given keys:
	 * - `startTokens: MDToken[]` - tokens that matched `startPattern`
	 * - `contentTokens: MDToken[]` - tokens between the start and end pattern. Never empty; candidate pairs with nothing between them are skipped.
	 * - `endTokens: MDToken[]` - tokens that matched `endPattern`
	 * - `startIndex: number` - index into `tokensToSearch` where `startPattern` begins
	 * - `contentIndex: number` - index into `tokensToSearch` of the first token that is between the start and end patterns
	 * - `endIndex: number` - index into `tokensToSearch` where `endPattern` begins
	 * - `totalLength: number` - total number of matched tokens
	 *
	 * @param {MDToken[]} tokensToSearch - array of `MDToken` to search in
	 * @param {MDTokenType[]} startPattern - array of `MDTokenType` to find first
	 * @param {MDTokenType[]} endPattern - array of `MDTokenType` to find positioned after `startPattern`
	 * @param {function|null} contentValidator - optional validator function.
	 *   If provided, will be passed an array of inner `MDToken`, and the
	 *   function can return `true` to accept the contents or `false` to keep
	 *   searching
	 * @param {number} startIndex - token index where searching should begin
	 * @returns {object|null} match object
	 */
	static findPairedTokens(tokensToSearch, startPattern, endPattern, contentValidator=null, startIndex=0) {
		for (let s = startIndex; s < tokensToSearch.length; s++) {
			const startMatch = this.findFirstTokens(tokensToSearch, startPattern, s);
			if (startMatch === null) return null;
			const contentIndex = startMatch.index + startMatch.tokens.length;
			let endStart = contentIndex;
			while (endStart < tokensToSearch.length) {
				const endMatch = this.findFirstTokens(tokensToSearch, endPattern, endStart);
				if (endMatch === null) break;
				const contents = tokensToSearch.slice(contentIndex, endMatch.index);
				if (contents.length > 0 && (contentValidator === null || contentValidator(contents))) {
					return {
						'startTokens': startMatch.tokens,
						'contentTokens': contents,
						'endTokens': endMatch.tokens,
						'startIndex': startMatch.index,
						'contentIndex': contentIndex,
						'endIndex': endMatch.index,
						'totalLength': endMatch.index + endMatch.tokens.length - startMatch.index,
					};
				}
				// Contents rejected. Try next end match.
				endStart = endMatch.index + 1;
			}
			// No acceptable end match for this start. Resume the search just
			// after it (the loop increment moves past `startMatch.index`).
			s = startMatch.index;
		}
		return null;
	}
}
/**
 * Static, stateless helpers shared by the markdown readers: HTML escaping,
 * indent handling, and tokenizers for labels, URLs and email addresses.
 */
class MDUtils {
	// Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
	static baseURLRegex = /(?:(?:(?:[a-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[a-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[a-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/i;

	// Modified from https://emailregex.com/ to remove capture groups.
	static baseEmailRegex = /(?:(?:[^<>()\[\]\\.,;:\s@"]+(?:\.[^<>()\[\]\\.,;:\s@"]+)*)|(?:".+"))@(?:(?:\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(?:(?:[a-z\-0-9]+\.)+[a-z]{2,}))/i;

	/**
	 * Escapes the characters `&`, `<`, `>` and `"` as HTML entities.
	 *
	 * @param {string} str
	 * @returns {string}
	 */
	static escapeHTML(str) {
		// `&` must be replaced first so the entities added below are not
		// themselves re-escaped.
		return str
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;');
	}

	/**
	 * Encodes every character of `text` as a decimal numeric character
	 * reference (`&#NNN;`).
	 *
	 * @param {string} text - text to encode, e.g. an email address
	 * @returns {string} HTML-safe encoded text
	 */
	static escapeObfuscated(text) {
		let html = '';
		// Iterating the string yields whole code points, so characters
		// outside the BMP produce a single reference rather than two.
		for (const ch of text) {
			html += `&#${ch.codePointAt(0)};`;
		}
		return html;
	}

	/**
	 * Strips leading indents from a line or lines of markdown. An indent is
	 * defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3 spaces)
	 * are treated like one indent level. A line on which `levels` indents
	 * cannot be matched is returned unchanged.
	 *
	 * @param {string|string[]} line - string or strings to strip
	 * @param {number} levels - how many indent levels to strip
	 * @returns {string|string[]} stripped lines
	 */
	static stripIndent(line, levels=1) {
		const regex = new RegExp(`^(?: {1,4}|\t){${levels}}`);
		return (line instanceof Array)
			? line.map((l) => l.replace(regex, ''))
			: line.replace(regex, '');
	}

	/**
	 * Returns a copy of an array without any whitespace-only lines at the end.
	 *
	 * @param {String[]} lines - text lines
	 * @returns {String[]} - text lines without trailing blank lines
	 */
	static withoutTrailingBlankLines(lines) {
		const stripped = lines.slice();
		while (stripped.length > 0 && stripped[stripped.length - 1].trim().length === 0) {
			stripped.pop();
		}
		return stripped;
	}

	/**
	 * Counts the number of indent levels at the start of a line of text.
	 * Partial indents (1 to 3 spaces) are counted as one indent level unless
	 * `fullIndentsOnly` is `true`.
	 *
	 * @param {string} line - line of markdown
	 * @param {boolean} fullIndentsOnly - whether to only count full indent levels (4 spaces or a tab)
	 * @returns {number} number of indent levels found
	 */
	static countIndents(line, fullIndentsOnly=false) {
		// Sticky + global: `match` returns only the run of consecutive
		// indents anchored at the start of the line.
		const indentRegex = fullIndentsOnly ? /(?: {4}|\t)/gy : /(?: {1,4}|\t)/gy;
		const indents = line.match(indentRegex);
		return (indents === null) ? 0 : indents.length;
	}

	/**
	 * Attempts to parse a label from the beginning of `line`. A label is of the
	 * form `[content]`. Nested brackets are allowed inside the content and a
	 * backslash escapes the following character. If found, returns an array
	 * with element 0 being the entire label and element 1 being the content
	 * of the label.
	 *
	 * @param {string} line
	 * @returns {string[]|null} match groups or null if not found
	 */
	static tokenizeLabel(line) {
		if (!line.startsWith('[')) return null;
		let parenCount = 0;
		let bracketCount = 0;
		for (let p = 1; p < line.length; p++) {
			const ch = line[p];
			if (ch === '\\') {
				p++; // skip the escaped character
			} else if (ch === '(') {
				parenCount++;
			} else if (ch === ')') {
				parenCount--;
				if (parenCount < 0) return null;
			} else if (ch === '[') {
				bracketCount++;
			} else if (ch === ']') {
				if (bracketCount > 0) {
					bracketCount--;
				} else {
					return [ line.substring(0, p + 1), line.substring(1, p) ];
				}
			}
		}
		return null;
	}

	static #urlWithTitleRegex = /^\((\S+?)\s+"(.*?)"\)/i; // 1=URL, 2=title
	static #urlRegex = /^\((\S+?)\)/i; // 1=URL

	/**
	 * Attempts to parse a URL from the beginning of `line`. A URL is of the
	 * form `(url)` or `(url "title")`. If found, returns an array with element
	 * 0 being the entire URL token, 1 is the URL, 2 is the optional title
	 * (`null` when absent). Returns `null` if the text is better described
	 * as an email address (see `tokenizeEmail`).
	 *
	 * @param {string} line
	 * @returns {string[]|null} token tuple or null if not found
	 */
	static tokenizeURL(line) {
		const withTitle = this.#urlWithTitleRegex.exec(line);
		const groups = withTitle ?? this.#urlRegex.exec(line);
		if (groups === null) return null;
		// make sure it's not better described as an email address
		if (this.tokenizeEmail(line)) return null;
		return (withTitle !== null) ? groups : [...groups, null];
	}

	static #emailWithTitleRegex = new RegExp("^\\(\\s*(" + MDUtils.baseEmailRegex.source + ")\\s+\"(.*?)\"\\s*\\)", "i"); // 1=email, 2=title
	static #emailRegex = new RegExp("^\\(\\s*(" + MDUtils.baseEmailRegex.source + ")\\s*\\)", "i"); // 1=email

	/**
	 * Attempts to parse an email address from the beginning of `line`. An
	 * email address is of the form `(user@example.com)` or `(user@example.com "link title")`.
	 * If found, returns an array with element 0 being the entire token, 1 is the
	 * email address, and 2 is the optional link title (`null` when absent).
	 *
	 * @param {string} line
	 * @returns {string[]|null} token tuple or null if not found
	 */
	static tokenizeEmail(line) {
		const withTitle = this.#emailWithTitleRegex.exec(line);
		if (withTitle !== null) return withTitle;
		const plain = this.#emailRegex.exec(line);
		if (plain !== null) return [...plain, null];
		return null;
	}

	/**
	 * Describes the type of a variable for debugging.
	 *
	 * @param {any} value - value
	 * @returns {String} `'null'`, the constructor name for objects, or the
	 *   `typeof` string otherwise
	 */
	static typename(value) {
		if (value === null) return 'null';
		if (value instanceof Object) {
			return value.constructor.name;
		}
		return typeof value;
	}
}
// -- Block readers ---------------------------------------------------------

/**
 * Base class for reading from an array of markdown lines and finding a block
 * of a given type. Readers are checked in `priority` order and `readBlock` is
 * called to see whether the reader implementation recognizes a particular
 * kind of block at the given line pointer, returning that block if so or
 * null if not.
 *
 * Inline markdown is processed in a separate stage by `MDInlineReader`.
 */
class MDBlockReader {
	/** @type {number} */
	#priority;

	/**
	 * A unitless relative priority value that determines which readers are
	 * tried first. Lower values are tried first. In the range of 0.0 to 100.0.
	 * @type {number} priority
	 */
	get priority() {
		return this.#priority;
	}

	/**
	 * Creates a block reader.
	 *
	 * When overriding the constructor, it is suggested to allow the caller to
	 * specify an optional custom priority value, falling back to a reasonable
	 * default if not specified.
	 *
	 * @param {number} priority
	 */
	constructor(priority) {
		this.#priority = priority;
	}

	/**
	 * Attempts to read a block of this type from the given read state. If
	 * successful, the state's line pointer should be incremented to the line
	 * just after the last line of the block and the read block should be
	 * returned.
	 *
	 * @param {MDState} state - read state
	 * @returns {MDBlock|null} the read block
	 */
	readBlock(state) {
		const subclassName = this.constructor.name;
		throw new Error(`Abstract readBlock must be overridden in ${subclassName}`);
	}

	/**
	 * Called after the full document has been generated for optional
	 * post-processing. The base implementation does nothing.
	 *
	 * @param {MDState} state
	 * @param {MDBlock[]} blocks - top-level document block list
	 */
	postProcess(state, blocks) {
		// no op
	}
}

/**
 * Reads markdown blocks for headers denoted with the underline syntax. A line
 * of `=` characters under the text yields a level 1 header; a line of `-`
 * characters yields a level 2 header.
 *
 * Example:
 *
 * > ```markdown
 * > Header 1
 * > ========
 * > ```
 */
class MDUnderlinedHeaderBlockReader extends MDBlockReader {
	constructor(priority=0.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} header block, or null if the next two lines
	 *   are not an underlined header
	 */
	readBlock(state) {
		if (!state.hasLines(2)) return null;
		const first = state.p;
		const [headerText, modifier] = MDTagModifier.fromLine(state.lines[first].trim());
		const underline = state.lines[first + 1].trim();
		if (headerText === '') return null;

		let level;
		if (/^=+$/.test(underline)) {
			level = 1;
		} else if (/^\-+$/.test(underline)) {
			level = 2;
		} else {
			return null;
		}

		// Both lines are consumed before the inline content is parsed.
		state.p = first + 2;
		const block = new MDHeaderBlock(level, state.inlineMarkdownToSpan(headerText));
		if (modifier) modifier.applyTo(block);
		return block;
	}
}
/**
 * Reads markdown blocks for headers denoted with hash marks. Header levels 1 to
 * 6 are supported.
 *
 * Examples:
 *
 * > ```markdown
 * > # Header 1
 * >
 * > ## Header 2
 * >
 * > # Enclosing Hashes Are Optional #
 * >
 * > ## Trailing Hashes Don't Have to Match in Number ####
 * > ```
 */
class MDHashHeaderBlockReader extends MDBlockReader {
	static #hashHeaderRegex = /^(#{1,6})\s*([^#].*?)\s*\#*\s*$/; // 1=hashes, 2=content

	constructor(priority=5.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} header block, or null if the current line is
	 *   not a hash header
	 */
	readBlock(state) {
		const [line, modifier] = MDTagModifier.fromLine(state.lines[state.p]);
		const groups = MDHashHeaderBlockReader.#hashHeaderRegex.exec(line);
		if (groups === null) return null;
		state.p++;
		const level = groups[1].length; // number of leading hashes
		const content = groups[2];
		const block = new MDHeaderBlock(level, state.inlineMarkdownToSpan(content));
		if (modifier) modifier.applyTo(block);
		return block;
	}
}

/**
 * Reads markdown blocks for blockquoted text. Consecutive lines starting
 * with `>` form one blockquote; their content (with the `>` and up to three
 * spaces and one tab removed) is parsed as nested block markdown.
 *
 * Example:
 *
 * > ```markdown
 * > > Blockquoted text
 * > ```
 */
class MDBlockQuoteBlockReader extends MDBlockReader {
	constructor(priority=10.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} blockquote block, or null if the current line
	 *   does not start with `>`
	 */
	readBlock(state) {
		let p = state.p;
		const blockquoteLines = [];
		// Stop ON the first non-quote line without consuming it, so that
		// whatever follows the quote is still available to other readers.
		while (p < state.lines.length && state.lines[p].startsWith(">")) {
			blockquoteLines.push(state.lines[p]);
			p++;
		}
		if (blockquoteLines.length === 0) return null;

		const contentLines = blockquoteLines.map((line) => line.substring(1).replace(/^ {0,3}\t?/, ''));
		const substate = state.copy(contentLines);
		const quotedBlocks = substate.readBlocks();
		state.p = p;
		return new MDBlockquoteBlock(quotedBlocks);
	}
}
/**
 * Shared base for the ordered and unordered list readers. Provides
 * `readListItemContent`, which gathers the lines belonging to one list item
 * and converts them to a block.
 */
class MDBaseListBlockReader extends MDBlockReader {
	// Start of a list item: a bullet (`*`, `+`, `-`) or a number followed by
	// a dot, then whitespace.
	static #listItemStartRegex = /^(?:\*|\+|\-|\d+\.)\s+/;

	constructor(priority) {
		super(priority);
	}

	/**
	 * Collects the lines of the list item starting at `state.p`, without
	 * moving `state.p`. Collection stops at the next list item marker or at
	 * the first unindented line that follows a blank line.
	 *
	 * @param {MDState} state
	 * @param {number} firstLineStartPos - offset into the first line where
	 *   the item's content begins (i.e. past the bullet or number)
	 * @returns {string[]} item lines with one indent level stripped
	 */
	#readItemLines(state, firstLineStartPos) {
		let p = state.p;
		let lines = [];
		let seenBlankLine = false;
		let stripTrailingBlankLines = true;
		while (state.hasLines(1, p)) {
			const isFirstLine = (p === state.p);
			let line = state.lines[p++];
			if (isFirstLine) {
				// The first line always belongs to this item, even when its
				// content happens to look like a list marker (e.g. `* * x`).
				// Treating it as "the next item" would return no lines and
				// leave the caller unable to advance.
				line = line.substring(firstLineStartPos);
			} else if (MDBaseListBlockReader.#listItemStartRegex.test(line)) {
				// Found next list item
				stripTrailingBlankLines = false; // because this signals extra spacing intended
				break;
			}
			const isBlankLine = line.trim().length === 0;
			const isIndented = /^\s+\S/.test(line);
			if (isBlankLine) {
				seenBlankLine = true;
			} else if (!isIndented && seenBlankLine) {
				// Post-list content
				break;
			}
			lines.push(line);
		}
		if (stripTrailingBlankLines) {
			lines = MDUtils.withoutTrailingBlankLines(lines);
		}
		return MDUtils.stripIndent(lines);
	}

	/**
	 * Reads the content of the list item beginning at `state.p` and advances
	 * `state.p` past the lines that were consumed (always at least one).
	 *
	 * - A single-line item becomes an `MDInlineBlock`.
	 * - An item containing blank lines is parsed as nested block markdown.
	 * - A multi-line item with no blank lines is scanned for a nested list;
	 *   text before it becomes an `MDInlineBlock` and the rest is parsed as
	 *   blocks.
	 *
	 * @param {MDState} state
	 * @param {number} firstLineStartPos - offset into the first line where
	 *   the item's content begins
	 * @return {MDBlock}
	 */
	readListItemContent(state, firstLineStartPos) {
		const itemLines = this.#readItemLines(state, firstLineStartPos);
		if (itemLines.length === 0) {
			// Item with no content (e.g. a bare bullet). Still consume its
			// line so the calling reader makes progress.
			state.p++;
			return new MDInlineBlock(state.inlineMarkdownToSpans(''));
		}
		state.p += itemLines.length;

		if (itemLines.length === 1) {
			return new MDInlineBlock(state.inlineMarkdownToSpans(itemLines[0]));
		}

		const hasBlankLines = itemLines.some((line) => line.trim().length === 0);
		if (!hasBlankLines) {
			// Multiline content with no blank lines. Search for new block
			// boundaries without the benefit of a blank line to demarcate it.
			for (let p = 1; p < itemLines.length; p++) {
				if (MDBaseListBlockReader.#listItemStartRegex.test(itemLines[p])) {
					// Nested list found
					const firstBlock = new MDInlineBlock(state.inlineMarkdownToSpans(itemLines.slice(0, p).join("\n")));
					const substate = state.copy(itemLines.slice(p));
					const blocks = substate.readBlocks();
					return new MDMultiBlock([ firstBlock, ...blocks ]);
				}
			}
		}

		// Either blank lines are present or no nested list was found: do a
		// standard block read of the item's lines.
		const substate = state.copy(itemLines);
		const blocks = substate.readBlocks();
		return (blocks.length === 1) ? blocks[0] : new MDMultiBlock(blocks);
	}

	readBlock(state) {
		throw new Error(`Abstract readBlock must be overridden in ${this.constructor.name}`);
	}
}
/**
 * Block reader for unordered (bulleted) lists. Items may be bulleted with
 * `*`, `+` or `-`.
 *
 * Example:
 *
 * > ```markdown
 * > * First item
 * > * Second item
 * > * Third item
 * > ```
 */
class MDUnorderedListBlockReader extends MDBaseListBlockReader {
	static #unorderedListRegex = /^([\*\+\-]\s+)(.*)$/; // 1=bullet, 2=content

	constructor(priority=15.0) {
		super(priority);
	}

	/**
	 * Reads one bulleted item at the state's current line, if there is one.
	 *
	 * @param {MDState} state
	 * @returns {MDListItemBlock|null}
	 */
	#readUnorderedListItem(state) {
		const match = MDUnorderedListBlockReader.#unorderedListRegex.exec(state.lines[state.p]);
		if (match === null) return null;
		// Item content starts right after the bullet and its whitespace.
		const [, bullet] = match;
		return new MDListItemBlock(this.readListItemContent(state, bullet.length));
	}

	/**
	 * Reads consecutive bulleted items into a single list block.
	 *
	 * @param {MDState} state
	 * @returns {MDBlock|null} list block, or null if no item was found
	 */
	readBlock(state) {
		const items = [];
		for (let item = this.#readUnorderedListItem(state); item; item = this.#readUnorderedListItem(state)) {
			items.push(item);
		}
		return (items.length === 0) ? null : new MDUnorderedListBlock(items);
	}
}
/**
 * Block reader for ordered (numbered) lists. The number of the first item is
 * used to begin counting. The subsequent items increase by 1, regardless of
 * their value.
 *
 * Example:
 *
 * > ```markdown
 * > 1. First
 * > 2. Second
 * > 3. Third
 * > ```
 */
class MDOrderedListBlockReader extends MDBaseListBlockReader {
	static #orderedListRegex = /^(\d+)(\.\s+)(.*)$/; // 1=number, 2=dot, 3=content

	constructor(priority=16.0) {
		super(priority);
	}

	/**
	 * Reads one numbered item at the state's current line, if there is one.
	 *
	 * @param {MDState} state
	 * @returns {MDListItemBlock|null}
	 */
	#readOrderedListItem(state) {
		const match = MDOrderedListBlockReader.#orderedListRegex.exec(state.lines[state.p]);
		if (match === null) return null;
		const [, number, dot] = match;
		const ordinal = parseInt(number, 10);
		// Item content starts after the number, the dot and its whitespace.
		const content = this.readListItemContent(state, number.length + dot.length);
		return new MDListItemBlock(content, ordinal);
	}

	/**
	 * Reads consecutive numbered items into a single list block that starts
	 * at the first item's ordinal.
	 *
	 * @param {MDState} state
	 * @returns {MDBlock|null} list block, or null if no item was found
	 */
	readBlock(state) {
		const items = [];
		for (let item = this.#readOrderedListItem(state); item; item = this.#readOrderedListItem(state)) {
			items.push(item);
		}
		if (items.length === 0) return null;
		return new MDOrderedListBlock(items, items[0].ordinal);
	}
}

/**
 * Block reader for code blocks denoted by pairs of triple tickmarks.
 *
 * Example:
 *
 * > ```markdown
 * > \`\`\`
 * > function formattedAsCode() {
 * > }
 * > \`\`\`
 * > ```
 */
class MDFencedCodeBlockReader extends MDBlockReader {
	constructor(priority=20.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} code block, or null if the current line is
	 *   not an opening fence or no closing fence follows
	 */
	readBlock(state) {
		if (!state.hasLines(2)) return null;
		const [openFenceLine, modifier] = MDTagModifier.fromLine(state.lines[state.p]);
		if (openFenceLine.trim() != '```') return null;

		const codeLines = [];
		for (let p = state.p + 1; state.hasLines(1, p); p++) {
			const line = state.lines[p];
			if (line.trim() == '```') {
				// Closing fence found; resume after it.
				state.p = p + 1;
				const block = new MDCodeBlock(codeLines.join("\n"));
				if (modifier) modifier.applyTo(block);
				return block;
			}
			codeLines.push(line);
		}
		// Ran out of lines without a closing fence.
		return null;
	}
}
/**
 * Block reader for code blocks denoted by indenting text. Consecutive lines
 * with at least one full indent form the block; one indent level is removed
 * from each line.
 *
 * Example (indent spaces rendered visibly for clarity):
 *
 * > ```markdown
 * > ⎵⎵⎵⎵function formattedAsCode() {
 * > ⎵⎵⎵⎵}
 * > ```
 */
class MDIndentedCodeBlockReader extends MDBlockReader {
	constructor(priority=21.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} code block, or null if the current line is
	 *   not indented
	 */
	readBlock(state) {
		const codeLines = [];
		let p = state.p;
		// Take lines for as long as they carry a full indent; the first
		// line that does not is left unconsumed.
		while (state.hasLines(1, p) && MDUtils.countIndents(state.lines[p], true) >= 1) {
			codeLines.push(MDUtils.stripIndent(state.lines[p]));
			p++;
		}
		if (codeLines.length === 0) return null;
		state.p = p;
		return new MDCodeBlock(codeLines.join("\n"));
	}
}

/**
 * Block reader for horizontal rules. Composed of three or more hyphens or
 * asterisks on a line by themselves, with or without intermediate whitespace.
 *
 * Examples:
 *
 * > ```markdown
 * > ---
 * >
 * > - - -
 * >
 * > * * * * *
 * >
 * > ****
 * > ```
 */
class MDHorizontalRuleBlockReader extends MDBlockReader {
	static #horizontalRuleRegex = /^\s*(?:\-(?:\s*\-){2,}|\*(?:\s*\*){2,})\s*$/;

	constructor(priority=25.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} rule block, or null if the current line is
	 *   not a horizontal rule
	 */
	readBlock(state) {
		const [line, modifier] = MDTagModifier.fromLine(state.lines[state.p]);
		if (!MDHorizontalRuleBlockReader.#horizontalRuleRegex.test(line)) return null;
		state.p++;
		const block = new MDHorizontalRuleBlock();
		if (modifier) modifier.applyTo(block);
		return block;
	}
}
/**
 * Block reader for tables. A table is a header row, a divider row that may
 * declare column alignment with colons, and zero or more body rows.
 *
 * Examples:
 *
 * > ```markdown
 * > Name | Age
 * > --- | ---
 * > Joe | 34
 * > Alice | 25
 * >
 * > | Leading | And Trailing |
 * > | - | - |
 * > | Required | for single column tables |
 * >
 * > | Left aligned column | Center aligned | Right aligned |
 * > | :-- | :--: | --: |
 * > | Joe | x | 34 |
 * > ```
 */
class MDTableBlockReader extends MDBlockReader {
	static #tableDividerRegex = /^\s*[|]?\s*(?:[:]?-+[:]?)(?:\s*\|\s*[:]?-+[:]?)*\s*[|]?\s*$/;

	constructor(priority=30.0) {
		super(priority);
	}

	/**
	 * Removes one leading and one trailing pipe from a row, if present.
	 *
	 * @param {string} line - trimmed row text
	 * @returns {string}
	 */
	static #stripOuterPipes(line) {
		if (line.startsWith('|')) line = line.substring(1);
		if (line.endsWith('|')) line = line.substring(0, line.length - 1);
		return line;
	}

	/**
	 * Reads one table row at the state's current line. A row is any line
	 * containing a pipe. On success the line pointer moves past the row.
	 *
	 * @param {MDState} state
	 * @param {boolean} isHeader - whether to build header cells
	 * @return {MDTableRowBlock|null}
	 */
	#readTableRow(state, isHeader) {
		if (!state.hasLines(1)) return null;
		let line = MDTagModifier.strip(state.lines[state.p].trim());
		if (!line.includes('|')) return null;
		line = MDTableBlockReader.#stripOuterPipes(line);
		const cells = line.split('|').map((token) => {
			const content = state.inlineMarkdownToSpan(token);
			return isHeader ? new MDTableHeaderCellBlock(content) : new MDTableCellBlock(content);
		});
		state.p++;
		return new MDTableRowBlock(cells);
	}

	/**
	 * Derives per-column alignment from the divider row: `:--` is left,
	 * `:--:` is center, `--:` is right, and no colon yields `null`.
	 *
	 * @param {string} line - divider row
	 * @returns {Array<string|null>} one alignment per column
	 */
	#parseColumnAlignments(line) {
		line = MDTableBlockReader.#stripOuterPipes(line.trim());
		return line.split('|').map((rawToken) => {
			// Trim each cell: once the outer pipes are removed the first and
			// last cells still carry the padding that sat next to them.
			const token = rawToken.trim();
			if (token.startsWith(':')) {
				return token.endsWith(':') ? MDTableCellBlock.AlignCenter : MDTableCellBlock.AlignLeft;
			}
			if (token.endsWith(':')) {
				return MDTableCellBlock.AlignRight;
			}
			return null;
		});
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} table block, or null if the current lines are
	 *   not a header row followed by a divider row. The line pointer is
	 *   left unchanged when null is returned.
	 */
	readBlock(state) {
		if (!state.hasLines(2)) return null;
		const startP = state.p;
		const modifier = MDTagModifier.fromLine(state.lines[startP])[1];

		const headerRow = this.#readTableRow(state, true);
		if (headerRow === null) {
			state.p = startP;
			return null;
		}

		const dividerLine = state.lines[state.p++];
		if (!MDTableBlockReader.#tableDividerRegex.test(dividerLine)) {
			state.p = startP;
			return null;
		}
		const columnAlignments = this.#parseColumnAlignments(dividerLine);
		headerRow.applyAlignments(columnAlignments);

		const bodyRows = [];
		while (state.hasLines(1)) {
			const row = this.#readTableRow(state, false);
			if (row === null) break;
			row.applyAlignments(columnAlignments);
			bodyRows.push(row);
		}

		const table = new MDTableBlock(headerRow, bodyRows);
		if (modifier) modifier.applyTo(table);
		return table;
	}
}
/**
 * Block reader for definition lists. Definitions go directly under terms starting
 * with a colon. Indented lines continue the term or definition above them.
 *
 * Example:
 *
 * > ```markdown
 * > markdown
 * > : a language for generating HTML from simplified syntax
 * > parser
 * > : code that converts human-readable code into machine language
 * > ```
 */
class MDDefinitionListBlockReader extends MDBlockReader {
	constructor(priority=35.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} definition list block, or null if the lines up
	 *   to the next blank line do not contain at least one term and one
	 *   definition
	 */
	readBlock(state) {
		let p = state.p;
		let termCount = 0;
		let definitionCount = 0;
		// Each entry is one term or one definition; continuation lines are
		// appended to the previous entry separated by a newline.
		const defLines = [];
		while (state.hasLines(1, p)) {
			const line = state.lines[p];
			if (line.trim().length === 0) break; // blank line ends the list and is not consumed
			p++;
			if (/^\s+/.test(line)) {
				if (defLines.length === 0) return null;
				defLines[defLines.length - 1] += "\n" + line;
			} else if (/^:\s+/.test(line)) {
				defLines.push(line);
				definitionCount++;
			} else {
				defLines.push(line);
				termCount++;
			}
		}
		if (termCount === 0 || definitionCount === 0) return null;

		const blocks = defLines.map((line) => {
			// `[\s\S]*` rather than `.*` so that definitions with
			// continuation lines (which contain newlines) still match.
			const groups = /^:\s+([\s\S]*)$/.exec(line);
			if (groups !== null) {
				return new MDDefinitionDefinitionBlock(state.inlineMarkdownToSpans(groups[1]));
			}
			return new MDDefinitionTermBlock(state.inlineMarkdownToSpans(line));
		});
		state.p = p;
		return new MDDefinitionListBlock(blocks);
	}
}
/**
 * Block reader for defining footnote contents. Footnotes can be defined anywhere
 * in the document but will always be rendered at the end of a page or end of
 * the document.
 *
 * Examples:
 *
 * > ```markdown
 * > [^1]: Content of a footnote. Anywhere `[^1]` appears in the
 * >     main text, it will hyperlink to this content at the bottom
 * >     of the document. There will also be backlinks at the end
 * >     of this footnote to all references to it.
 * > ```
 */
class MDFootnoteDefinitionBlockReader extends MDBlockReader {
	constructor(priority=40.0) {
		super(priority);
	}

	/**
	 * Registers the footnote with `state.defineFootnote` and returns an
	 * empty `MDMultiBlock`. Lines following the definition that begin with
	 * whitespace are appended to its content.
	 *
	 * @param {MDState} state
	 * @returns {MDBlock|null} empty block, or null if the current line is
	 *   not a footnote definition
	 */
	readBlock(state) {
		let p = state.p;
		// 1=symbol, 2=first line of content. The symbol capture is lazy so
		// that whitespace before the closing bracket is not part of it.
		const groups = /^\s*\[\^\s*([^\]]+?)\s*\]:\s+(.*)\s*$/.exec(state.lines[p++]);
		if (groups === null) return null;
		const symbol = groups[1];
		let def = groups[2];
		// Indented lines continue the footnote; stop on (and do not
		// consume) the first line that is not indented.
		while (state.hasLines(1, p) && /^\s+/.test(state.lines[p])) {
			def += "\n" + state.lines[p];
			p++;
		}
		state.p = p;
		const content = state.inlineMarkdownToSpan(def);
		state.defineFootnote(symbol, content);
		return new MDMultiBlock([]);
	}
}

/**
 * Block reader for abbreviation definitions. Anywhere the abbreviation appears
 * in the text will have its definition available when hovering over it.
 * Definitions can appear anywhere in the document. Their content should only
 * contain simple text, not markdown.
 *
 * Example:
 *
 * > ```markdown
 * > *[HTML]: Hyper Text Markup Language
 * > ```
 */
class MDAbbreviationDefinitionBlockReader extends MDBlockReader {
	constructor(priority=45.0) {
		super(priority);
	}

	/**
	 * Registers the abbreviation with `state.defineAbbreviation` and returns
	 * an empty `MDMultiBlock`.
	 *
	 * @param {MDState} state
	 * @returns {MDBlock|null} empty block, or null if the current line is
	 *   not an abbreviation definition
	 */
	readBlock(state) {
		// 1=abbreviation, 2=definition
		const groups = /^\s*\*\[([^\]]+?)\]:\s+(.*?)\s*$/.exec(state.lines[state.p]);
		if (groups === null) return null;
		const [, abbrev, def] = groups;
		state.defineAbbreviation(abbrev, def);
		state.p++;
		return new MDMultiBlock([]);
	}
}
/**
 * Block reader for URL definitions. Links in the document can include a
 * reference instead of a verbatim URL so it can be defined in one place and
 * reused in many places. These can be defined anywhere in the document. Nothing
 * of the definition is rendered in the document.
 *
 * Example:
 *
 * > ```markdown
 * > [foo]: https://example.com
 * > ```
 */
class MDURLDefinitionBlockReader extends MDBlockReader {
	constructor(priority=50.0) {
		super(priority);
	}

	/**
	 * Registers the URL with `state.defineURL` and returns an empty
	 * `MDInlineBlock`. The title is `null` when the definition has none.
	 *
	 * @param {MDState} state
	 * @returns {MDBlock|null} empty block, or null if the current line is
	 *   not a URL definition
	 */
	readBlock(state) {
		const line = state.lines[state.p];
		// Try the form with a quoted title first, then the bare form.
		// Groups: 1=symbol, 2=URL, 3=title (titled form only).
		const groups = /^\s*\[(.+?)]:\s*(\S+)\s+"(.*?)"\s*$/.exec(line)
			?? /^\s*\[(.+?)]:\s*(\S+)\s*$/.exec(line);
		if (groups === null) return null;
		const [, symbol, url, title = null] = groups;
		state.defineURL(symbol, url, title);
		state.p++;
		return new MDInlineBlock([]);
	}
}

/**
 * Block reader for simple paragraphs. Paragraphs are separated by a blank (or
 * whitespace-only) line. This reader should be prioritized last since there
 * is no distinguishing syntax.
 *
 * Example:
 *
 * > ```markdown
 * > Lorem ipsum dolor
 * > sit amet. This is all one paragraph.
 * >
 * > Beginning of a new paragraph.
 * > ```
 */
class MDParagraphBlockReader extends MDBlockReader {
	constructor(priority=100.0) {
		super(priority);
	}

	/**
	 * @param {MDState} state
	 * @returns {MDBlock|null} paragraph block, or null if the current line
	 *   is blank or there are no lines left
	 */
	readBlock(state) {
		const start = state.p;
		let end = start;
		while (end < state.lines.length && state.lines[end].trim().length !== 0) {
			end++;
		}
		if (end === start) return null;

		// Also consume the blank line that ended the paragraph, if any.
		state.p = Math.min(end + 1, state.lines.length);
		const content = state.lines.slice(start, end).join("\n");
		return new MDParagraphBlock(new MDInlineBlock(state.inlineMarkdownToSpans(content)));
	}
}
// -- Inline reader ---------------------------------------------------------

/**
 * Base class for readers of inline markdown. An inline reader takes part in
 * two stages, each with its own priority: tokenizing text into `MDToken`
 * elements (`readFirstToken`) and substituting runs of tokens with `MDSpan`
 * elements (`substituteTokens`). Subclasses must override both methods.
 */
class MDInlineReader {
	/** @type {number} */
	#tokenizePriority;

	/** @type {number|number[]} */
	#substitutePriority;

	/**
	 * A unitless relative tokenizing priority value that determines which
	 * readers are tried first. Lower values are tried first. Standard readers
	 * are in the range of 0.0 to 100.0 but any value is valid. Longer
	 * tokens should generally be prioritized over short or single character tokens.
	 *
	 * @returns {number} priority for tokenization
	 */
	get tokenizePriority() {
		return this.#tokenizePriority;
	}

	/**
	 * A unitless relative substitution priority value that determines which
	 * readers are tried first. Lower values are tried first. If an array of
	 * values is given, the same reader will be included once per value in
	 * the prioritization. This allows for multiple passes. Standard readers
	 * are in the range of 0.0 to 100.0 but any value is valid. Priority should
	 * be used to help resolve ambiguous parsings, with longer, more complex
	 * constructions best prioritized before shorter, simpler ones.
	 *
	 * @returns {number|number[]} priority or priorities for substitution
	 */
	get substitutePriority() {
		return this.#substitutePriority;
	}

	/**
	 * @param {number} tokenizePriority
	 * @param {number|number[]} substitutePriority
	 */
	constructor(tokenizePriority, substitutePriority) {
		this.#tokenizePriority = tokenizePriority;
		this.#substitutePriority = substitutePriority;
	}

	/**
	 * Attempts to read a token from the start of the given string.
	 *
	 * @param {MDState} state
	 * @param {number} priority
	 * @param {string} remainingText - remainder of the current line of markdown text left to tokenize
	 * @returns {MDToken|null} a token or `null` if not found
	 */
	readFirstToken(state, priority, remainingText) {
		const subclassName = this.constructor.name;
		throw new Error(`Abstract readFirstToken must be overridden in ${subclassName}`);
	}

	/**
	 * Attempts to substitute one or more tokens in the given array. The given
	 * array is edited in-place via `.splice` operations. It consists of mixed
	 * elements of unprocessed `MDToken` and interpreted `MDSpan` elements.
	 *
	 * If a structure consists of inner content that is also markdown encoded,
	 * those elements can be passed to `state.tokensToSpans` to resolve to an
	 * array of `MDSpan` elements.
	 *
	 * @param {MDState} state
	 * @param {number} priority - used to differentiate passes when multiple
	 *   values of `this.substitutePriority` are given
	 * @param {MDToken[]|MDSpan[]} tokens - mixed array of `MDToken` and
	 *   `MDSpan` elements to be modified by reference
	 * @returns {boolean} `true` if any substitutions were made; `false` if not
	 */
	substituteTokens(state, priority, tokens) {
		const subclassName = this.constructor.name;
		throw new Error(`Abstract substituteTokens must be overridden in ${subclassName}`);
	}

	/**
	 * Called after the full document has been generated for optional
	 * post-processing. The base implementation does nothing.
	 *
	 * @param {MDState} state
	 * @param {MDBlock[]} blocks - top-level document block list
	 */
	postProcess(state, blocks) {
		// no op
	}
}
this.substitutePriority[0] : null; let match = MDToken.findPairedTokens(tokens, delimiters, delimiters, function(content) { const firstType = content[0] instanceof MDToken ? content[0].type : null; const lastType = content[content.length - 1] instanceof MDToken ? content[content.length - 1].type : null; if (firstType == MDTokenType.Whitespace) return false; if (lastType == MDTokenType.Whitespace) return false; if (priority == firstPassPriority) { var innerCount = 0; for (let token of content) { if (token instanceof MDToken && token.type == delimiter) innerCount++; } if ((innerCount % 2) != 0) return false; } return true; }); if (match === null) return false; let content = (plaintext) ? match.contentTokens.map((token) => token.original).join('') : state.tokensToSpans(match.contentTokens); tokens.splice(match.startIndex, match.totalLength, new spanClass(content)); return true; } } class MDStrongInlineReader extends MDSimplePairInlineReader { constructor(tokenizePriority=0.0, substitutePriority=[0.0, 50.0]) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { if (remainingText.startsWith('*')) return new MDToken('*', MDTokenType.Asterisk); if (remainingText.startsWith('_')) return new MDToken('_', MDTokenType.Underscore); return null; } substituteTokens(state, priority, tokens) { if (this.attemptPair(state, priority, tokens, MDStrongSpan, MDTokenType.Asterisk, 2)) return true; if (this.attemptPair(state, priority, tokens, MDStrongSpan, MDTokenType.Underscore, 2)) return true; return false; } } class MDEmphasisInlineReader extends MDSimplePairInlineReader { constructor(tokenizePriority=0.0, substitutePriority=[0.0, 50.0]) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { if (remainingText.startsWith('*')) return new MDToken('*', MDTokenType.Asterisk); if (remainingText.startsWith('_')) return new MDToken('_', MDTokenType.Underscore); return null; } substituteTokens(state, 
priority, tokens) { if (this.attemptPair(state, priority, tokens, MDEmphasisSpan, MDTokenType.Asterisk)) return true; if (this.attemptPair(state, priority, tokens, MDEmphasisSpan, MDTokenType.Underscore)) return true; return false; } } class MDCodeInlineReader extends MDSimplePairInlineReader { constructor(tokenizePriority=0.0, substitutePriority=0.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { if (remainingText.startsWith('`')) return new MDToken('`', MDTokenType.Backtick); return null; } substituteTokens(state, priority, tokens) { // ignore priority if (this.attemptPair(state, -1, tokens, MDCodeSpan, MDTokenType.Backtick, 2, true)) return true; if (this.attemptPair(state, -1, tokens, MDCodeSpan, MDTokenType.Backtick, 1, true)) return true; return false; } } class MDStrikethroughInlineReader extends MDSimplePairInlineReader { constructor(tokenizePriority=0.0, substitutePriority=[0.0, 50.0]) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { if (remainingText.startsWith('~')) return new MDToken('~', MDTokenType.Tilde); return null; } substituteTokens(state, priority, tokens) { if (this.attemptPair(state, priority, tokens, MDStrikethroughSpan, MDTokenType.Tilde, 2)) return true; if (this.attemptPair(state, priority, tokens, MDStrikethroughSpan, MDTokenType.Tilde)) return true; return false; } } class MDImageInlineReader extends MDInlineReader { constructor(tokenizePriority=0.0, substitutePriority=0.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { if (remainingText.startsWith('!')) return new MDToken('!', MDTokenType.Bang); var groups; if (groups = MDUtils.tokenizeLabel(remainingText)) { return new MDToken(groups[0], MDTokenType.Label, groups[1]); } if (groups = MDUtils.tokenizeURL(remainingText)) { return new MDToken(groups[0], MDTokenType.URL, groups[1], groups[2]); } return null; } substituteTokens(state, 
priority, tokens) { var match; if (match = MDToken.findFirstTokens(tokens, [ MDTokenType.Bang, MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.URL ])) { let alt = match.tokens[1].content; let url = match.tokens[match.tokens.length - 1].content; let title = match.tokens[match.tokens.length - 1].extra; tokens.splice(match.index, match.tokens.length, new MDImageSpan(url, alt, title)); return true; } if (match = MDToken.findFirstTokens(tokens, [ MDTokenType.Bang, MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.Label ])) { let alt = match.tokens[1].content; let ref = match.tokens[match.tokens.length - 1].content; tokens.splice(match.index, match.tokens.length, new MDReferencedImageSpan(ref, alt)); return true; } return false; } } class MDFootnoteInlineReader extends MDInlineReader { static #footnoteWithTitleRegex = /^\[\^([^\]]+?)\s+"(.*?)"\]/; // 1=symbol, 2=title static #footnoteRegex = /^\[\^([^\]]+?)\]/; // 1=symbol constructor(tokenizePriority=0.0, substitutePriority=0.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { var groups; if (groups = MDFootnoteInlineReader.#footnoteWithTitleRegex.exec(remainingText)) { return new MDToken(groups[0], MDTokenType.Footnote, groups[1], groups[2]); } if (groups = MDFootnoteInlineReader.#footnoteRegex.exec(remainingText)) { return new MDToken(groups[0], MDTokenType.Footnote, groups[1]); } return null; } substituteTokens(state, priority, tokens) { var match; if (match = MDToken.findFirstTokens(tokens, [ MDTokenType.Footnote ])) { let footnoteToken = match.tokens[0]; tokens.splice(match.index, 1, new MDFootnoteReferenceSpan(footnoteToken.content)); return true; } return false; } /** * @param {MDState} state * @param {MDBlock[]} blocks */ postProcess(state, blocks) { var unique = 1; for (const block of blocks) { block.visitChildren(function(node) { if (!(node instanceof MDFootnoteReferenceSpan)) return; node.differentiator = unique++; 
state.registerUniqueFootnote(node.symbol, node.differentiator); }); } if (Object.keys(state.footnotes).length == 0) return; blocks.push(new MDFootnoteListingBlock()); } } class MDLinkInlineReader extends MDInlineReader { constructor(tokenizePriority=0.0, substitutePriority=0.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { var groups; if (groups = MDUtils.tokenizeLabel(remainingText)) { return new MDToken(groups[0], MDTokenType.Label, groups[1]); } if (groups = MDUtils.tokenizeEmail(remainingText)) { return new MDToken(groups[0], MDTokenType.Email, groups[1], groups[2]); } if (groups = MDUtils.tokenizeURL(remainingText)) { return new MDToken(groups[0], MDTokenType.URL, groups[1], groups[2]); } return null; } /** * @param {MDState} state */ substituteTokens(state, priority, tokens) { var match; if (match = MDToken.findFirstTokens(tokens, [ MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.URL ])) { let text = match.tokens[0].content; let url = match.tokens[match.tokens.length - 1].content; let title = match.tokens[match.tokens.length - 1].extra; tokens.splice(match.index, match.tokens.length, new MDLinkSpan(url, state.inlineMarkdownToSpan(text), title)); return true; } if (match = MDToken.findFirstTokens(tokens, [ MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.Email ])) { let text = match.tokens[0].content; let email = match.tokens[match.tokens.length - 1].content; let url = `mailto:${email}`; let title = match.tokens[match.tokens.length - 1].extra; tokens.splice(match.index, match.tokens.length, new MDLinkSpan(url, state.inlineMarkdownToSpan(text), title)); return true; } if (match = MDToken.findFirstTokens(tokens, [ MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.Label ])) { let text = match.tokens[0].content; let ref = match.tokens[match.tokens.length - 1].content; tokens.splice(match.index, match.tokens.length, new MDReferencedLinkSpan(ref, 
state.inlineMarkdownToSpan(text))); return true; } return false; } } class MDSimpleLinkInlineReader extends MDInlineReader { static #simpleEmailRegex = new RegExp("^<(" + MDUtils.baseEmailRegex.source + ")>", "i"); // 1=email static #simpleURLRegex = new RegExp("^<(" + MDUtils.baseURLRegex.source + ")>", "i"); // 1=URL constructor(tokenizePriority=0.0, substitutePriority=0.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { var groups; if (groups = MDSimpleLinkInlineReader.#simpleEmailRegex.exec(remainingText)) { return new MDToken(groups[0], MDTokenType.SimpleEmail, groups[1]); } if (groups = MDSimpleLinkInlineReader.#simpleURLRegex.exec(remainingText)) { return new MDToken(groups[0], MDTokenType.SimpleLink, groups[1]); } return null; } #substituteEmail(state, tokens) { const result = MDToken.findFirstTokens(tokens, [ MDTokenType.SimpleEmail ]); if (result === null) return false; /** @type {MDToken} */ const token = result.tokens[0]; const link = `mailto:${token.content}`; const span = new MDLinkSpan(link, new MDObfuscatedTextSpan(token.content)); tokens.splice(result.index, 1, span); return true; } #substituteURL(state, tokens) { const result = MDToken.findFirstTokens(tokens, [ MDTokenType.SimpleLink ]); if (result === null) return false; /** @type {MDToken} */ const token = result.tokens[0]; const link = token.content; const span = new MDLinkSpan(link, new MDTextSpan(link)); tokens.splice(result.index, 1, span); return true; } substituteTokens(state, priority, tokens) { if (this.#substituteEmail(state, tokens)) return true; if (this.#substituteURL(state, tokens)) return true; return false; } } class MDHTMLTagInlineReader extends MDInlineReader { constructor(tokenizePriority=0.0, substitutePriority=95.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { var tag = MDHTMLTag.fromLineStart(remainingText); if (tag) return new MDToken(tag.fullTag, MDTokenType.HTMLTag, 
tag.fullTag, null, tag); return null; } substituteTokens(state, priority, tokens) { const result = MDToken.findFirstTokens(tokens, [ MDTokenType.HTMLTag ]); if (result === null) return false; /** @type {MDToken} */ const token = result.tokens[0]; const tag = token.tag; const span = new MDHTMLSpan(tag.fullTag); tokens.splice(result.index, 1, span); return true; } } class MDModifierInlineReader extends MDInlineReader { constructor(tokenizePriority=0.0, substitutePriority=100.0) { super(tokenizePriority, substitutePriority); } readFirstToken(state, priority, remainingText) { var modifier = MDTagModifier.fromStart(remainingText); if (modifier) return new MDToken(modifier.original, MDTokenType.Modifier, modifier); return null; } substituteTokens(state, priority, tokens) { // Modifiers are applied elsewhere, and if they're not it's fine if they're // rendered as the original syntax. return false; } } // -- Blocks ---------------------------------------------------------------- class MDBlock { /** @type {string[]} */ cssClasses = []; /** @type {string|null} */ cssId = null; /** @type {object} */ attributes = {}; /** * @param {MDState} state */ toHTML(state) { throw new Error(`Abstract toHTML must be overridden in ${self.constructor.name}`); } htmlAttributes() { var html = ''; if (this.cssClasses.length > 0) { html += ` class="${this.cssClasses.join(' ')}"`; } if (this.cssId !== null) { html += ` id="${this.cssId}"`; } for (const name in this.attributes) { let value = this.attributes[name]; html += ` ${name}="${MDUtils.escapeHTML(value)}"`; } return html; } /** * @param {MDBlock[]} blocks * @param {MDState} state * @returns {string} */ static toHTML(blocks, state) { return blocks.map((block) => block.toHTML(state)).join("\n"); } /** * Visits all block and inline children of this block, calling the given * function with each. Should be implemented for any block with child nodes. 
* * @param {function} fn */ visitChildren(fn) {} } class MDMultiBlock extends MDBlock { /** @type {MDBlock[]} */ #blocks; /** * @param {MDBlock[]} blocks */ constructor(blocks) { super(); if (blocks instanceof Array) { this.#blocks = blocks; } else { throw new Error(`${MDUtils.typename(this)} expects MDBlock[], got ${MDUtils.typename(blocks)}`); } } toHTML(state) { return MDBlock.toHTML(this.#blocks, state); } visitChildren(fn) { for (const block of this.#blocks) { fn(block); block.visitChildren(fn); } } } class MDParagraphBlock extends MDBlock { /** @type {MDBlock[]} */ #content; /** * @param {MDBlock|MDBlock[]} content */ constructor(content) { super(); if (content instanceof Array) { this.#content = content; } else if (content instanceof MDBlock) { this.#content = [ content ]; } else { throw new Error(`${MDUtils.typename(this)} expects MDBlock[] or MDBlock, got ${MDUtils.typename(content)}`); } } toHTML(state) { const contentHTML = MDBlock.toHTML(this.#content, state); return `
${contentHTML}
\n`; } visitChildren(fn) { for (const child of this.#content) { fn(child); child.visitChildren(fn); } } } class MDHeaderBlock extends MDBlock { /** @type {number} */ #level; /** @type {MDBlock} */ #content; /** * @param {number} level * @param {MDBlock} content */ constructor(level, content) { super(); this.#level = level; this.#content = content; } toHTML(state) { let contentHTML = this.#content.toHTML(state); return `\n${contentHTML}\n`; } visitChildren(fn) { for (const block of this.content) { fn(block); block.visitChildren(fn); } } } class MDUnorderedListBlock extends MDBlock { /** @type {MDListItemBlock[]} */ items; /** * @param {MDListItemBlock[]} items */ constructor(items) { super(); this.items = items; } toHTML(state) { let contentHTML = MDBlock.toHTML(this.items, state); return `
${MDUtils.escapeHTML(this.#code)}`;
}
}
/**
 * Block for a horizontal rule. It has no content of its own, so it renders
 * as a single void `<hr>` element carrying whatever class/id/attributes were
 * set on the block.
 */
class MDHorizontalRuleBlock extends MDBlock {
  /**
   * @param {MDState} state - unused
   * @returns {string} the `<hr>` element markup
   */
  toHTML(state) {
    // The previous body read `this.#content`, a private name this class never
    // declares (an early SyntaxError), and a rule has nothing to escape.
    return `<hr${this.htmlAttributes()}>`;
  }
}
class MDImageSpan extends MDSpan {
/** @type {string} */
source;
/** @type {string|null} */
alt;
/** @type {string|null} */
title;
/**
* @param {string} source - image URL
* @param {string} alt - alt text
* @param {string|null} title - optional title attribute
*/
constructor(source, alt, title=null) {
super();
this.source = source;
this.alt = alt;
this.title = title;
}
toHTML(state) {
let html = `