// TODO: Linked image not parsed correctly. [![](image.jpg)](link.html)

// Blocks
// - Paragraph
// - Header 1-6  # ## ### #### ##### ###### or === ---
// - Blockquote (nestable)  >
// - Unordered list (nestable)  *_
// - Ordered list (nestable)  1._
// - Code block  ```\ncode\n``` or 4 spaces/tab indent
// - Horizontal rule  --- - - - * * * etc
// - Table  -|-
// - Definition list  term\n: definition\n: alternate definition
// - Footnote (bottom)  citation[^1]
// - Abbreviation (definition)  *[ABC]: Abbrev Blah Cat

// Inline
// - Link  [text](https://url)
// - Emphasis  *emphasized*
// - Strong  **bold**
// - Inline code  `code`
// - Strikethrough  ~strike~
// - Image  ![alt text](https://image){.cssclass}
// - Footnote (inline)  [^1]: footnote text
// - Abbreviation (inline)

/**
 * Horizontal alignment of a table column. Enum-style class: the three static
 * instances are the only intended values and are compared by identity.
 */
class _MDHAlign {
    static Left = new _MDHAlign('Left');
    static Center = new _MDHAlign('Center');
    static Right = new _MDHAlign('Right');

    /** @var {String} */
    #name;

    /**
     * Read-only so that a shared enum instance cannot be relabelled by
     * accident (same convention as `_MDTokenType`).
     *
     * @returns {String}
     */
    get name() {
        return this.#name;
    }

    /**
     * @param {String} name
     */
    constructor(name) {
        this.#name = name;
    }

    toString() {
        return `${this.constructor.name}.${this.#name}`;
    }

    /**
     * @param {_MDHAlign|null} align
     * @returns {String} an ` align="..."` attribute with a leading space, or
     * an empty string when `align` is not one of the three enum values
     */
    static toHTMLAttribute(align) {
        switch (align) {
            case _MDHAlign.Left: return ' align="left"';
            case _MDHAlign.Center: return ' align="center"';
            case _MDHAlign.Right: return ' align="right"';
        }
        return '';
    }
}

/**
 * Enum-style type tag carried by `_MDToken.type`. The trailing comments note
 * which `_MDToken` fields are populated for that type.
 */
class _MDTokenType {
    static Text = new _MDTokenType('Text');
    static Whitespace = new _MDTokenType('Whitespace');

    static Underscore = new _MDTokenType('Underscore');
    static Asterisk = new _MDTokenType('Asterisk');
    static Slash = new _MDTokenType('Slash');
    static Tilde = new _MDTokenType('Tilde');
    static Bang = new _MDTokenType('Bang');
    static Backtick = new _MDTokenType('Backtick');

    static Label = new _MDTokenType('Label'); // content=label
    static URL = new _MDTokenType('URL'); // content=URL, extra=title
    static Email = new _MDTokenType('Email'); // content=email address, extra=title
    static SimpleLink = new _MDTokenType('SimpleLink'); // content=URL
    static SimpleEmail = new _MDTokenType('SimpleEmail'); // content=email address
    static Footnote = new _MDTokenType('Footnote'); // content=symbol

    static HTMLTag = new _MDTokenType('HTMLTag'); // content=tag string, tag=_MDHTMLTag

    // Pattern-only wildcard: never assigned to a token, used when matching
    // token sequences to mean "anything that is not a Whitespace token".
    static META_AnyNonWhitespace = new _MDTokenType('METAAnyNonWhitespace');

    /** @var {String} */
    #name;

    /** @returns {String} */
    get name() {
        return this.#name;
    }

    /**
     * @param {String} name
     */
    constructor(name) {
        this.#name = name;
    }

    toString() {
        return this.constructor.name + `.${this.#name}`;
    }
}
/**
 * One lexical token cut from a line of inline markdown.
 */
class _MDToken {
    /** @var {String} the exact source text this token was made from */
    original;
    /** @var {_MDTokenType} */
    type;
    /** @var {String|null} */
    content;
    /** @var {String|null} */
    extra;
    /** @var {_MDHTMLTag|null} */
    tag;

    /**
     * @param {String} original
     * @param {_MDTokenType} type
     * @param {String|null} content
     * @param {String|null} extra
     * @param {_MDHTMLTag|null} tag
     */
    constructor(original, type, content=null, extra=null, tag=null) {
        this.original = original;
        this.type = type;
        this.content = content;
        this.extra = extra;
        this.tag = tag;
    }
}

// -- Spans -----------------------------------------------------------------

/**
 * Base class for inline elements. Subclasses must override `toHTML`.
 */
class _MDSpan {
    /**
     * @param {_MDConfig} config
     * @returns {String}
     * @throws {Error} always, naming the concrete subclass that forgot to
     * override this method
     */
    toHTML(config) {
        // `this`, not `self`: `self` is the global object in browsers and is
        // not defined at all in other runtimes.
        throw new Error(this.constructor.name + ".toHTML not implemented");
    }

    /**
     * Renders each span and concatenates the results with no separator.
     *
     * @param {_MDSpan[]} spans
     * @param {_MDConfig} config
     * @returns {String}
     */
    static toHTML(spans, config) {
        return spans.map((span) => span.toHTML(config)).join("");
    }
}

/**
 * A span that is just a sequence of child spans.
 */
class _MDMultiSpan extends _MDSpan {
    /** @var {_MDSpan[]} */
    content;

    /**
     * @param {_MDSpan[]} content
     */
    constructor(content) {
        super();
        this.content = content;
    }

    /**
     * @param {_MDConfig} config forwarded unchanged to every child
     * @returns {String}
     */
    toHTML(config) {
        return _MDSpan.toHTML(this.content, config);
    }
}

/**
 * Literal text.
 */
class _MDTextSpan extends _MDSpan {
    /** @var {String} */
    text;

    /**
     * @param {String} text
     */
    constructor(text) {
        super();
        this.text = text;
    }

    /**
     * @param {_MDConfig} config unused
     * @returns {String} the text with every `<` replaced by an entity
     */
    toHTML(config) {
        // NOTE(review): the reviewed copy showed a literal '<' as the
        // replacement (a no-op); `&lt;` is assumed to be the intended entity.
        // replaceAll, because replace() with a string pattern only touches
        // the first occurrence.
        return this.text.replaceAll('<', '&lt;');
    }
}

/**
 * Pre-rendered HTML that is emitted verbatim.
 */
class _MDHTMLSpan extends _MDSpan {
    /** @var {String} */
    html;

    /**
     * @param {String} html
     */
    constructor(html) {
        super();
        this.html = html;
    }

    /**
     * @param {_MDConfig} config unused
     * @returns {String}
     */
    toHTML(config) {
        return this.html;
    }
}
= `'; return html; } } class _MDReferencedLinkSpan extends _MDLinkSpan { /** @var {String} id */ id; constructor(id, content) { super(null, content); this.id = id; } toHTML(config) { if (this.link) { return super.toHTML(config); } else { let contentHTML = this.content.toHTML(config); return `[${contentHTML}][${this.id}]`; } } } class _MDEmphasisSpan extends _MDSpan { /** @var {_MDSpan} content */ #content; /** * @param {_MDSpan} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); return `${contentHTML}`; } } class _MDStrongSpan extends _MDSpan { /** @var {_MDSpan} content */ #content; /** * @param {_MDSpan} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); return `${contentHTML}`; } } class _MDStrikethroughSpan extends _MDSpan { /** @var {_MDSpan} content */ #content; /** * @param {_MDSpan} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); return `${contentHTML}`; } } class _MDCodeSpan extends _MDSpan { /** @var {_MDSpan} content */ #content; /** * @param {_MDSpan} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); return `${contentHTML}`; } } class _MDImageSpan extends _MDSpan { /** @var {String} */ source; /** @var {String|null} */ alt; /** * @param {String} source */ constructor(source, alt) { super(); this.source = source; this.alt = alt; } toHTML(config) { let escapedSource = this.source.replace('"', '"'); let html = `${altEscaped}${this.symbol}`; } } class _MDAbbreviationSpan extends _MDSpan { /** @var {String} */ abbreviation; /** @var {String|null} definition */ definition; /** * @param {String} abbreviation * @param {String|null} definition */ constructor(abbreviation, definition=null) { super(); this.abbreviation = 
// -- Blocks ----------------------------------------------------------------

/**
 * Base class for block-level elements. Subclasses must override `toHTML`.
 */
class _MDBlock {
    /**
     * @param {_MDConfig} config
     * @returns {String}
     * @throws {Error} always, naming the concrete subclass that forgot to
     * override this method
     */
    toHTML(config) {
        // `this`, not `self`: `self` is the global object in browsers and is
        // not defined at all in other runtimes.
        throw new Error(this.constructor.name + ".toHTML not implemented");
    }

    /**
     * Renders each block and joins the results with a newline.
     *
     * @param {_MDBlock[]} blocks
     * @param {_MDConfig} config
     * @returns {String}
     */
    static toHTML(blocks, config) {
        return blocks.map((block) => block.toHTML(config)).join("\n");
    }
}

/**
 * A block that is just a sequence of child blocks.
 */
class _MDMultiBlock extends _MDBlock {
    /** @var {_MDBlock[]} */
    #blocks;

    /**
     * @param {_MDBlock[]} blocks
     */
    constructor(blocks) {
        super();
        this.#blocks = blocks;
    }

    /**
     * @param {_MDConfig} config forwarded unchanged to every child
     * @returns {String}
     */
    toHTML(config) {
        return _MDBlock.toHTML(this.#blocks, config);
    }
}

${contentHTML}

\n`; } } class _MDHeaderBlock extends _MDBlock { /** @var {number} */ level; /** @var {_MDBlock} */ content; /** * @param {number} level * @param {_MDBlock} content */ constructor(level, content) { super(); this.level = level; this.content = content; } toHTML(config) { let contentHTML = this.content.toHTML(config); return `${contentHTML}\n`; } } class _MDBlockquoteBlock extends _MDBlock { /** @var {_MDBlock[]} */ content; /** * @param {_MDBlock[]} content */ constructor(content) { super(); this.content = content; } toHTML(config) { let contentHTML = _MDBlock.toHTML(this.content, config); return `
\n${contentHTML}\n
`; } } class _MDUnorderedListBlock extends _MDBlock { /** @var {_MDListItemBlock[]} */ items; /** * @param {_MDListItemBlock[]} items */ constructor(items) { super(); this.items = items; } toHTML(config) { let contentHTML = _MDBlock.toHTML(this.items); return ``; } } class _MDOrderedListBlock extends _MDBlock { /** @var {_MDListItemBlock[]} */ items; /** * @param {_MDListItemBlock[]} items */ constructor(items) { super(); this.items = items; } toHTML(config) { let contentHTML = _MDBlock.toHTML(this.items); return `
    \n${contentHTML}\n
`; } } class _MDListItemBlock extends _MDBlock { /** @var {_MDBlock} */ content; /** * @param {_MDBlock} content */ constructor(content) { super(); this.content = content; } toHTML(config) { let contentHTML = this.content.toHTML(config); return `
  • ${contentHTML}
  • `; } } class _MDCodeBlock extends _MDBlock { /** @var {String} */ #code; /** * @param {String} code */ constructor(code) { super(); this.#code = code; } toHTML(config) { return `
    ${this.#code}
    `; } } class _MDHorizontalRuleBlock extends _MDBlock { toHTML(config) { return "
    \n"; } } class _MDTableCellBlock extends _MDBlock { /** @var {_MDBlock} */ #content; /** @var {_MDHAlign|null} */ align = null; /** * @param {_MDBlock} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); let alignAttribute = _MDHAlign.toHTMLAttribute(this.align); return `${contentHTML}`; } } class _MDTableHeaderCellBlock extends _MDTableCellBlock { toHTML(config) { let html = super.toHTML(config); let groups = /^$/.exec(html); return ``; } } class _MDTableRowBlock extends _MDBlock { /** @var {_MDTableCellBlock[]|_MDTableHeaderCellBlock[]} */ #cells; /** * @param {_MDTableCellBlock[]|_MDTableHeaderCellBlock[]} cells */ constructor(cells) { super(); this.#cells = cells; } /** * @param {_MDHAlign[]} alignments */ applyAlignments(alignments) { for (var i = 0; i < this.#cells.length; i++) { let cell = this.#cells[i]; let align = i < alignments.length ? alignments[i] : null; cell.align = align; } } toHTML(config) { let cellsHTML = _MDBlock.toHTML(this.#cells, config); return `\n${cellsHTML}\n`; } } class _MDTableBlock extends _MDBlock { /** @var {_MDTableRowBlock} */ #headerRow; /** @var {_MDTableRowBlock[]} */ #bodyRows; /** * @param {_MDTableRowBlock} headerRow * @param {_MDTableRowBlock[]} bodyRows */ constructor(headerRow, bodyRows) { super(); this.#headerRow = headerRow; this.#bodyRows = bodyRows; } toHTML(config) { let headerRowHTML = this.#headerRow.toHTML(config); let bodyRowsHTML = _MDBlock.toHTML(this.#bodyRows); return `\n\n${headerRowHTML}\n\n\n${bodyRowsHTML}\n\n
    `; } } class _MDDefinitionListBlock extends _MDBlock { /** @var {_MDBlock[]} */ #content; /** * @param {_MDBlock[]} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = _MDBlock.toHTML(this.#content); return `
    \n${contentHTML}\n
    `; } } class _MDDefinitionTermBlock extends _MDBlock { /** @var {_MDBlock} */ #content; /** * @param {_MDBlock} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); return `
    ${contentHTML}
    `; } } class _MDDefinitionDefinitionBlock extends _MDBlock { /** @var {_MDBlock} */ #content; /** * @param {_MDBlock} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { let contentHTML = this.#content.toHTML(config); return `
    ${contentHTML}
    `; } } class _MDFootnoteBlock extends _MDBlock { /** @var {String} */ #id; /** @var {_MDBlock} */ #content; /** * @param {String} id * @param {_MDBlock} content */ constructor(id, content) { super(); this.#id = id; this.#content = content; } toHTML(config) { // TODO: Forward and back links // TODO: Deferring footnotes to end of document //
      //
    1. //

      Footnote ↩︎

      //
    2. //
    return ''; } } class _MDInlineBlock extends _MDBlock { /** @var {_MDSpan[]} */ #content; /** * @param {_MDSpan[]} content */ constructor(content) { super(); this.#content = content; } toHTML(config) { return _MDSpan.toHTML(this.#content); } } class _MDHTMLTag { /** @var {String} */ fullTag; /** @var {String} */ tagName; /** @var {Boolean} */ isCloser; /** @var {Object} */ attributes; /** * @param {String} fullTag * @param {String} tagName * @param {Boolean} isCloser * @param {Object} attributes */ constructor(fullTag, tagName, isCloser, attributes) { this.fullTag = fullTag; this.tagName = tagName; this.isCloser = isCloser; this.attributes = attributes; } } class _MDState { /** @var {String[]} */ lines = []; /** @var {Object} */ #abbreviations = {}; /** @var {Object} */ #footnotes = {}; /** @var {number} */ p = 0; /** @var {_MDState|null} */ #parent = null; /** @var {Object} */ get abbreviations() { (this.#parent) ? this.#parent.abbreviations : this.#abbreviations; } /** @var {Object} */ get footnotes() { (this.#parent) ? this.#parent.footnotes : this.#footnotes; } /** * @param {String[]} lines */ copy(lines) { let cp = new _MDState(); cp.#parent = this; cp.lines = lines; cp.p = 0; return cp; } /** * @param {String} abbreviation * @param {String} definition */ defineAbbreviation(abbreviation, definition) { if (this.#parent) { this.#parent.defineAbbreviation(abbreviation, definition); } else { this.#abbreviations[abbreviation] = definition; } } /** * @param {String} symbol * @param {_MDBlock} footnote */ defineFootnote(symbol, footnote) { if (this.#parent) { this.#parent.defineFootnote(symbol, footnote); } else { this.#footnotes[symbol] = footnote; } } hasLines(minCount, p=-1) { let relativeTo = (p < 0) ? 
this.p : p; return relativeTo + minCount <= this.lines.length; } } class MDConfig { } class Markdown { /** * @param {String} line */ static #stripIndent(line, count=1) { let regex = new RegExp(`^(: {1,4}|\\t){${count}}`); return line.replace(regex, ''); } /** * @param {String} line * @param {Boolean} fullIndentsOnly * @returns {Number} indent count */ static #countIndents(line, fullIndentsOnly=false) { var count = 0; var lastLine = line; while (line.length > 0) { line = (fullIndentsOnly) ? line.replace(/^(?: {4}|\t)/, '') : line.replace(/^(?: {1,4}|\t)/, ''); if (line != lastLine) { count++; } else { break; } lastLine = line; } return count; } /** * @param {_MDState} state * @returns {_MDBlock[]} */ static #readBlocks(state) { var blocks = []; while (state.hasLines(1)) { let block = this.#readNextBlock(state); if (block) { blocks.push(block); } else { break; } } return blocks; } /** * @param {_MDState} state * @returns {_MDBlock} */ static #readNextBlock(state) { while (state.hasLines(1) && state.lines[state.p].trim().length == 0) { console.info("Skipping blank line " + state.p); state.p++; } var block; block = this.#readUnderlineHeader(state); if (block) return block; block = this.#readHashHeader(state); if (block) return block; block = this.#readBlockQuote(state); if (block) return block; block = this.#readUnorderedList(state); if (block) return block; block = this.#readOrderedList(state); if (block) return block; block = this.#readFencedCodeBlock(state); if (block) return block; block = this.#readIndentedCodeBlock(state); if (block) return block; block = this.#readHorizontalRule(state); if (block) return block; block = this.#readTable(state); if (block) return block; block = this.#readDefinitionList(state); if (block) return block; block = this.#readFootnoteDef(state); if (block) return block; block = this.#readAbbreviationDef(state); if (block) return block; block = this.#readParagraph(state); if (block) return block; return null; } static 
#htmlTagNameFirstRegex = /[a-z]/i; static #htmlTagNameMedialRegex = /[a-z0-9]/i; static #htmlAttributeNameFirstRegex = /[a-z]/i; static #htmlAttributeNameMedialRegex = /[a-z0-9-]/i; static #whitespaceCharRegex = /\s/; /** * @param {String} line * @returns {_MDHTMLTag|null} HTML tag if possible */ static #htmlTag(line) { let expectOpenBracket = 0; let expectCloserOrName = 1; let expectName = 2; let expectAttributeNameOrEnd = 3; let expectEqualsOrAttributeOrEnd = 4; let expectAttributeValue = 5; let expectCloseBracket = 6; var isCloser = false; var tagName = ''; var attributeName = ''; var attributeValue = ''; var attributeQuote = null; var attributes = {}; var fullTag = null; let endAttribute = function() { if (attributeName.length > 0) { if (attributeValue.length > 0 || attributeQuote) { attributes[attributeName] = attributeValue; } else { attributes[attributeName] = true; } } attributeName = ''; attributeValue = ''; attributeQuote = null; }; var expect = expectOpenBracket; for (var p = 0; p < line.length && fullTag === null; p++) { let ch = line.substring(p, p + 1); let isWhitespace = this.#whitespaceCharRegex.exec(ch) !== null; switch (expect) { case expectOpenBracket: if (ch != '<') return null; expect = expectCloserOrName; break; case expectCloserOrName: if (ch == '/') { isCloser = true; } else { p--; } expect = expectName; break; case expectName: if (tagName.length == 0) { if (this.#htmlTagNameFirstRegex.exec(ch) === null) return null; tagName += ch; } else { if (this.#htmlTagNameMedialRegex.exec(ch)) { tagName += ch; } else { p--; expect = (isCloser) ? 
expectCloseBracket : expectAttributeNameOrEnd; } } break; case expectAttributeNameOrEnd: if (attributeName.length == 0) { if (isWhitespace) { // skip whitespace } else if (ch == '/') { expect = expectCloseBracket; } else if (ch == '>') { fullTag = line.substring(0, p + 1); break; } else if (this.#htmlAttributeNameFirstRegex.exec(ch)) { attributeName += ch; } else { return null; } } else if (isWhitespace) { expect = expectEqualsOrAttributeOrEnd; } else if (ch == '/') { endAttribute(); expect = expectCloseBracket; } else if (ch == '>') { endAttribute(); fullTag = line.substring(0, p + 1); break; } else if (ch == '=') { expect = expectAttributeValue; } else if (this.#htmlAttributeNameMedialRegex.exec(ch)) { attributeName += ch; } else { return null; } break; case expectEqualsOrAttributeOrEnd: if (ch == '=') { expect = expectAttributeValue; } else if (isWhitespace) { // skip whitespace } else if (ch == '/') { expect = expectCloseBracket; } else if (ch == '>') { fullTag = line.substring(0, p + 1); break; } else if (this.#htmlAttributeNameFirstRegex.exec(ch)) { endAttribute(); expect = expectAttributeNameOrEnd; p--; } break; case expectAttributeValue: if (attributeValue.length == 0) { if (attributeQuote === null) { if (isWhitespace) { // skip whitespace } else if (ch == '"' || ch == "'") { attributeQuote = ch; } else { attributeQuote = ''; // explicitly unquoted p--; } } else { if (ch === attributeQuote) { // Empty string endAttribute(); expect = expectAttributeNameOrEnd; } else if (attributeQuote === '' && (ch == '/' || ch == '>')) { return null; } else { attributeValue += ch; } } } else { if (ch === attributeQuote) { endAttribute(); expect = expectAttributeNameOrEnd; } else if (attributeQuote === '' && isWhitespace) { endAttribute(); expect = expectAttributeNameOrEnd; } else { attributeValue += ch; } } break; case expectCloseBracket: if (isWhitespace) { // ignore whitespace } else if (ch == '>') { fullTag = line.substring(0, p + 1); break; } break; } } if (fullTag === 
null) return null; endAttribute(); return new _MDHTMLTag(fullTag, tagName, isCloser, attributes); } static #textWhitespaceRegex = /^(\s*)(?:(\S|\S.*\S)(\s*?))?$/; // 1=leading WS, 2=text, 3=trailing WS // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only. static #baseURLRegex = /(?:(?:(?:[a-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[a-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[a-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/i; // Modified from https://emailregex.com/ to remove capture groups. static #baseEmailRegex = /(?:(?:[^<>()\[\]\\.,;:\s@"]+(?:\.[^<>()\[\]\\.,;:\s@"]+)*)|(?:".+"))@(?:(?:\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(?:(?:[a-z\-0-9]+\.)+[a-z]{2,}))/i; static #footnoteWithTitleRegex = /^\[\^\s*([^\]"]+?)\s+"(.*?)"\s*\]/; // 1=symbol, 2=title static #footnoteRegex = /^\[\^\s*([^\]]+?)\s*\]/; // 1=symbol static #labelRegex = /^\[(.*?)\]/; // 1=content static #urlWithTitleRegex = /^\((\S+?)\s+"(.*?)"\)/i; static #urlRegex = /^\((\S+?)\)/i; // 1=URL static #emailWithTitleRegex = new RegExp("^\\(\\s*(" + this.#baseEmailRegex.source + ")\\s+\"(.*?)\"\\s*\\)", "i"); // 1=email, 2=title static #emailRegex = new RegExp("^\\(\\s*(" + this.#baseEmailRegex.source + ")\\s*\\)", "i"); // 1=email static #simpleURLRegex = new RegExp("^<" + this.#baseURLRegex.source + ">", "i"); // 1=URL static #simpleEmailRegex = new RegExp("^<" + this.#baseEmailRegex.source + ">", "i"); // 1=email /** * @param {String} line * @returns {_MDToken[]} tokens */ static #tokenize(line) { var tokens = []; var text = ''; var expectLiteral = false; var groups = null; var tag = null; const endText = function() { if (text.length == 0) return; let textGroups = Markdown.#textWhitespaceRegex.exec(text); if (textGroups !== null) { if (textGroups[1].length > 0) { tokens.push(new _MDToken(textGroups[1], _MDTokenType.Whitespace, textGroups[1])); } if (textGroups[2] !== undefined && textGroups[2].length > 0) { 
tokens.push(new _MDToken(textGroups[2], _MDTokenType.Text, textGroups[2])); } if (textGroups[3] !== undefined && textGroups[3].length > 0) { tokens.push(new _MDToken(textGroups[3], _MDTokenType.Whitespace, textGroups[3])); } } else { tokens.push(new _MDToken(text, _MDTokenType.Text, text)); } text = ''; } for (var p = 0; p < line.length; p++) { let ch = line.substring(p, p + 1); let remainder = line.substring(p); if (expectLiteral) { text += ch; expectLiteral = false; continue; } if (ch == '\\') { expectLiteral = true; } else if (ch == '*') { endText(); tokens.push(new _MDToken(ch, _MDTokenType.Asterisk)); } else if (ch == '_') { endText(); tokens.push(new _MDToken(ch, _MDTokenType.Underscore)); } else if (ch == '`') { endText(); tokens.push(new _MDToken(ch, _MDTokenType.Backtick)); } else if (ch == '~') { endText(); tokens.push(new _MDToken(ch, _MDTokenType.Tilde)); } else if (ch == '!') { endText(); tokens.push(new _MDToken(ch, _MDTokenType.Bang)); } else if (groups = this.#footnoteWithTitleRegex.exec(remainder)) { // Footnote with title [^1 "Foo"] endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.Footnote, groups[1], groups[2])); p += groups[0].length - 1; } else if (groups = this.#footnoteRegex.exec(remainder)) { // Footnote without title [^1] endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.Footnote, groups[1])); p += groups[0].length - 1; } else if (groups = this.#labelRegex.exec(remainder)) { // Label/ref for link/image [Foo] endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.Label, groups[1])); p += groups[0].length - 1; } else if (groups = this.#urlWithTitleRegex.exec(remainder)) { // URL with title (https://foo "Bar") endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.URL, groups[1], groups[2])); p += groups[0].length - 1; } else if (groups = this.#emailWithTitleRegex.exec(remainder)) { // Email address with title (user@example.com "Foo") endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.Email, groups[1])); 
p += groups[0].length - 1; } else if (groups = this.#urlRegex.exec(remainder)) { // URL (https://example.com) endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.URL, groups[1])); p += groups[0].length - 1; } else if (groups = this.#emailRegex.exec(remainder)) { // Email (user@example.com) endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.Email, groups[1])); p += groups[0].length - 1; } else if (groups = this.#simpleURLRegex.exec(remainder)) { // Simple URL endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.SimpleLink, groups[1])); p += groups[0].length - 1; } else if (groups = this.#simpleEmailRegex.exec(remainder)) { // Simple email endText(); tokens.push(new _MDToken(groups[0], _MDTokenType.SimpleEmail, groups[1])); p += groups[0].length - 1; } else if (tag = this.#htmlTag(remainder)) { endText(); tokens.push(new _MDToken(tag.fullTag, _MDTokenType.HTMLTag, tag.fullTag, null, tag)); p += tag.fullTag.length - 1; } else { text += ch; } } endText(); return tokens; } static #firstTokenIndex(tokens, pattern, startIndex=0) { for (var t = startIndex; t < tokens.length; t++) { var matchedAll = true; for (var p = 0; p < pattern.length; p++) { var t0 = t + p; if (t0 >= tokens.length) return null; let token = tokens[t0]; let elem = pattern[p]; if (elem == _MDTokenType.META_AnyNonWhitespace) { if (token instanceof _MDToken && token.type == _MDTokenType.Whitespace) { matchedAll = false; break; } } else { if (!(token instanceof _MDToken) || token.type != elem) { matchedAll = false; break; } } } if (matchedAll) { return t; } } return null; } /** * @param {_MDState} state * @param {String} line * @returns {_MDBlock|null} */ static #readInline(state, line) { var tokens = this.#tokenize(line); return new _MDInlineBlock(this.#tokensToSpans(tokens)); } /** * @param {Array} tokens * @returns {_MDSpan[]} spans */ static #tokensToSpans(tokens) { var spans = tokens.slice(0, tokens.length); var anyChanges = false; var index, index0; // First pass - contiguous 
constructs do { anyChanges = false; // ![alt](image.jpg) if ((index = this.#firstTokenIndex(spans, [ _MDTokenType.Bang, _MDTokenType.Label, _MDTokenType.URL, ])) !== null) { let alt = spans[index + 1]; let url = spans[index + 2]; spans.splice(index, 3, new _MDImageSpan(url.content, alt.content)); anyChanges = true; } // ![alt][ref] else if ((index = this.#firstTokenIndex(spans, [ _MDTokenType.Bang, _MDTokenType.Label, _MDTokenType.Label, ])) !== null) { let alt = spans[index + 1]; let ref = spans[index + 2]; spans.splice(index, 3, new _MDReferencedImageSpan(ref.content, alt.content)); anyChanges = true; } // [text](link.html) else if ((index = this.#firstTokenIndex(spans, [ _MDTokenType.Label, _MDTokenType.URL, ])) !== null) { let text = spans[index + 0]; let url = spans[index + 1]; spans.splice(index, 2, new _MDLinkSpan(url.content, this.#readInline(state, text.content))); anyChanges = true; } // [text][ref] else if ((index = this.#firstTokenIndex(spans, [ _MDTokenType.Label, _MDTokenType.Label, ])) !== null) { let text = spans[index + 0]; let ref = spans[index + 1]; spans.splice(index, 2, new _MDReferencedLinkSpan(ref, this.#readInline(state, text))); anyChanges = true; } // [^1] else if ((index = this.#firstTokenIndex(spans, [ _MDTokenType.Footnote, ])) !== null) { let symbol = spans[index]; spans.splice(index, 1, new _MDFootnoteReferenceSpan(symbol.content)); anyChanges = true; } } while (anyChanges); /** * @param {_MDTokenType[]} delimiter * @param {Set<_MDTokenType>} disallowedInnerTokens */ const matchPair = function(delimiter, disallowedInnerTokens=new Set()) { var searchStart = 0; var hasNewStart = false; do { hasNewStart = false; let startIndex = Markdown.#firstTokenIndex(spans, delimiter.concat(_MDTokenType.META_AnyNonWhitespace), searchStart); if (startIndex === null) return null; let endIndex = Markdown.#firstTokenIndex(spans, [_MDTokenType.META_AnyNonWhitespace].concat(delimiter), startIndex + delimiter.length); if (endIndex === null) return null; let 
contentTokens = spans.slice(startIndex + delimiter.length, endIndex + 1); if (disallowedInnerTokens.size > 0) { for (const token of contentTokens) { if (token instanceof _MDToken && disallowedInnerTokens.has(token.type)) { searchStart = startIndex + 1; hasNewStart = true; break; } } if (hasNewStart) continue; } let contentSpans = Markdown.#tokensToSpans(contentTokens); return { startIndex: startIndex, toDelete: endIndex - startIndex + delimiter.length + 1, content: new _MDMultiSpan(contentSpans), }; } while (hasNewStart); return null; }; var spanMatch = null; // Second pass - paired constructs. Prioritize pairs with no other paired tokens inside. const delimiterTokens = new Set([ _MDTokenType.Backtick, _MDTokenType.Tilde, _MDTokenType.Asterisk, _MDTokenType.Underscore ]); for (let disallowed of [ delimiterTokens, new Set() ]) { do { anyChanges = false; // ``code`` if (spanMatch = matchPair([ _MDTokenType.Backtick, _MDTokenType.Backtick ], disallowed)) { spans.splice(spanMatch.startIndex, spanMatch.toDelete, new _MDCodeSpan(spanMatch.content)); anyChanges = true; } // ~~strike~~ else if (spanMatch = matchPair([ _MDTokenType.Tilde, _MDTokenType.Tilde ], disallowed)) { spans.splice(spanMatch.startIndex, spanMatch.toDelete, new _MDStrikethroughSpan(spanMatch.content)); anyChanges = true; } // **strong** __strong__ else if (spanMatch = (matchPair([ _MDTokenType.Asterisk, _MDTokenType.Asterisk ], disallowed) || matchPair([ _MDTokenType.Underscore, _MDTokenType.Underscore ], disallowed))) { spans.splice(spanMatch.startIndex, spanMatch.toDelete, new _MDStrongSpan(spanMatch.content)); anyChanges = true; } // `code` if (spanMatch = matchPair([ _MDTokenType.Backtick ], disallowed)) { spans.splice(spanMatch.startIndex, spanMatch.toDelete, new _MDCodeSpan(spanMatch.content)); anyChanges = true; } // ~strike~ else if (spanMatch = matchPair([ _MDTokenType.Tilde ], disallowed)) { spans.splice(spanMatch.startIndex, spanMatch.toDelete, new _MDStrikethroughSpan(spanMatch.content)); 
anyChanges = true; } // *strong* _strong_ else if (spanMatch = (matchPair([ _MDTokenType.Asterisk ], disallowed) || matchPair([ _MDTokenType.Underscore ], disallowed))) { spans.splice(spanMatch.startIndex, spanMatch.toDelete, new _MDEmphasisSpan(spanMatch.content)); anyChanges = true; } } while (anyChanges); } spans = spans.map(function(span) { if (span instanceof _MDToken) { return new _MDTextSpan(span.original); } else if (span instanceof _MDSpan) { return span; } else { throw new Error(`Unexpected span type ${span.constructor.name}`); } }); return spans; } /** * Reads the contents of something like a list item * @param {_MDState} state * @param {number} firstLineStartPos * @param {RegExp} stopRegex * @param {Boolean} inList * @returns {_MDBlock} */ static #readInteriorContent(state, firstLineStartPos, stopRegex, inList=false) { // FIXME: When reading
  • content need to detect nested list without // a blank line var p = state.p; var seenBlankLine = false; var needsBlocks = false; var lines = []; while (p < state.lines.length) { let line = state.lines[p++]; if (p == state.p + 1) { line = line.substring(firstLineStartPos); } let isBlank = line.trim().length == 0; let isIndented = /^\s+/.exec(line) !== null; if (isBlank) { seenBlankLine = true; lines.push(line.trim()); } else if (stopRegex && stopRegex.exec(line)) { p--; break; } else if (isIndented) { if (seenBlankLine) { needsBlocks = true; } lines.push(this.#stripIndent(line)); } else { if (seenBlankLine) { p--; break; } lines.push(this.#stripIndent(line)); } } while (lines.length > 0 && lines[lines.length - 1].trim().length == 0) { lines.pop(); } if (needsBlocks) { let substate = state.copy(lines); let blocks = this.#readBlocks(substate); state.p = p; return new _MDMultiBlock(blocks); } else { state.p = p; return this.#readInline(state, lines.join("\n")); } } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readUnderlineHeader(state) { var p = state.p; if (!state.hasLines(2)) return null; let contentLine = state.lines[p++].trim(); let underLine = state.lines[p++].trim(); if (contentLine == '') return null; if (/^=+$/.exec(underLine)) { state.p = p; return new _MDHeaderBlock(1, this.#readInline(state, contentLine)); } if (/^\-+$/.exec(underLine)) { state.p = p; return new _MDHeaderBlock(2, this.#readInline(state, contentLine)); } return null; } static #hashHeaderRegex = /^(#{1,6})\s*([^#].*)\s*$/; // 1=hashes, 2=content /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readHashHeader(state) { var p = state.p; var groups = this.#hashHeaderRegex.exec(state.lines[p++]); if (groups === null) return null; state.p = p; return new _MDHeaderBlock(groups[1].length, this.#readInline(state, groups[2])); } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readBlockQuote(state) { var blockquoteLines = []; var p = 
state.p; while (p < state.lines.length) { let line = state.lines[p++]; if (line.startsWith(">")) { blockquoteLines.push(line); } else { break; } } if (blockquoteLines.length > 0) { let contentLines = blockquoteLines.map(function(line) { return line.substring(1).replace(/^ {0,3}\t?/, ''); }); let substate = state.copy(contentLines); let quotedBlocks = this.#readBlocks(substate); state.p = p; return new _MDBlockquoteBlock(quotedBlocks); } return null; } static #unorderedListRegex = /^([\*\+\-]\s+)(.*)$/; // 1=bullet, 2=content static #unorderedListItemRegex = /^[\*\+\-]\s+/; /** * @param {_MDState} state * @returns {_MDListItemBlock|null} */ static #readUnorderedListItem(state) { var p = state.p; let line = state.lines[p]; let groups = this.#unorderedListRegex.exec(line); if (groups === null) return null; return new _MDListItemBlock(this.#readInteriorContent(state, groups[1].length, this.#unorderedListItemRegex, true)); } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readUnorderedList(state) { var items = []; var item = null; do { item = this.#readUnorderedListItem(state); if (item) items.push(item); } while (item); if (items.length == 0) return null; return new _MDUnorderedListBlock(items); } static #orderedListRegex = /^(\d+)(\.\s+)(.*)$/; // 1=number, 2=dot, 3=content static #orderedListItemRegex = /^\d+\.\s+/; /** * @param {_MDState} state * @returns {_MDListItemBlock|null} */ static #readOrderedListItem(state) { var p = state.p; let line = state.lines[p]; let groups = this.#orderedListRegex.exec(line); if (groups === null) return null; return new _MDListItemBlock(this.#readInteriorContent(state, groups[1].length + groups[2].length, this.#orderedListItemRegex, true)); } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readOrderedList(state) { var items = []; var item = null; do { item = this.#readOrderedListItem(state); if (item) items.push(item); } while (item); if (items.length == 0) return null; return new 
_MDOrderedListBlock(items); } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readFencedCodeBlock(state) { var p = state.p; if (state.lines[p++].trim() != '```') return null; var codeLines = []; while (state.hasLines(1, p)) { let line = state.lines[p++]; if (line.trim() == '```') { state.p = p; return new _MDCodeBlock(codeLines.join("\n")); } codeLines.push(line); } return null; } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readIndentedCodeBlock(state) { var p = state.p; var codeLines = []; while (state.hasLines(1, p)) { let line = state.lines[p++]; if (this.#countIndents(line, true) < 1) { p--; break; } codeLines.push(this.#stripIndent(line)); } if (codeLines.length == 0) return null; state.p = p; return new _MDCodeBlock(codeLines.join("\n")); } static #horizontalRuleRegex = /^\s*(?:\-(?:\s*\-){2,}|\*(?:\s*\*){2,})\s*$/; /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readHorizontalRule(state) { var p = state.p; let line = state.lines[p++]; if (this.#horizontalRuleRegex.exec(line)) { state.p = p; return new _MDHorizontalRuleBlock(); } return null; } /** * @param {_MDState} state * @param {Boolean} isHeader * @return {_MDTableRowBlock|null} */ static #readTableRow(state, isHeader) { if (!state.hasLines(1)) return null; var p = state.p; let line = state.lines[p++].trim(); if (/.*\|.*/.exec(line) === null) return null; if (line.startsWith('|')) line = line.substring(1); if (line.endsWith('|')) line = line.substring(0, line.length - 1); let cellTokens = line.split('|'); let cells = cellTokens.map(function(token) { let content = Markdown.#readInline(state, token); return isHeader ? 
new _MDTableHeaderCellBlock(content) : new _MDTableCellBlock(content); }); state.p = p; return new _MDTableRowBlock(cells); } /** * @param {String} line * @returns {_MDHAlign[]} */ static #parseColumnAlignments(line) { line = line.trim(); if (line.startsWith('|')) line = line.substring(1); if (line.endsWith('|')) line = line.substring(0, line.length - 1); return line.split('|').map(function(token) { token = token.trim(); if (token.startsWith(':')) { if (token.endsWith(':')) { return _MDHAlign.Center; } return _MDHAlign.Left; } else if (token.endsWith(':')) { return _MDHAlign.Right; } return null; }); } static #tableDividerRegex = /^\s*[|]?(?:\s*[:]?-+[:]?\s*\|)(?:\s*[:]?-+[:]?\s*)[|]?\s*$/; /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readTable(state) { if (!state.hasLines(2)) return null; let startP = state.p; let headerRow = this.#readTableRow(state, true); if (headerRow === null) { state.p = startP; return null; } let dividerLine = state.lines[state.p++]; let dividerGroups = this.#tableDividerRegex.exec(dividerLine); if (dividerGroups === null) { state.p = startP; return null; } let columnAlignments = this.#parseColumnAlignments(dividerLine); headerRow.applyAlignments(columnAlignments); var bodyRows = []; while (state.hasLines(1)) { let row = this.#readTableRow(state, false); if (row === null) break; row.applyAlignments(columnAlignments); bodyRows.push(row); } return new _MDTableBlock(headerRow, bodyRows); } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readDefinitionList(state) { // TODO: Definition list return null; } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readFootnoteDef(state) { // TODO: Footnote definition return null; } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readAbbreviationDef(state) { // TODO: Abbreviation definition return null; } /** * @param {_MDState} state * @returns {_MDBlock|null} */ static #readParagraph(state) { var paragraphLines = []; var p = 
state.p; while (p < state.lines.length) { let line = state.lines[p++]; if (line.trim().length == 0) { break; } paragraphLines.push(line); } if (paragraphLines.length > 0) { state.p = p; let content = paragraphLines.join("\n"); return new _MDParagraphBlock(this.#readInline(state, content)); } return null; } /** * @param {String} markdown * @returns {String} HTML */ static toHTML(markdown, config=new MDConfig()) { var state = new _MDState(); let lines = markdown.split(/(?:\n|\r|\r\n)/); state.lines = lines; let blocks = this.#readBlocks(state); let html = _MDBlock.toHTML(blocks); return html; } }