`
* element.
*
* Supports `MDTagModifier` suffix.
*/
class MDFencedCodeBlockReader extends MDReader {
    /**
     * Tries to read a fenced code block starting at the state's current line.
     *
     * The opening line (after any tag modifier has been split off by
     * `MDTagModifier::fromLine`) must end in three backticks optionally
     * followed by an alphanumeric language name. Following lines are
     * collected verbatim until a line consisting solely of three backticks.
     * If no closing fence is found, nothing is consumed and `null` is returned.
     *
     * @param MDState $state
     * @return ?MDBlockNode an `MDCodeBlockNode` on success, otherwise `null`
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        if (!$state->hasLines(2)) return null;
        $cursor = $state->p;
        [$fence, $modifier] = MDTagModifier::fromLine($state->lines[$cursor], $state);
        $cursor++;
        if (!mb_eregi('```\\s*([a-z0-9]*)\\s*$', $fence, $groups)) return null;
        $language = MDUtils::makeString($groups[1], null);
        $body = [];
        for (; $state->hasLines(1, $cursor); $cursor++) {
            $current = $state->lines[$cursor];
            if (trim($current) != '```') {
                $body[] = $current;
                continue;
            }
            // Closing fence: consume it and emit the block
            $state->p = $cursor + 1;
            $block = new MDCodeBlockNode(implode("\n", $body), $language);
            if ($modifier) $modifier->applyTo($block);
            return $block;
        }
        return null;
    }
}
/**
* Block reader for code blocks denoted by indenting text.
*/
class MDIndentedCodeBlockReader extends MDReader {
    /**
     * Collects consecutive lines for which `MDUtils::countIndents($line, true)`
     * reports at least one indent and returns them, with one indent stripped
     * via `MDUtils::stripIndent`, as a code block.
     *
     * @param MDState $state
     * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the current
     * line is not indented
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $cursor = $state->p;
        $collected = [];
        while ($state->hasLines(1, $cursor)) {
            $candidate = $state->lines[$cursor];
            // First non-indented line ends the block and is left unconsumed
            if (MDUtils::countIndents($candidate, true) < 1) break;
            $collected[] = MDUtils::stripIndent($candidate);
            $cursor++;
        }
        if (count($collected) == 0) return null;
        $state->p = $cursor;
        return new MDCodeBlockNode(implode("\n", $collected));
    }
}
/**
* Block reader for horizontal rules. Composed of three or more hyphens or
* asterisks on a line by themselves, with or without intermediate whitespace.
*/
class MDHorizontalRuleReader extends MDReader {
    // Three or more hyphens, or three or more asterisks, optionally separated
    // and surrounded by whitespace, alone on the line.
    private const horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

    /**
     * Reads a horizontal rule from the current line. Any tag modifier split
     * off by `MDTagModifier::fromLine` is applied to the resulting node.
     *
     * @param MDState $state
     * @return ?MDBlockNode an `MDHorizontalRuleNode`, or `null` if the line
     * is not a rule
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $index = $state->p;
        [$text, $modifier] = MDTagModifier::fromLine($state->lines[$index], $state);
        if (!mb_eregi(self::horizontalRuleRegex, $text)) return null;
        $state->p = $index + 1;
        $rule = new MDHorizontalRuleNode();
        if ($modifier) $modifier->applyTo($rule);
        return $rule;
    }

    /**
     * Orders this reader ahead of `MDUnorderedListReader`; has no preference
     * relative to any other reader.
     */
    public function compareBlockOrdering(MDReader $other): int {
        return ($other instanceof MDUnorderedListReader) ? -1 : 0;
    }
}
/**
* Block reader for tables.
*
* Supports `MDTagModifier` suffix.
*/
class MDTableReader extends MDReader {
    /**
     * When `true`, any cell containing an `=` is checked for a formula
     * (optionally wrapped in matching punctuation such as `**=A1**`). If one
     * is found, the formula text is kept verbatim instead of being parsed as
     * markdown. Used by the spreadsheet add-on to prevent equation operators
     * from being interpreted as markdown.
     * @var bool
     */
    public bool $preferFormulas = false;

    // Divider row between header and body, e.g. `|:---|:---:|---:|`
    private const tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

    /**
     * Reads one table row from the state's current line. On success the
     * state's line pointer is advanced by one line; on failure it is left
     * untouched.
     *
     * @param MDState $state
     * @param bool $isHeader whether to create header cells instead of regular cells
     * @return ?MDTableRowNode the row, or `null` if there is no line left or
     * the line contains no pipe character
     */
    private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
        if (!$state->hasLines(1)) return null;
        $p = $state->p;
        $line = MDTagModifier::strip(trim($state->lines[$p++]));
        if (!str_contains($line, '|')) return null;
        // Outer pipes are optional and do not delimit an extra empty cell
        if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
        if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
        $cellTokens = explode('|', $line);
        $cells = array_map(function($token) use ($state, $isHeader) {
            $trimmedToken = trim($token);
            $content = null;
            if ($this->preferFormulas && str_contains($trimmedToken, '=')) {
                $content = $this->preserveFormula($state, $trimmedToken);
            }
            // Not a formula (or formulas not preferred): parse as markdown
            $content ??= $state->inlineMarkdownToNode($trimmedToken);
            return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
        }, $cellTokens);
        $state->p = $p;
        return new MDTableRowNode($cells);
    }

    /**
     * Attempts to interpret cell contents as a formula beginning with `=`,
     * optionally wrapped in symmetric punctuation (e.g. `**=SUM(A1:A3)**`).
     * The wrapping punctuation is parsed as markdown around a placeholder and
     * the untouched formula text is then swapped in for the placeholder.
     *
     * @param MDState $state
     * @param string $cellContents
     * @return ?MDNode node containing the verbatim formula, or `null` if the
     * contents do not look like a formula
     */
    private function preserveFormula(MDState $state, string $cellContents): ?MDNode {
        // Up to three prefix punctuation patterns, formula, then three matching
        // suffixes. Not guaranteed to catch every possible syntax but an awful lot.
        // Uses preg_match rather than mb_eregi like the rest of the file.
        $regex = '/^([^a-z0-9\\s]*)([^a-z0-9\\s]*)([^a-z0-9\\s]*)(=.*)\\3\\2\\1$/i';
        if (!preg_match($regex, $cellContents, $groups)) {
            return null;
        }
        $prefix = $groups[1] . $groups[2] . $groups[3];
        $formula = $groups[4];
        if ($prefix === '') {
            return new MDTextNode($formula);
        }
        $suffix = $groups[3] . $groups[2] . $groups[1];
        // Parse substitute markdown with the same prefix and suffix but just
        // an "x" as content. We'll swap in the unaltered formula into the
        // parsed nodes.
        $tempInline = $prefix . 'x' . $suffix;
        $tempNodes = $state->inlineMarkdownToNodes($tempInline);
        if (count($tempNodes) != 1) return null;
        $foundText = false;
        if ($tempNodes[0] instanceof MDTextNode && $tempNodes[0]->text === 'x') {
            $tempNodes[0]->text = $formula;
            $foundText = true;
        } else {
            $tempNodes[0]->visitChildren(function($node) use ($formula, &$foundText) {
                if ($node instanceof MDTextNode && $node->text === 'x') {
                    $node->text = $formula;
                    $foundText = true;
                }
            });
        }
        if (!$foundText) return null;
        return $tempNodes[0];
    }

    /**
     * Derives per-column alignment from a divider line such as
     * `| :--- | :---: | ---: |`.
     *
     * @param string $line divider line
     * @return (?string)[] one entry per column: `'left'`, `'center'`,
     * `'right'`, or `null` when no alignment is specified
     */
    private function parseColumnAlignments(string $line): array {
        $line = trim($line);
        if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
        if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
        return array_map(function($token) {
            // Trim each token: after the outer pipes are removed the first and
            // last tokens still carry their padding whitespace, which would
            // otherwise hide a leading or trailing colon.
            $token = trim($token);
            $startsWithColon = str_starts_with($token, ':');
            $endsWithColon = str_ends_with($token, ':');
            if ($startsWithColon) return $endsWithColon ? 'center' : 'left';
            return $endsWithColon ? 'right' : null;
        }, explode('|', $line));
    }

    /**
     * Reads a table: a header row, a divider row, then zero or more body
     * rows. A tag modifier found on the header line is applied to the table.
     * If the header or divider does not match, the state's line pointer is
     * restored and `null` is returned.
     *
     * @param MDState $state
     * @return ?MDBlockNode an `MDTableNode`, or `null` if no table starts here
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        if (!$state->hasLines(2)) return null;
        $startP = $state->p;
        $firstLine = $state->lines[$startP];
        $modifier = MDTagModifier::fromLine($firstLine, $state)[1];
        $headerRow = $this->readTableRow($state, true);
        if ($headerRow === null) {
            $state->p = $startP;
            return null;
        }
        $dividerLine = $state->lines[$state->p++];
        if (!mb_eregi(self::tableDividerRegex, $dividerLine)) {
            $state->p = $startP;
            return null;
        }
        $columnAlignments = $this->parseColumnAlignments($dividerLine);
        $bodyRows = [];
        while ($state->hasLines(1)) {
            $row = $this->readTableRow($state, false);
            if ($row === null) break;
            $bodyRows[] = $row;
        }
        $table = new MDTableNode($headerRow, $bodyRows);
        $table->columnAlignments = $columnAlignments;
        if ($modifier) $modifier->applyTo($table);
        return $table;
    }
}
/**
* Block reader for definition lists. Definitions go directly under terms starting
* with a colon.
*/
class MDDefinitionListReader extends MDReader {
    /**
     * Reads a run of lines up to the next blank line as a definition list.
     * Lines starting with a colon and whitespace are definitions, lines
     * starting with whitespace continue the previous entry, and all other
     * lines are terms. At least one term and one definition are required.
     *
     * @param MDState $state
     * @return ?MDBlockNode an `MDDefinitionListNode`, or `null` if the lines
     * do not form a definition list
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $cursor = $state->p;
        $terms = 0;
        $definitions = 0;
        $entries = [];
        while ($state->hasLines(1, $cursor)) {
            $current = $state->lines[$cursor++];
            if (trim($current) === '') break;
            if (mb_eregi('^\\s+', $current)) {
                // Continuation line; invalid if there is nothing to continue
                if (count($entries) == 0) return null;
                $entries[count($entries) - 1] .= "\n" . $current;
                continue;
            }
            $entries[] = $current;
            if (mb_eregi('^:\\s+', $current)) {
                $definitions++;
            } else {
                $terms++;
            }
        }
        if ($terms == 0 || $definitions == 0) return null;
        $nodes = [];
        foreach ($entries as $entry) {
            if (mb_eregi('^:\\s+(.*?)$', $entry, $groups)) {
                $text = MDUtils::makeString($groups[1]);
                $nodes[] = new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($text));
            } else {
                $nodes[] = new MDDefinitionListTermNode($state->inlineMarkdownToNodes($entry));
            }
        }
        $state->p = $cursor;
        return new MDDefinitionListNode($nodes);
    }
}
/**
* Block reader for defining footnote contents. Footnotes can be defined anywhere
* in the document but will always be rendered at the end of a page or end of
* the document.
*/
class MDFootnoteReader extends MDReader {
    private const footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
    private const footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol

    /**
     * Stores the parsed content of a footnote definition on the root state,
     * keyed by symbol. A later definition with the same symbol replaces an
     * earlier one.
     *
     * @param MDState $state
     * @param string $symbol
     * @param MDNode[] $footnote
     */
    private function defineFootnote(MDState $state, string $symbol, array $footnote) {
        $footnotes = $state->root()->userInfo['footnotes'] ?? [];
        $footnotes[$symbol] = $footnote;
        $state->root()->userInfo['footnotes'] = $footnotes;
    }

    /**
     * Records an occurrence id of a footnote reference under its symbol on
     * the root state.
     *
     * @param MDState $state
     * @param string $symbol
     * @param int $unique occurrence id of the reference
     */
    private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
        // Fall back to an empty map, like the sibling helpers do, in case
        // preProcess has not populated the key.
        $footnoteInstances = $state->root()->userInfo['footnoteInstances'] ?? [];
        $instances = $footnoteInstances[$symbol] ?? [];
        $instances[] = $unique;
        $footnoteInstances[$symbol] = $instances;
        $state->root()->userInfo['footnoteInstances'] = $footnoteInstances;
    }

    /**
     * Returns the numeric id for a footnote symbol, assigning the next
     * sequential id the first time a symbol is seen.
     *
     * @param MDState $state
     * @param string $symbol
     * @return int
     */
    private function idForFootnoteSymbol(MDState $state, string $symbol): int {
        $footnoteIds = $state->root()->userInfo['footnoteIds'] ?? [];
        $existing = $footnoteIds[$symbol] ?? null;
        if ($existing !== null) return $existing;
        $nextFootnoteId = $state->root()->userInfo['nextFootnoteId'] ?? 1;
        $id = $nextFootnoteId++;
        $footnoteIds[$symbol] = $id;
        $state->root()->userInfo['nextFootnoteId'] = $nextFootnoteId;
        $state->root()->userInfo['footnoteIds'] = $footnoteIds;
        return $id;
    }

    /**
     * Resets all footnote bookkeeping on the root state.
     */
    public function preProcess(MDState $state) {
        $state->root()->userInfo['footnoteInstances'] = [];
        $state->root()->userInfo['footnotes'] = [];
        $state->root()->userInfo['footnoteIds'] = [];
        $state->root()->userInfo['nextFootnoteId'] = 1;
    }

    /**
     * Reads a footnote definition of the form `[^symbol]: content`, including
     * any following lines that begin with whitespace. The definition is
     * stored on the root state and an empty block is returned in its place.
     *
     * @param MDState $state
     * @return ?MDBlockNode empty block if a definition was read, else `null`
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
        $symbol = MDUtils::makeString($groups[1]);
        $def = MDUtils::makeString($groups[2]);
        while ($state->hasLines(1, $p)) {
            $line = $state->lines[$p];
            // Only lines starting with whitespace continue the definition
            if (!mb_eregi('^\\s+', $line)) break;
            $def .= "\n" . $line;
            $p++;
        }
        $content = $state->inlineMarkdownToNodes($def);
        $this->defineFootnote($state, $symbol, $content);
        $state->p = $p;
        return new MDBlockNode(); // empty
    }

    /**
     * Tokenizes a footnote reference at the start of `$line`: either
     * `[^symbol "title"]` or `[^symbol]`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if (mb_eregi(self::footnoteWithTitleRegex, $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
        }
        if (mb_eregi(self::footnoteRegex, $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
        }
        return null;
    }

    /**
     * Replaces the first footnote token with an `MDFootnoteNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
            $symbol = $match->tokens[0]->content;
            array_splice($tokens, $match->index, 1, [new MDFootnoteNode($symbol)]);
            return true;
        }
        return false;
    }

    /**
     * Numbers every footnote reference found in `$blocks` and, if any
     * footnote definitions exist, appends an `MDFootnoteListNode`.
     *
     * @param MDState $state
     * @param MDBlockNode[] $blocks
     */
    public function postProcess(MDState $state, array &$blocks) {
        $nextOccurrenceId = 1;
        foreach ($blocks as $block) {
            $block->visitChildren(function($node) use (&$nextOccurrenceId, $state) {
                if (!($node instanceof MDFootnoteNode)) return;
                $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
                $node->occurrenceId = $nextOccurrenceId++;
                $node->displaySymbol = strval($node->footnoteId);
                $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
            });
        }
        // Definitions are stored on the root state (see defineFootnote), so
        // they must be read from there as well.
        $footnotes = $state->root()->userInfo['footnotes'] ?? [];
        if (count($footnotes) == 0) return;
        $blocks[] = new MDFootnoteListNode();
    }

    /** Orders this reader ahead of the link and image readers. */
    public function compareBlockOrdering(MDReader $other): int {
        if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
            return -1;
        }
        return 0;
    }

    /** Orders this reader ahead of the link and image readers. */
    public function compareTokenizeOrdering(MDReader $other): int {
        if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
            return -1;
        }
        return 0;
    }

    /** Orders this reader ahead of the link and image readers. */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
            return -1;
        }
        return 0;
    }
}
/**
* Block reader for abbreviation definitions. Anywhere the abbreviation appears
* in plain text will have its definition available when hovering over it.
* Definitions can appear anywhere in the document. Their content should only
* contain simple text, not markdown.
*/
class MDAbbreviationReader extends MDReader {
    /**
     * Stores an abbreviation and its definition on the root state. A later
     * definition of the same abbreviation replaces an earlier one.
     */
    private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
        $abbrevs = $state->root()->userInfo['abbreviations'] ?? [];
        $abbrevs[$abbreviation] = $definition;
        $state->root()->userInfo['abbreviations'] = $abbrevs;
    }

    /**
     * Resets the abbreviation map on the root state.
     */
    public function preProcess(MDState $state) {
        $state->root()->userInfo['abbreviations'] = [];
    }

    /**
     * Reads an abbreviation definition of the form `*[ABBR]: definition`.
     * The definition is stored on the root state and an empty block is
     * returned in its place.
     *
     * @param MDState $state
     * @return ?MDBlockNode empty block if a definition was read, else `null`
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        $line = $state->lines[$p++];
        if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
        $abbrev = MDUtils::makeString($groups[1]);
        $def = MDUtils::makeString($groups[2]);
        $this->defineAbbreviation($state, $abbrev, $def);
        $state->p = $p;
        return new MDBlockNode(); // empty
    }

    /**
     * Replaces every occurrence of a defined abbreviation inside text nodes
     * with an `MDAbbreviationNode`, splitting the surrounding text into
     * separate text nodes.
     *
     * @param MDState $state
     * @param MDNode[] $blocks
     */
    public function postProcess(MDState $state, array &$blocks) {
        $abbreviations = $state->root()->userInfo['abbreviations'] ?? [];
        if (count($abbreviations) == 0) return; // nothing to substitute
        MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
            if (!($original instanceof MDTextNode)) return null;
            $changed = false;
            $elems = [ $original->text ]; // mix of strings and MDNodes
            for ($i = 0; $i < count($elems); $i++) {
                $text = $elems[$i];
                if (!is_string($text)) continue;
                foreach ($abbreviations as $key => $definition) {
                    // PHP casts numeric-string array keys (e.g. "404") to int;
                    // convert back so the string functions and the node
                    // constructor always receive a string.
                    $abbreviation = strval($key);
                    $index = strpos($text, $abbreviation);
                    if ($index === false) continue;
                    $prefix = substr($text, 0, $index);
                    $suffix = substr($text, $index + strlen($abbreviation));
                    array_splice($elems, $i, 1, [$prefix,
                        new MDAbbreviationNode($abbreviation, $definition),
                        $suffix]);
                    $i = -1; // start over
                    $changed = true;
                    break;
                }
            }
            if (!$changed) return null;
            $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
            return new MDNode($nodes);
        });
    }
}
/**
* Block reader for simple paragraphs. Paragraphs are separated by a blank (or
* whitespace-only) line. This reader is prioritized after every other reader
* since there is no distinguishing syntax.
*/
class MDParagraphReader extends MDReader {
    /**
     * Gathers lines up to (and consuming) the next blank or whitespace-only
     * line and wraps them in a paragraph. If the gathered lines span the
     * entire document, no paragraph is produced.
     *
     * @param MDState $state
     * @return ?MDBlockNode an `MDParagraphNode`, or `null`
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $cursor = $state->p;
        $collected = [];
        while ($state->hasLines(1, $cursor)) {
            $candidate = $state->lines[$cursor++];
            if (trim($candidate) === '') break;
            $collected[] = $candidate;
        }
        // If it's the entire document don't wrap it in a paragraph
        $spansWholeDocument = $state->p == 0 && $cursor >= count($state->lines);
        if ($spansWholeDocument || count($collected) == 0) return null;
        $state->p = $cursor;
        $text = implode("\n", $collected);
        return new MDParagraphNode($state->inlineMarkdownToNodes($text));
    }

    /**
     * Sorts this reader after every other block reader.
     */
    public function compareBlockOrdering(MDReader $other): int {
        return 1; // always dead last
    }
}
/**
* Abstract base class for readers that look for one or two delimiting tokens
* on either side of some content. E.g. `**strong**`.
*/
class MDSimplePairInlineReader extends MDReader {
    // Passes, as implemented by `attemptPair`:
    //   1. Delimiters made of two or more tokens; the content must contain an
    //      even number of tokens of the delimiter's type
    //   2. Single-token delimiters, any content
    //   3. Delimiters made of two or more tokens, any content
    //   4. Single-token delimiters, any content
    // NOTE(review): an earlier comment described pass 2 as also requiring an
    // even interior count, but the code only enforces that on pass 1 — confirm
    // which is intended.
    public function substitutionPassCount(): int { return 4; }

    /**
     * Attempts a substitution of a matched pair of delimiting token types.
     * If successful, the substitution is performed on `$tokens` and `true` is
     * returned, otherwise `false` is returned and the array is untouched.
     *
     * Single-token delimiters (`$count == 1`) are only attempted on passes 2
     * and 4; multi-token delimiters only on passes 1 and 3. On pass 1 (when
     * `substitutionPassCount()` is greater than 1) a match is rejected if its
     * content contains an odd number of tokens of the delimiter's type. On
     * every pass a match is rejected if its content starts or ends with a
     * whitespace token or already contains a node of class `$nodeClass`.
     *
     * @param MDState $state
     * @param int $pass pass number, starting with `1`
     * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
     * @param string $nodeClass class of the node to return if matched
     * @param MDTokenType $delimiter delimiting token
     * @param int $count how many times the token is repeated to form the delimiter
     * @param bool $plaintext whether to create `$nodeClass` with a verbatim
     * content string instead of parsed `MDNode`s
     * @return bool `true` if substitution was performed, `false` if not
     */
    public function attemptPair(MDState $state, int $pass, array &$tokens,
        string $nodeClass, MDTokenType $delimiter, int $count=1,
        bool $plaintext=false): bool {
        if ($count == 1 && $pass != 2 && $pass != 4) return false;
        if ($count > 1 && $pass != 1 && $pass != 3) return false;
        $delimiters = array_fill(0, $count, $delimiter);
        $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
        $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters,
            function($content) use ($nodeClass, $isFirstOfMultiplePasses, $delimiter) {
                // `?? null` keeps empty content from raising undefined-key
                // warnings; such content is still accepted as before.
                $first = $content[0] ?? null;
                $last = $content[count($content) - 1] ?? null;
                $firstType = $first instanceof MDToken ? $first->type : null;
                $lastType = $last instanceof MDToken ? $last->type : null;
                if ($firstType == MDTokenType::Whitespace) return false;
                if ($lastType == MDTokenType::Whitespace) return false;
                foreach ($content as $token) {
                    // Don't allow nesting
                    if (MDUtils::typename($token) == $nodeClass) return false;
                }
                if ($isFirstOfMultiplePasses) {
                    $innerCount = 0;
                    foreach ($content as $token) {
                        if ($token instanceof MDToken && $token->type == $delimiter) $innerCount++;
                    }
                    if (($innerCount % 2) != 0) return false;
                }
                return true;
            });
        if ($match === null) return false;
        $state->checkExecutionTime();
        if ($plaintext) {
            // Verbatim content: original token text, or plaintext of nodes
            // that were already substituted
            $content = implode('', array_map(fn($token) => $token instanceof MDToken ?
                $token->original : $token->toPlaintext($state), $match->contentTokens));
        } else {
            $content = $state->tokensToNodes($match->contentTokens);
        }
        $node = new $nodeClass($content);
        array_splice($tokens, $match->startIndex, $match->totalLength, [$node]);
        return true;
    }
}
/**
* Reader for emphasis syntax. Denoted with a single underscore on either side of
* some text (preferred) or a single asterisk on either side.
*/
class MDEmphasisReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading underscore or asterisk.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return match (true) {
            str_starts_with($line, '_') => new MDToken('_', MDTokenType::Underscore),
            str_starts_with($line, '*') => new MDToken('*', MDTokenType::Asterisk),
            default => null,
        };
    }

    /**
     * Tries single-underscore pairs first, then single-asterisk pairs.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        foreach ([ MDTokenType::Underscore, MDTokenType::Asterisk ] as $delimiter) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', $delimiter)) return true;
        }
        return false;
    }

    /**
     * Sorts this reader after `MDStrongReader` for substitution.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return ($other instanceof MDStrongReader) ? 1 : 0;
    }
}
/**
* Reader for strong syntax. Denoted with two asterisks on either side of some
* text (preferred) or two underscores on either side. Note that if
* `MDUnderlineReader` is in use, it will replace the double-underscore syntax.
*/
class MDStrongReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading asterisk or underscore.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return match (true) {
            str_starts_with($line, '*') => new MDToken('*', MDTokenType::Asterisk),
            str_starts_with($line, '_') => new MDToken('_', MDTokenType::Underscore),
            default => null,
        };
    }

    /**
     * Tries double-asterisk pairs first, then double-underscore pairs.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        foreach ([ MDTokenType::Asterisk, MDTokenType::Underscore ] as $delimiter) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', $delimiter, 2)) return true;
        }
        return false;
    }

    /**
     * Sorts this reader before `MDEmphasisReader` for substitution.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return ($other instanceof MDEmphasisReader) ? -1 : 0;
    }
}
/**
* Reader for strikethrough syntax. Consists of two tildes on either side of
* some text (preferred) or single tildes on either side. Note that if
* `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
*
* The number of recognized tildes can be configured.
*/
class MDStrikethroughReader extends MDSimplePairInlineReader {
    /** Whether `~text~` (one tilde on each side) is recognized. */
    public bool $singleTildeEnabled = true;
    /** Whether `~~text~~` (two tildes on each side) is recognized. */
    public bool $doubleTildeEnabled = true;

    /**
     * Tokenizes a leading tilde.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
        return null;
    }

    /**
     * Attempts the double-tilde syntax, then the single-tilde syntax, each
     * only when its flag is enabled. Previously the flags were swapped:
     * `$singleTildeEnabled` gated the two-tilde attempt and vice versa.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        if ($this->doubleTildeEnabled) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
        }
        if ($this->singleTildeEnabled) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 1)) return true;
        }
        return false;
    }
}
/**
* Reader for underline syntax. Consists of two underscores on either side of
* some text. If used with `MDStrongReader` which also looks for double
* underscores, this reader will take priority.
*/
class MDUnderlineReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading underscore.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '_') ? new MDToken('_', MDTokenType::Underscore) : null;
    }

    /**
     * Substitutes double-underscore pairs with an `MDUnderlineNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $replaced = $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, 2);
        return $replaced;
    }

    /**
     * Sorts this reader before `MDStrongReader` for substitution.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return ($other instanceof MDStrongReader) ? -1 : 0;
    }
}
/**
* Reader for highlight syntax. Consists of pairs of equal signs on either side
* of some text.
*/
class MDHighlightReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading equal sign.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '=') ? new MDToken('=', MDTokenType::Equal) : null;
    }

    /**
     * Substitutes double-equal-sign pairs with an `MDHighlightNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $replaced = $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, 2);
        return $replaced;
    }
}
/**
* Reader for inline code syntax. Consists of one or two delimiting backticks
* around text. The contents between the backticks will be rendered verbatim,
* ignoring any inner markdown syntax. To include a backtick inside, escape it
* with a backslash.
*/
class MDCodeSpanReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading backtick.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '`') ? new MDToken('`', MDTokenType::Backtick) : null;
    }

    /**
     * Tries double-backtick pairs, then single-backtick pairs. The matched
     * content is passed to `MDCodeNode` as a verbatim string rather than as
     * parsed nodes.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        foreach ([ 2, 1 ] as $backtickCount) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, $backtickCount, true)) return true;
        }
        return false;
    }
}
/**
* Reader for subscript syntax. Consists of single tildes on either side of
* some text. If used with `MDStrikethroughReader`, this reader will take
* precedence, and strikethrough can only be done with double tildes.
*/
class MDSubscriptReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading tilde.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '~') ? new MDToken('~', MDTokenType::Tilde) : null;
    }

    /**
     * Substitutes single-tilde pairs with an `MDSubscriptNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $replaced = $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
        return $replaced;
    }

    /**
     * Sorts this reader before `MDStrikethroughReader` for substitution.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return ($other instanceof MDStrikethroughReader) ? -1 : 0;
    }
}
/**
* Reader for superscript syntax. Consists of single caret characters on either
* side of some text.
*/
class MDSuperscriptReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading caret.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '^') ? new MDToken('^', MDTokenType::Caret) : null;
    }

    /**
     * Substitutes single-caret pairs with an `MDSuperscriptNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $replaced = $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
        return $replaced;
    }
}
/**
* Reads a hypertext link. Consists of link text between square brackets
* followed immediately by a URL in parentheses.
*/
class MDLinkReader extends MDReader {
    /**
     * Tokenizes, in order of precedence: a label, an email target, a URL
     * target (each via the matching `MDToken::tokenize*` helper), an
     * angle-bracketed email address, or an angle-bracketed URL.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $label = MDToken::tokenizeLabel($line);
        if ($label) {
            return new MDToken($label[0], MDTokenType::Label, $label[1]);
        }
        $email = MDToken::tokenizeEmail($line);
        if ($email) {
            return new MDToken($email[0], MDTokenType::Email, $email[1], $email[2]);
        }
        $url = MDToken::tokenizeURL($line);
        if ($url) {
            return new MDToken($url[0], MDTokenType::URL, $url[1], $url[2]);
        }
        if (mb_eregi("^<(" . MDUtils::baseEmailRegex . ")>", $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::SimpleEmail, $groups[1]);
        }
        if (mb_eregi("^<(" . MDUtils::baseURLRegex . ")>", $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::SimpleLink, $groups[1]);
        }
        return null;
    }

    /**
     * Performs at most one link substitution per call, trying in order:
     * label + URL, label + email, angle-bracketed email, angle-bracketed URL.
     *
     * @return bool `true` if a substitution was made
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        // [text](url "title")
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
            MDTokenType::META_OptionalWhitespace, MDTokenType::URL ]);
        if ($found) {
            $matched = count($found->tokens);
            $target = $found->tokens[$matched - 1];
            $node = new MDLinkNode($target->content,
                $state->inlineMarkdownToNode($found->tokens[0]->content), $target->extra);
            array_splice($tokens, $found->index, $matched, [$node]);
            return true;
        }
        // [text](email "title")
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
            MDTokenType::META_OptionalWhitespace, MDTokenType::Email ]);
        if ($found) {
            $matched = count($found->tokens);
            $target = $found->tokens[$matched - 1];
            $node = new MDLinkNode("mailto:{$target->content}",
                $state->inlineMarkdownToNodes($found->tokens[0]->content), $target->extra);
            array_splice($tokens, $found->index, $matched, [$node]);
            return true;
        }
        // <email>
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ]);
        if ($found) {
            $address = $found->tokens[0]->content;
            $node = new MDLinkNode("mailto:{$address}", new MDObfuscatedTextNode($address));
            array_splice($tokens, $found->index, 1, [$node]);
            return true;
        }
        // <url>
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ]);
        if ($found) {
            $href = $found->tokens[0]->content;
            array_splice($tokens, $found->index, 1, [ new MDLinkNode($href, new MDTextNode($href)) ]);
            return true;
        }
        return false;
    }
}
/**
* Reader for referential URL definitions. Consists of link text between square
* brackets followed immediately by a reference symbol also in square brackets.
* The URL can be defined elsewhere on a line by itself with the symbol in square
* brackets, colon, and the URL (and optional title in quotes).
*/
class MDReferencedLinkReader extends MDLinkReader {
    /**
     * Reads a URL definition line of the form `[symbol]: url "title"` or
     * `[symbol]: url`, registers it with `$state->defineURL`, and returns an
     * empty block in its place.
     *
     * @param MDState $state
     * @return ?MDBlockNode empty block if a definition was read, else `null`
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $index = $state->p;
        $text = $state->lines[$index];
        $title = null;
        if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $text, $groups)) {
            $title = $groups[3];
        } elseif (!mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $text, $groups)) {
            return null;
        }
        // Both patterns capture the symbol and URL in groups 1 and 2
        $state->defineURL($groups[1], $groups[2], $title);
        $state->p = $index + 1;
        return new MDBlockNode([]); // empty
    }

    /**
     * Substitutes the first `[text][ref]` token sequence with an
     * `MDReferencedLinkNode`.
     *
     * @return bool `true` if a substitution was made
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
            MDTokenType::META_OptionalWhitespace, MDTokenType::Label ]);
        if (!$found) return false;
        $matched = count($found->tokens);
        $reference = $found->tokens[$matched - 1]->content;
        $linkText = $found->tokens[0]->content;
        $node = new MDReferencedLinkNode($reference, $state->inlineMarkdownToNodes($linkText));
        array_splice($tokens, $found->index, $matched, [$node]);
        return true;
    }
}
/**
* Reader for images. Consists of an exclamation, alt text in square brackets,
* and image URL in parentheses.
*/
class MDImageReader extends MDLinkReader {
    /**
     * Tokenizes everything `MDLinkReader` does, plus a leading exclamation
     * mark.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $inherited = parent::readToken($state, $line);
        if ($inherited) return $inherited;
        return str_starts_with($line, '!') ? new MDToken('!', MDTokenType::Bang) : null;
    }

    /**
     * Substitutes the first `![alt](url "title")` token sequence with an
     * `MDImageNode`. A title, when present, is stored as the node's `title`
     * attribute.
     *
     * @return bool `true` if a substitution was made
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang,
            MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ]);
        if (!$found) return false;
        $matched = count($found->tokens);
        $target = $found->tokens[$matched - 1];
        $image = new MDImageNode($target->content, $found->tokens[1]->content);
        if ($target->extra !== null) {
            $image->attributes['title'] = $target->extra;
        }
        array_splice($tokens, $found->index, $matched, [$image]);
        return true;
    }

    /**
     * Sorts this reader before readers whose exact class is `MDLinkReader`
     * or `MDReferencedLinkReader` (subclasses are not matched).
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        $linkReaders = [ 'MDLinkReader', 'MDReferencedLinkReader' ];
        return in_array(get_class($other), $linkReaders, true) ? -1 : 0;
    }
}
/**
* Reader for images with referential URL definitions. Consists of an
* exclamation, alt text in square brackets, and link symbol in square brackets.
* URL is defined the same as for `MDReferencedLinkReader`.
*/
class MDReferencedImageReader extends MDReferencedLinkReader {
    /**
     * Tokenizes everything the parent reader does, plus a leading
     * exclamation mark.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $inherited = parent::readToken($state, $line);
        if ($inherited) return $inherited;
        return str_starts_with($line, '!') ? new MDToken('!', MDTokenType::Bang) : null;
    }

    /**
     * Substitutes the first `![alt][ref]` token sequence with an
     * `MDReferencedImageNode`.
     *
     * @return bool `true` if a substitution was made
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang,
            MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ]);
        if (!$found) return false;
        $matched = count($found->tokens);
        $reference = $found->tokens[$matched - 1]->content;
        $altText = $found->tokens[1]->content;
        array_splice($tokens, $found->index, $matched,
            [ new MDReferencedImageNode($reference, $altText) ]);
        return true;
    }

    /**
     * Sorts this reader before readers whose exact class is `MDLinkReader`
     * or `MDReferencedLinkReader` (subclasses are not matched).
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        $linkReaders = [ 'MDLinkReader', 'MDReferencedLinkReader' ];
        return in_array(get_class($other), $linkReaders, true) ? -1 : 0;
    }
}
/**
* Converts line breaks within blocks into line breaks in the HTML. Not
* included in any of the default reader sets since most flavors ignore
* line breaks within blocks.
*/
class MDLineBreakReader extends MDReader {
    /**
     * Replaces every text node containing newlines with a node holding one
     * text node per line, separated by `MDLineBreakNode`s. Text nodes without
     * a newline are left as they are.
     *
     * @param MDState $state
     * @param MDNode[] $blocks
     */
    public function postProcess(MDState $state, array &$blocks) {
        MDNode::replaceNodes($state, $blocks, function(MDNode $original) {
            if (!($original instanceof MDTextNode)) return null;
            $segments = explode("\n", $original->text);
            if (count($segments) == 1) return null;
            // First line has no break before it; every later line does
            $pieces = [ new MDTextNode(array_shift($segments)) ];
            foreach ($segments as $segment) {
                $pieces[] = new MDLineBreakNode();
                $pieces[] = new MDTextNode($segment);
            }
            return new MDNode($pieces);
        });
    }
}
/**
* Reads a verbatim HTML tag, and if it passes validation by `MDState->$tagFilter`,
* will be rendered in the final HTML document. Disallowed tags will be rendered
* as plain text in the resulting document.
*/
class MDHTMLTagReader extends MDReader {
    /**
     * Tokenizes an HTML tag at the start of `$line` if the root state's tag
     * filter accepts its tag name. The tag is passed through the filter's
     * `scrubTag` before being wrapped in a token.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $parsed = MDHTMLTag::fromLineStart($line, $state);
        if ($parsed === null) return null;
        $filter = $state->root()->tagFilter;
        if (!$filter->isValidTagName($parsed->tagName)) return null;
        $filter->scrubTag($parsed);
        return new MDToken($parsed->original, MDTokenType::HTMLTag, $parsed);
    }

    /**
     * Substitutes the first HTML tag token with an `MDHTMLTagNode`.
     *
     * @return bool `true` if a substitution was made
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $found = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ]);
        if (!$found) return false;
        $replacement = new MDHTMLTagNode($found->tokens[0]->tag);
        array_splice($tokens, $found->index, 1, [$replacement]);
        return true;
    }
}
/**
* Reads tag modifiers. Consists of curly braces with one or more CSS classes,
* IDs, or custom attributes separated by spaces to apply to the preceding
* node. Validation is performed on modifiers and only acceptable values are
* applied.
*/
class MDModifierReader extends MDReader {
    /**
     * Tokenizes a tag modifier found by `MDTagModifier::fromStart` at the
     * start of `$line`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $parsed = MDTagModifier::fromStart($line);
        if (!$parsed) return null;
        return new MDToken($parsed->original, MDTokenType::Modifier, $parsed);
    }

    /**
     * Never substitutes anything. Modifiers are applied elsewhere, and if
     * they're not it's fine if they're rendered as the original syntax.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        return false;
    }
}
// -- Nodes -----------------------------------------------------------------
/**
 * Base class for nodes in the assembled document tree.
 *
 * Holds the child nodes plus the CSS classes, id, inline styles, and other
 * attributes to emit on the node's top-level HTML tag, and provides shared
 * rendering and tree-walking helpers for subclasses.
 */
class MDNode {
    /**
     * Array of CSS classes to add to the node when rendered as HTML.
     * @var string[]
     */
    public array $cssClasses = [];

    /**
     * Value for the `id` attribute when rendered as HTML. Omitted when `null`
     * or empty.
     */
    public ?string $cssId = null;

    /**
     * Mapping of CSS attributes to values.
     * @var string[]
     */
    public array $cssStyles = [];

    /**
     * Mapping of arbitrary attributes and values to add to this node's top-level
     * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
     * `$cssClasses`, `$cssId`, and `$cssStyles` instead.
     * @var array
     */
    public array $attributes = [];

    /**
     * All child nodes in this node.
     * @var MDNode[]
     */
    public array $children = [];

    /**
     * @param MDNode|MDNode[] $children
     * @throws Error if an array element is not an `MDNode`
     */
    public function __construct(MDNode|array $children=[]) {
        if ($children instanceof MDNode) {
            $this->children = [ $children ];
            return;
        }
        // The declared union type leaves only the array case here.
        foreach ($children as $elem) {
            if (!($elem instanceof MDNode)) {
                $thisClassName = MDUtils::typename($this);
                $elemClassName = MDUtils::typename($elem);
                throw new Error("{$thisClassName} expects children of type " .
                    "MDNode[] or MDNode, got array with {$elemClassName} element");
            }
        }
        $this->children = $children;
    }

    /**
     * Debug representation: the class name followed by each child's string
     * form, wrapped in angle brackets.
     */
    public function __toString(): string {
        $s = "<" . get_class($this);
        foreach ($this->children as $child) {
            $s .= " {$child}";
        }
        $s .= ">";
        return $s;
    }

    /**
     * Adds a CSS class. If already present it will not be duplicated.
     *
     * @param string $cssClass
     * @return bool whether the class was newly added
     */
    public function addClass(string $cssClass): bool {
        // Strict comparison so numeric-looking names ("1e1" vs "10") are not
        // treated as the same class.
        if (in_array($cssClass, $this->cssClasses, true)) return false;
        $this->cssClasses[] = $cssClass;
        return true;
    }

    /**
     * Removes a CSS class.
     *
     * @param string $cssClass
     * @return bool whether the class was present and removed
     */
    public function removeClass(string $cssClass): bool {
        // Re-index so `$cssClasses` stays a plain list after a removal.
        $remaining = array_values(array_filter(
            $this->cssClasses,
            fn($existing) => $existing !== $cssClass
        ));
        $wasPresent = sizeof($remaining) !== sizeof($this->cssClasses);
        $this->cssClasses = $remaining;
        return $wasPresent;
    }

    /**
     * Renders this node and any children as an HTML string. If the node has no
     * content an empty string should be returned.
     */
    public function toHTML(MDState $state): string {
        return MDNode::arrayToHTML($this->children, $state);
    }

    /**
     * Renders this node and any children as a plain text string. The conversion
     * should only render ordinary text, not attempt markdown-like formatting
     * (e.g. list items should not be prefixed with asterisks, only have their
     * content text returned). If the node has no renderable content an empty
     * string should be returned.
     */
    public function toPlaintext(MDState $state): string {
        return MDNode::arrayToPlaintext($this->children, $state);
    }

    /**
     * Protected helper method that renders an HTML fragment of the attributes
     * to apply to the root HTML tag representation of this node.
     *
     * Example result with a couple `$cssClasses`, a `$cssId`, and a custom
     * `$attributes` key-value pair:
     *
     * ```
     * class="foo bar" id="baz" lang="en"
     * ```
     *
     * The value includes a leading space if it's non-empty so that it can be
     * concatenated directly after the tag name and before the closing `>`.
     */
    protected function htmlAttributes(): string {
        $html = '';
        if (sizeof($this->cssClasses) > 0) {
            $classlist = MDUtils::escapeHTML(implode(' ', $this->cssClasses));
            $html .= " class=\"{$classlist}\"";
        }
        if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
            $html .= " id=\"" . MDUtils::escapeHTML($this->cssId) . "\"";
        }
        $styles = [];
        foreach ($this->cssStyles as $key => $value) {
            $styles[] = "{$key}: {$value};";
        }
        if (sizeof($styles) > 0) {
            $escaped = MDUtils::escapeHTML(implode(' ', $styles));
            $html .= " style=\"{$escaped}\"";
        }
        foreach ($this->attributes as $key => $value) {
            // These three are rendered from their dedicated properties above.
            if ($key === 'class' || $key === 'id' || $key === 'style') continue;
            $cleanKey = MDUtils::scrubAttributeName($key);
            if (mb_strlen($cleanKey) == 0) continue;
            $cleanValue = MDUtils::escapeHTML($value);
            $html .= " {$cleanKey}=\"{$cleanValue}\"";
        }
        return $html;
    }

    /**
     * Protected helper that renders and concatenates the HTML of all children
     * of this node. Mostly for use by subclasses in their `toHTML`
     * implementations.
     */
    protected function childHTML(MDState $state): string {
        return MDNode::arrayToHTML($this->children, $state);
    }

    /**
     * Protected helper that renders and concatenates the plaintext of all
     * children of this node.
     */
    protected function childPlaintext(MDState $state): string {
        return MDNode::arrayToPlaintext($this->children, $state);
    }

    /**
     * Protected helper for rendering nodes represented by simple paired HTML
     * tags. Custom CSS classes and attributes will be included in the result,
     * and child content will be rendered between the tags.
     *
     * Newlines are inserted after the opening tag and before the closing tag
     * when the first/last child is a block node, and after the closing tag
     * when this node itself is a block node.
     *
     * @param MDState $state
     * @param string $tagName HTML tag name without angle brackets
     * @return string HTML
     */
    protected function simplePairedTagHTML(MDState $state, string $tagName): string {
        $firstChild = $this->children[0] ?? null;
        $lastChild = $this->children[sizeof($this->children) - 1] ?? null;
        $openTagSuffix = $firstChild instanceof MDBlockNode ? "\n" : '';
        $closeTagPrefix = $lastChild instanceof MDBlockNode ? "\n" : '';
        $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
        $attr = $this->htmlAttributes();
        $childHTML = $this->childHTML($state);
        return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
    }

    /**
     * Calls the given callback function with every child node, recursively.
     * Nodes are visited depth-first.
     */
    public function visitChildren(callable $fn) {
        foreach ($this->children as $child) {
            $fn($child);
            $child->visitChildren($fn);
        }
    }

    /**
     * Helper for rendering and concatenating HTML from an array of `MDNode`s.
     * A newline is appended after each block node.
     *
     * @param MDNode[] $nodes
     * @param MDState $state
     * @return string HTML string
     */
    public static function arrayToHTML(array $nodes, MDState $state): string {
        return implode('', array_map(
            fn($node) => $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : ''),
            $nodes
        ));
    }

    /**
     * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
     *
     * @param MDNode[] $nodes
     * @param MDState $state
     * @return string plaintext
     */
    public static function arrayToPlaintext(array $nodes, MDState $state): string {
        return implode('', array_map(fn($node) => $node->toPlaintext($state), $nodes));
    }

    /**
     * Recursively searches and replaces nodes in a tree. The given `$replacer`
     * is passed every node in the tree. If `$replacer` returns a new `MDNode`
     * the original will be replaced with it. If the function returns `null` no
     * change will be made to that node. Traversal is depth-first. The children
     * of a replacement node are not themselves visited.
     *
     * @param MDState $state
     * @param MDNode[] $nodes
     * @param callable $replacer takes a node as an argument, returns either
     * a new node or `null` to leave it unchanged
     */
    public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
        for ($i = 0; $i < sizeof($nodes); $i++) {
            $originalNode = $nodes[$i];
            $replacement = $replacer($originalNode);
            if ($replacement instanceof MDNode) {
                array_splice($nodes, $i, 1, [$replacement]);
            } else {
                self::replaceNodes($state, $originalNode->children, $replacer);
            }
        }
    }
}
/**
* Marker subclass that indicates a node represents block syntax.
*
* Adds no behavior of its own. `MDNode::arrayToHTML()` and
* `MDNode::simplePairedTagHTML()` test `instanceof MDBlockNode` to decide
* where newlines are emitted around rendered tags.
*/
class MDBlockNode extends MDNode {}
/**
 * Block node for a paragraph.
 */
class MDParagraphNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `p` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'p';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Heading block. The level runs from 1 through 6.
 */
class MDHeadingNode extends MDBlockNode {
    /** Heading level, 1 to 6. */
    public int $level;

    /**
     * @param int $level heading level from 1 to 6
     * @param MDNode|MDNode[] $children
     * @throws Error if `$level` is outside 1 to 6
     */
    public function __construct(int $level, MDNode|array $children) {
        parent::__construct($children);
        $levelInRange = $level >= 1 && $level <= 6;
        if (!$levelInRange) {
            $thisClassName = MDUtils::typename($this);
            throw new Error("{$thisClassName} requires heading level 1 to 6");
        }
        $this->level = $level;
    }

    /**
     * Renders this node via `simplePairedTagHTML()` using the tag `h` followed
     * by the level.
     */
    public function toHTML(MDState $state): string {
        $tagName = "h{$this->level}";
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Block of sub-text, meant to appear smaller and less prominent.
 */
class MDSubtextNode extends MDBlockNode {
    /**
     * Adds the `subtext` CSS class to this node, then renders it via
     * `simplePairedTagHTML()` using the `div` tag.
     */
    public function toHTML(MDState $state): string {
        $marker = 'subtext';
        $this->addClass($marker);
        $tagName = 'div';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Node for a horizontal dividing line.
 */
class MDHorizontalRuleNode extends MDBlockNode {
    /**
     * Renders an `<hr>` tag carrying this node's HTML attributes.
     */
    public function toHTML(MDState $state): string {
        return "<hr" . $this->htmlAttributes() . ">";
    }
}
/**
 * Block quote. Usually displayed indented relative to surrounding text.
 */
class MDBlockquoteNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `blockquote` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'blockquote';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Bulleted list whose children are `MDListItemNode`s.
 */
class MDUnorderedListNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `ul` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'ul';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Numbered list whose children are `MDListItemNode`s.
 */
class MDOrderedListNode extends MDBlockNode {
    /** Number the list starts at, or `null` if unspecified. */
    public ?int $startOrdinal;

    /**
     * @param MDListItemNode[] $children
     * @param ?int $startOrdinal
     */
    public function __construct(array $children, ?int $startOrdinal=null) {
        parent::__construct($children);
        $this->startOrdinal = $startOrdinal;
    }

    /**
     * Renders this node via `simplePairedTagHTML()` using the `ol` tag. When
     * a start ordinal other than 1 is set, it is first stored as the `start`
     * attribute.
     */
    public function toHTML(MDState $state): string {
        $ordinal = $this->startOrdinal;
        $needsStartAttribute = $ordinal !== null && $ordinal !== 1;
        if ($needsStartAttribute) {
            $this->attributes['start'] = (string) $ordinal;
        }
        return $this->simplePairedTagHTML($state, 'ol');
    }
}
/**
 * Single item belonging to a bulleted or numbered list.
 */
class MDListItemNode extends MDBlockNode {
    /** Ordinal given to the constructor, or `null` if none was supplied. */
    public ?int $ordinal;

    /**
     * @param MDNode|MDNode[] $children
     * @param ?int $ordinal
     */
    public function __construct(MDNode|array $children, ?int $ordinal=null) {
        parent::__construct($children);
        $this->ordinal = $ordinal;
    }

    /**
     * Renders this node via `simplePairedTagHTML()` using the `li` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'li';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * A block of preformatted computer code. Inner markdown is ignored.
 */
class MDCodeBlockNode extends MDBlockNode {
    /** Raw code text. Escaped when rendered. */
    public string $text;

    /**
     * The programming language of the content.
     */
    public ?string $language;

    /**
     * @param string $text raw code text
     * @param ?string $language programming language of the content, if known
     */
    public function __construct(string $text, ?string $language=null) {
        parent::__construct([]);
        $this->text = $text;
        $this->language = $language;
    }

    /**
     * Renders the code inside `<pre><code>`. The node's own HTML attributes
     * go on the `<pre>` tag. When a language is set, the `<code>` tag gets a
     * `language-…` CSS class.
     */
    public function toHTML(MDState $state): string {
        // Escape the language: the constructor accepts any string, not only
        // values that came through a reader's validation.
        $languageModifier = ($this->language !== null)
            ? " class=\"language-" . MDUtils::escapeHTML($this->language) . "\""
            : '';
        return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
            MDUtils::escapeHTML($this->text) . "</code></pre>\n";
    }
}
/**
 * A table node with a single header row and any number of body rows.
 *
 * The first child is the header row; all remaining children are body rows.
 */
class MDTableNode extends MDBlockNode {
    /**
     * How to align each column. Columns beyond the length of the array or with
     * corresponding `null` elements will have no alignment set. Values should
     * be valid CSS `text-align` values.
     *
     * @var string[]
     */
    public array $columnAlignments = [];

    /**
     * @param MDTableRowNode $headerRow
     * @param MDTableRowNode[] $bodyRows
     */
    public function __construct(MDTableRowNode $headerRow, array $bodyRows) {
        parent::__construct(array_merge([ $headerRow ], $bodyRows));
    }

    /**
     * @return ?MDTableRowNode the first child, or `null` if there are no children
     */
    public function headerRow(): ?MDTableRowNode { return $this->children[0] ?? null; }

    /**
     * @return MDTableRowNode[] all children after the first
     */
    public function bodyRows(): array { return array_slice($this->children, 1); }

    /**
     * Returns a given body cell.
     *
     * @param int $column zero-based column index
     * @param int $row zero-based body row index (the header row is not counted)
     * @return ?MDTableCellNode cell or `null` if out of bounds
     */
    public function bodyCellAt(int $column, int $row): ?MDTableCellNode {
        $rowNode = $this->bodyRows()[$row] ?? null;
        if ($rowNode === null) return null;
        return $rowNode->children[$column] ?? null;
    }

    /**
     * Applies `$columnAlignments` to every cell in every row, header included.
     */
    public function applyAlignments() {
        foreach ($this->children as $child) {
            $this->applyAlignmentsToRow($child);
        }
    }

    /**
     * Applies the alignment of each column to the matching cell in `$row`.
     */
    private function applyAlignmentsToRow(MDTableRowNode $row) {
        foreach ($row->children as $columnIndex => $cell) {
            $alignment = $this->columnAlignments[$columnIndex] ?? null;
            $this->applyAlignmentToCell($cell, $alignment);
        }
    }

    /**
     * Sets the cell's `text-align` style, or clears it when `$alignment` is
     * empty or `null`.
     */
    public function applyAlignmentToCell(MDTableCellNode $cell, ?string $alignment) {
        if ($alignment) {
            $cell->cssStyles['text-align'] = $alignment;
        } else {
            unset($cell->cssStyles['text-align']);
        }
    }

    /**
     * Renders the table with the header row in `<thead>` and the remaining
     * rows in `<tbody>`. Column alignments are applied to the cells first.
     */
    public function toHTML(MDState $state): string {
        $this->applyAlignments();
        $html = '';
        $html .= "<table" . $this->htmlAttributes() . ">\n";
        $html .= "<thead>\n";
        $html .= $this->headerRow()->toHTML($state) . "\n";
        $html .= "</thead>\n";
        $html .= "<tbody>\n";
        $html .= MDNode::arrayToHTML($this->bodyRows(), $state) . "\n";
        $html .= "</tbody>\n";
        $html .= "</table>\n";
        return $html;
    }
}
/**
 * A single table row, used for both header and body rows. Children are
 * `MDTableCellNode`s.
 */
class MDTableRowNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `tr` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'tr';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * A single cell within a table row.
 */
class MDTableCellNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `td` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'td';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * A header cell, for use in a table's header row.
 */
class MDTableHeaderCellNode extends MDTableCellNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `th` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'th';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Definition list. Children are `MDDefinitionListTermNode`s and
 * `MDDefinitionListDefinitionNode`s.
 */
class MDDefinitionListNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `dl` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'dl';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * The word or term being defined in a definition list.
 */
class MDDefinitionListTermNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `dt` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'dt';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * A definition for a term in a definition list. Should come after a
 * definition term, or after another definition as an alternate.
 */
class MDDefinitionListDefinitionNode extends MDBlockNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `dd` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'dd';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Block at the bottom of a document listing all the footnotes with their
 * content.
 */
class MDFootnoteListNode extends MDBlockNode {
    /**
     * Looks up the numeric id for a footnote symbol in the root state's
     * `userInfo['footnoteIds']`.
     *
     * @param MDState $state
     * @param string $symbol
     * @return ?int id, or `null` if the lookup is empty or has no such symbol
     */
    private function footnoteId(MDState $state, string $symbol): ?int {
        $lookup = $state->root()->userInfo['footnoteIds'];
        if (!$lookup) return null;
        return $lookup[$symbol] ?? null;
    }

    /**
     * NOTE(review): as written this always returns an empty string. The
     * statements that build the footnote list markup appear to be missing
     * (`$symbolOrder` and `$footnoteUniques` are read but never used).
     * Restore them from the upstream implementation; they are left untouched
     * here rather than guessed at.
     */
    public function toHTML(MDState $state): string {
        $footnotes = $state->root()->userInfo['footnotes'];
        $symbolOrder = array_keys($footnotes);
        if (sizeof($footnotes) == 0) return '';
        $footnoteUniques = $state->root()->userInfo['footnoteInstances'];
        $html = '';
        $html .= '';
        return $html;
    }

    /**
     * Renders one line per footnote with non-empty content, in the form
     * `symbol. text`.
     *
     * @param MDState $state
     * @return string plaintext, trimmed; empty when there are no footnotes
     */
    public function toPlaintext(MDState $state): string {
        // Read from the root state, matching `toHTML()` and `footnoteId()`.
        $footnotes = $state->root()->userInfo['footnotes'] ?? [];
        if (sizeof($footnotes) == 0) return '';
        $text = '';
        foreach (array_keys($footnotes) as $symbolRaw) {
            // Numeric array keys come back as ints; normalize to string.
            $symbol = "{$symbolRaw}";
            $content = $footnotes[$symbolRaw];
            if (!$content) continue;
            // NOTE(review): this renders this node's own children rather than
            // `$content` — confirm which is intended.
            $text .= "{$symbol}. " . $this->childPlaintext($state) . "\n";
        }
        return trim($text);
    }
}
/**
* Marker subclass that indicates a node represents inline syntax.
*
* Adds no behavior of its own.
*/
class MDInlineNode extends MDNode {}
/**
 * Contains plain text. Special HTML characters are escaped when rendered.
 */
class MDTextNode extends MDInlineNode {
    /** Raw, unescaped text. */
    public string $text;

    /**
     * @param string $text raw, unescaped text
     */
    public function __construct(string $text) {
        parent::__construct([]);
        $this->text = $text;
    }

    /**
     * @return string the text with HTML special characters escaped
     */
    public function toHTML(MDState $state): string {
        return MDUtils::escapeHTML($this->text);
    }

    /**
     * @return string the text unchanged
     */
    public function toPlaintext(MDState $state): string {
        return $this->text;
    }

    /**
     * Debug representation: the class name followed by the quoted text,
     * wrapped in angle brackets, in the same style as `MDNode::__toString()`.
     */
    public function __toString(): string {
        return "<" . get_class($this) . " \"{$this->text}\">";
    }
}
/**
 * Plain text that is written out as HTML entities, making it marginally
 * harder for web scrapers to decipher. Used for semi-sensitive info such as
 * email addresses.
 */
class MDObfuscatedTextNode extends MDTextNode {
    /**
     * @return string the text as encoded by `MDUtils::escapeObfuscated()`
     */
    public function toHTML(MDState $state): string {
        $obfuscated = MDUtils::escapeObfuscated($this->text);
        return $obfuscated;
    }
}
/**
 * Inline node for emphasized (italicized) content.
 */
class MDEmphasisNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `em` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'em';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline node for strong (bold) content.
 */
class MDStrongNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `strong` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'strong';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline node for content displayed with a line through it.
 */
class MDStrikethroughNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `s` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 's';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline node for underlined content.
 */
class MDUnderlineNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `u` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'u';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline node for highlighted content, usually shown on a bright colored
 * background.
 */
class MDHighlightNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `mark` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'mark';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline node for superscripted content.
 */
class MDSuperscriptNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `sup` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'sup';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline node for subscripted content.
 */
class MDSubscriptNode extends MDInlineNode {
    /**
     * Renders this node via `simplePairedTagHTML()` using the `sub` tag.
     */
    public function toHTML(MDState $state): string {
        $tagName = 'sub';
        return $this->simplePairedTagHTML($state, $tagName);
    }
}
/**
 * Inline plaintext indicating computer code.
 */
class MDCodeNode extends MDInlineNode {
    /** Raw code text. Escaped when rendered. */
    public string $text;

    /**
     * @param string $text raw code text
     */
    public function __construct(string $text) {
        parent::__construct([]);
        $this->text = $text;
    }

    /**
     * Renders the escaped text inside a `<code>` element carrying this node's
     * HTML attributes.
     */
    public function toHTML(MDState $state): string {
        return "<code" . $this->htmlAttributes() . ">" . MDUtils::escapeHTML($this->text) . "</code>";
    }
}
/**
* A footnote symbol in a document. Denoted as a superscripted number that can
* be clicked to go to its content at the bottom of the document.
*/
class MDFootnoteNode extends MDInlineNode {
/**
* Symbol the author used to match up the footnote to its content definition.
*/
public string $symbol;
/**
* The superscript symbol rendered in HTML. May be the same or different
* than `$symbol`.
*/
public ?string $displaySymbol = null;
/**
* Unique ID for the footnote definition.
*/
public ?int $footnoteId = null;
/**
* Unique number for backlinking to a footnote occurrence. Populated by
* `MDFootnoteReader->postProcess()`.
*/
public ?int $occurrenceId = null;
/**
* @param string $symbol symbol the author used for this footnote
* @param ?string $title stored as the `title` attribute when non-empty
*/
public function __construct(string $symbol, ?string $title=null) {
parent::__construct([]);
$this->symbol = $symbol;
if ($title) $this->attributes['title'] = $title;
}
/**
* NOTE(review): both branches currently return an empty string, so the
* footnote reference renders as nothing. The markup that was presumably
* built from `$footnoteId`, `$occurrenceId` and `$displaySymbol` appears to
* have been lost; restore it from the upstream implementation.
*/
public function toHTML(MDState $state): string {
if ($this->footnoteId !== null) {
return "";
}
return "";
}
}
/**
 * A clickable hypertext link.
 */
class MDLinkNode extends MDInlineNode {
    /** Link target. Escaped when rendered. */
    public string $href;

    /**
     * @param string $href
     * @param MDNode|MDNode[] $children
     * @param ?string $title stored as the `title` attribute when not `null`
     */
    public function __construct(string $href, MDNode|array $children, ?string $title=null) {
        parent::__construct($children);
        $this->href = $href;
        if ($title !== null) $this->attributes['title'] = $title;
    }

    /**
     * Renders an `<a>` element around the children. `mailto:` links are
     * escaped with `MDUtils::escapeObfuscated()`; all other links use
     * ordinary HTML escaping.
     */
    public function toHTML(MDState $state): string {
        if (str_starts_with($this->href, 'mailto:')) {
            $escapedLink = MDUtils::escapeObfuscated($this->href);
        } else {
            $escapedLink = MDUtils::escapeHTML($this->href);
        }
        return "<a href=\"{$escapedLink}\"" . $this->htmlAttributes() . ">" . $this->childHTML($state) . "</a>";
    }
}
/**
 * Hypertext link whose URL is defined elsewhere in the document and looked
 * up by reference.
 */
class MDReferencedLinkNode extends MDLinkNode {
    /** Reference used to look up the URL and title. */
    public string $reference;

    /**
     * @param string $reference
     * @param MDNode|MDNode[] $children
     */
    public function __construct(string $reference, MDNode|array $children) {
        parent::__construct('', $children);
        $this->reference = $reference;
    }

    /**
     * Resolves the reference while `$href` is still empty, then renders like
     * an ordinary link. A non-empty URL or title found for the reference is
     * stored on this node.
     */
    public function toHTML(MDState $state): string {
        $needsResolving = ($this->href === '');
        if ($needsResolving) {
            $resolvedUrl = $state->urlForReference($this->reference);
            if ($resolvedUrl) {
                $this->href = $resolvedUrl;
            }
            $resolvedTitle = $state->urlTitleForReference($this->reference);
            if ($resolvedTitle) {
                $this->attributes['title'] = $resolvedTitle;
            }
        }
        return parent::toHTML($state);
    }
}
/**
 * An inline image.
 */
class MDImageNode extends MDInlineNode {
    /** Image URL. Escaped when rendered. */
    public string $src;

    /** Alternate text, or `null` for none. */
    public ?string $alt;

    /**
     * @param string $src
     * @param ?string $alt
     */
    public function __construct(string $src, ?string $alt) {
        parent::__construct([]);
        $this->src = $src;
        $this->alt = $alt;
    }

    /**
     * Renders an `<img>` tag with the escaped source, the alternate text when
     * one is set, and this node's HTML attributes.
     */
    public function toHTML(MDState $state): string {
        $html = "<img src=\"" . MDUtils::escapeHTML($this->src) . "\"";
        // Explicit checks rather than truthiness, so an alt text of "0" is kept.
        if ($this->alt !== null && $this->alt !== '') {
            $html .= " alt=\"" . MDUtils::escapeHTML($this->alt) . "\"";
        }
        $html .= $this->htmlAttributes() . ">";
        return $html;
    }
}
/**
 * An inline image where the URL is defined elsewhere by reference.
 */
class MDReferencedImageNode extends MDImageNode {
    /** Reference used to look up the URL and title. */
    public string $reference;

    /**
     * @param string $reference
     * @param ?string $alt
     */
    public function __construct(string $reference, ?string $alt=null) {
        // MDImageNode's constructor takes exactly ($src, $alt).
        parent::__construct('', $alt);
        $this->reference = $reference;
    }

    /**
     * Resolves the reference while `$src` is still empty, then renders like an
     * ordinary image. A non-null URL or title found for the reference is
     * stored on this node.
     */
    public function toHTML(MDState $state): string {
        if ($this->src === '') {
            $url = $state->urlForReference($this->reference);
            if ($url !== null) $this->src = $url;
            $title = $state->urlTitleForReference($this->reference);
            if ($title !== null) $this->attributes['title'] = $title;
        }
        return parent::toHTML($state);
    }
}
/**
 * An abbreviation that can be hovered over to see its full expansion.
 */
class MDAbbreviationNode extends MDInlineNode {
    /** The abbreviated text shown in the document. */
    public string $abbreviation;

    /**
     * @param string $abbreviation
     * @param string $definition full expansion, stored as the `title` attribute
     */
    public function __construct(string $abbreviation, string $definition) {
        parent::__construct([]);
        $this->abbreviation = $abbreviation;
        $this->attributes['title'] = $definition;
    }

    /**
     * Renders the escaped abbreviation inside an `<abbr>` element carrying
     * this node's HTML attributes.
     */
    public function toHTML(MDState $state): string {
        return "<abbr" . $this->htmlAttributes() . ">" . MDUtils::escapeHTML($this->abbreviation) . "</abbr>";
    }
}
/**
 * A line break that is preserved when rendered to HTML.
 */
class MDLineBreakNode extends MDInlineNode {
    /**
     * @return string a `<br>` tag
     */
    public function toHTML(MDState $state): string {
        return '<br>';
    }

    /**
     * @return string a newline character
     */
    public function toPlaintext(MDState $state): string {
        return "\n";
    }
}
/**
 * Verbatim HTML tag taken from the source document. The tag may have been
 * altered to strip out disallowed attributes or CSS values.
 */
class MDHTMLTagNode extends MDInlineNode {
    /** The tag to render. */
    public MDHTMLTag $tag;

    /**
     * @param MDHTMLTag $tag
     */
    public function __construct(MDHTMLTag $tag) {
        parent::__construct([]);
        $this->tag = $tag;
    }

    /**
     * @return string the tag converted to its string form
     */
    public function toHTML(MDState $state): string {
        $rendered = (string) $this->tag;
        return $rendered;
    }
}
// -- Main class ------------------------------------------------------------
/**
 * Markdown parser.
 */
class Markdown {
    /**
     * Set of standard readers to handle common syntax. The array is built on
     * first use and shared by later calls.
     * @return MDReader[]
     */
    public static function standardReaders(): array {
        self::$sharedStandardReaders ??= [
            new MDUnderlinedHeadingReader(),
            new MDHashHeadingReader(),
            new MDBlockQuoteReader(),
            new MDHorizontalRuleReader(),
            new MDUnorderedListReader(),
            new MDOrderedListReader(),
            new MDFencedCodeBlockReader(),
            new MDIndentedCodeBlockReader(),
            new MDParagraphReader(),
            new MDStrongReader(),
            new MDEmphasisReader(),
            new MDCodeSpanReader(),
            new MDImageReader(),
            new MDLinkReader(),
            new MDHTMLTagReader(),
        ];
        return self::$sharedStandardReaders;
    }
    private static ?array $sharedStandardReaders = null;

    /**
     * All supported readers except `MDLineBreakReader`. The array is built on
     * first use and shared by later calls.
     * @return MDReader[]
     */
    public static function allReaders(): array {
        // Assign to the static property, not a local, so the cache is
        // actually populated and reused.
        self::$sharedAllReaders ??= array_merge(self::standardReaders(), [
            new MDSubtextReader(),
            new MDTableReader(),
            new MDDefinitionListReader(),
            new MDFootnoteReader(),
            new MDAbbreviationReader(),
            new MDUnderlineReader(),
            new MDSubscriptReader(),
            new MDStrikethroughReader(),
            new MDHighlightReader(),
            new MDSuperscriptReader(),
            new MDReferencedImageReader(),
            new MDReferencedLinkReader(),
            new MDModifierReader(),
        ]);
        return self::$sharedAllReaders;
    }
    private static ?array $sharedAllReaders = null;

    /**
     * Shared instance of a parser with standard syntax.
     */
    public static function standardParser(): Markdown {
        self::$sharedStandardMarkdown ??= new Markdown(self::standardReaders());
        return self::$sharedStandardMarkdown;
    }
    private static ?Markdown $sharedStandardMarkdown = null;

    /**
     * Shared instance of a parser with all supported syntax.
     */
    public static function completeParser(): Markdown {
        self::$sharedCompleteParser ??= new Markdown(self::allReaders());
        return self::$sharedCompleteParser;
    }
    public static ?Markdown $sharedCompleteParser = null;

    /**
     * Filter for what non-markdown HTML is permitted. HTML generated as a
     * result of markdown is unaffected.
     */
    public MDHTMLFilter $tagFilter;

    /**
     * If an exception occurs, attempts to narrow down the portion of the
     * markdown that triggered the error and prints it. For debugging.
     * Investigation mode can be slow.
     */
    public bool $investigateErrors = false;

    /** @var MDReader[] */
    private array $readers;

    /** @var MDReader[] */
    private array $readersByBlockPriority;

    /** @var MDReader[] */
    private array $readersByTokenPriority;

    /** Readers ordered for token substitution, as produced by `MDReader::sortReadersForSubstitution()`. */
    private array $readersBySubstitutePriority;

    /**
     * Creates a Markdown parser with the given syntax readers.
     *
     * @param ?MDReader[] $readers readers to use; `null` means `allReaders()`
     */
    public function __construct(?array $readers=null) {
        $this->readers = $readers ?? self::allReaders();
        $this->readersByBlockPriority = MDReader::sortReaderForBlocks($this->readers);
        $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($this->readers);
        $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($this->readers);
        $this->tagFilter = new MDHTMLFilter();
    }

    /**
     * Converts a markdown string to an HTML string.
     *
     * @param string $markdown
     * @param string $elementIdPrefix Optional prefix for generated element
     * `id`s and links to them. For differentiating multiple markdown docs in
     * the same HTML page.
     * @return string HTML
     * @throws Error rethrown from parsing, after the optional investigation
     */
    public function toHTML(string $markdown, string $elementIdPrefix='') {
        // `\r\n` must come first in the alternation; otherwise a CRLF pair is
        // split as `\r` then `\n`, producing an empty line after every line.
        $lines = mb_split('(?:\\r\\n|\\n|\\r)', $markdown);
        try {
            return $this->parse($lines, $elementIdPrefix);
        } catch (Error $e) {
            if ($this->investigateErrors) {
                $this->investigateException($lines, $elementIdPrefix);
            }
            throw $e;
        }
    }

    /**
     * Runs the full pipeline on pre-split lines: builds the state, runs each
     * reader's `preProcess`, reads blocks, runs each reader's `postProcess`,
     * then renders the nodes to HTML.
     *
     * @param string[] $lines
     * @param string $elementIdPrefix
     * @return string HTML
     */
    private function parse(array $lines, string $elementIdPrefix) {
        $state = new MDState($lines);
        $state->readersByBlockPriority = $this->readersByBlockPriority;
        $state->readersByTokenPriority = $this->readersByTokenPriority;
        $state->readersBySubstitutePriority = $this->readersBySubstitutePriority;
        $state->tagFilter = $this->tagFilter;
        $state->elementIdPrefix = $elementIdPrefix;
        foreach ($this->readers as $reader) {
            $reader->preProcess($state);
        }
        $nodes = $state->readBlocks();
        foreach ($this->readers as $reader) {
            $reader->postProcess($state, $nodes);
        }
        return MDNode::arrayToHTML($nodes, $state);
    }

    /**
     * Keeps removing first and last lines of markdown to locate the source of
     * an exception and prints the minimal snippet.
     *
     * @param string[] $lines
     * @param string $elementIdPrefix
     */
    private function investigateException(array $lines, string $elementIdPrefix) {
        print("Investigating error...\n");
        $lineCount = sizeof($lines);
        $startIndex = 0;
        $endIndex = $lineCount;
        // Keep stripping away first line until an exception stops being thrown
        for ($i = 0; $i < $lineCount; $i++) {
            try {
                $this->parse(array_slice($lines, $i), $elementIdPrefix);
                break;
            } catch (Error $e0) {
                $startIndex = $i;
            }
        }
        // Keep stripping away last line until an exception stops being thrown.
        // array_slice() takes a length, not an end index, so convert.
        for ($i = $lineCount; $i > $startIndex; $i--) {
            try {
                $this->parse(array_slice($lines, $startIndex, $i - $startIndex), $elementIdPrefix);
                break;
            } catch (Error $e0) {
                $endIndex = $i;
            }
        }
        $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex - $startIndex));
        print("This portion of markdown caused an unexpected exception:\n{$problematicMarkdown}\n");
    }
}
?>