()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

    /**
     * Encodes every character of `$text` as an HTML numeric entity to make it
     * marginally more difficult for web scrapers to grab sensitive info. If
     * `$text` starts with `mailto:`, that prefix is kept verbatim and only the
     * text following it is obfuscated.
     *
     * @param string $text text to obfuscate
     * @return string sequence of `&#NNN;` entities (optionally `mailto:`-prefixed)
     */
    public static function escapeObfuscated(string $text): string
    {
        if (str_starts_with($text, 'mailto:')) {
            return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
        }
        $html = '';
        // Split once instead of calling mb_substr() for every character.
        foreach (mb_str_split($text) as $char) {
            $html .= '&#' . mb_ord($char) . ';';
        }
        return $html;
    }

    /**
     * Removes illegal characters from an HTML attribute name: tab, newline,
     * form feed, space, `/`, `>`, quotes, and `=`.
     */
    public static function scrubAttributeName(string $name): string
    {
        return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
    }

    /**
     * Strips one or more leading indents from a line or lines of markdown. An
     * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
     * spaces) are treated like one indent level.
     *
     * The argument is taken by value (it is never modified), so literals and
     * expression results may be passed as well as variables.
     *
     * @param string|string[] $line one line or an array of lines
     * @param int $levels number of indent levels to remove; values below 1
     * leave the input untouched
     * @return string|string[] same shape as `$line`, with indents removed
     */
    public static function stripIndent(string|array $line, int $levels=1): string|array
    {
        // A negative repeat count would produce an invalid regex.
        if ($levels < 1) {
            return $line;
        }
        $regex = '^(?: {1,4}|\\t){' . $levels . '}';
        $strip = fn(string $l): string => mb_ereg_replace($regex, '', $l);
        return is_array($line) ? array_map($strip, $line) : $strip($line);
    }

    /**
     * Counts the number of indent levels in a line of text. Partial indents
     * (1 to 3 spaces) are counted as one indent level unless `$fullIndentsOnly`
     * is `true`.
     *
     * @param string $line line to inspect
     * @param bool $fullIndentsOnly when `true`, only 4 spaces or a tab count
     * @return int number of leading indent levels
     */
    public static function countIndents(string $line, bool $fullIndentsOnly=false): int
    {
        $indentPattern = $fullIndentsOnly ? "(?: {4}|\\t)" : "(?: {1,4}|\\t)";
        // Normalize indents to tabs.
        $normalized = mb_ereg_replace($indentPattern, "\t", $line);
        // Remove content after the leading run of tabs.
        $leadingTabs = mb_ereg_replace("^(\\t*)(.*?)$", "\\1", $normalized);
        // Each remaining tab is one indent level.
        return mb_strlen($leadingTabs);
    }

    /**
     * Returns a copy of an array without any whitespace-only lines at the end.
     *
     * @param string[] $lines
     * @return string[]
     */
    public static function withoutTrailingBlankLines(array $lines): array
    {
        $stripped = $lines;
        while ($stripped !== [] && trim(end($stripped)) === '') {
            array_pop($stripped);
        }
        return $stripped;
    }

    /**
     * Tests if an array of lines contains at least one blank. A blank line
     * can contain whitespace.
     *
     * @param string[] $lines
     */
    public static function containsBlankLine(array $lines): bool
    {
        foreach ($lines as $line) {
            if (trim($line) === '') {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a type or class name of a value: the class name for objects,
     * otherwise the result of `gettype()`.
     *
     * @param mixed $value
     * @return string
     */
    public static function typename($value): string
    {
        return is_object($value) ? get_class($value) : gettype($value);
    }
}

/**
 * Token type enum for `MDToken`.
 */
enum MDTokenType
{
    case Text;

    /**
     * Only used for the leading and trailing whitespace around a run of text,
     * not every single whitespace character.
     */
    case Whitespace;

    case Underscore;
    case Asterisk;
    case Slash;
    case Tilde;
    case Bang;
    case Backtick;
    case Equal;
    case Caret;

    // content=label
    case Label;
    // content=URL, extra=title
    case URL;
    // content=email address, extra=title
    case Email;
    // content=URL
    case SimpleLink;
    // content=email address
    case SimpleEmail;
    // content=symbol
    case Footnote;
    // modifier=MDTagModifier
    case Modifier;
    // tag=MDHTMLTag
    case HTMLTag;

    /** Wildcard for `MDToken::findFirstTokens` */
    case META_AnyNonWhitespace;
    /** Wildcard for `MDToken::findFirstTokens` */
    case META_OptionalWhitespace;
}

/**
 * Search results from `MDToken.findFirstTokens`.
*/
class MDTokenMatch
{
    /**
     * @param MDToken[] $tokens the matched tokens
     * @param int $index position in the searched array where the match begins
     */
    public function __construct(
        /** @var MDToken[] */
        public array $tokens,
        public int $index,
    ) {
    }
}

/**
 * Search results from `MDToken.findPairedTokens`.
 */
class MDPairedTokenMatch
{
    /**
     * @param MDToken[] $startTokens tokens matched by the opening pattern
     * @param MDToken[] $contentTokens tokens between the opening and closing
     * matches
     * @param MDToken[] $endTokens tokens matched by the closing pattern
     * @param int $startIndex index of the first opening token
     * @param int $contentIndex index of the first content token
     * @param int $endIndex index of the first closing token
     * @param int $totalLength number of tokens from the first opening token
     * through the last closing token
     */
    public function __construct(
        /** @var MDToken[] */
        public array $startTokens,
        /** @var MDToken[] */
        public array $contentTokens,
        /** @var MDToken[] */
        public array $endTokens,
        public int $startIndex,
        public int $contentIndex,
        public int $endIndex,
        public int $totalLength,
    ) {
    }
}

/**
 * One lexical unit in inline markdown syntax parsing.
 */
class MDToken
{
    /**
     * The original verbatim token string. Required as a plaintext fallback if
     * the token remains unresolved.
     */
    public string $original;

    /** Kind of token. */
    public MDTokenType $type;

    /** Primary string content, when the token carries any. */
    public ?string $content = null;

    /** Secondary string content (e.g. a link title), when present. */
    public ?string $extra = null;

    /** Parsed HTML tag, set when the token was built from an `MDHTMLTag`. */
    public ?MDHTMLTag $tag = null;

    /** Parsed modifier, set when the token was built from an `MDTagModifier`. */
    public ?MDTagModifier $modifier = null;

    /**
     * Creates a token.
*
     * @param string $original verbatim token string
     * @param MDTokenType $type token type
     * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
     * the token. A `MDTagModifier` is stored in `$modifier`, a `MDHTMLTag` in
     * `$tag`, and anything else in `$content`.
     * @param string|null $extra additional content
     */
    public function __construct(string $original, MDTokenType $type, string|MDTagModifier|MDHTMLTag|null $content=null, ?string $extra=null)
    {
        $this->original = $original;
        $this->type = $type;
        $this->extra = $extra;
        // Route the polymorphic $content argument to the matching property.
        if ($content instanceof MDTagModifier) {
            $this->modifier = $content;
        } elseif ($content instanceof MDHTMLTag) {
            $this->tag = $content;
        } else {
            $this->content = $content;
        }
    }

    /**
     * Debug representation of the form `<ClassName type=Type content="...">`.
     */
    public function __toString(): string
    {
        // The opening delimiter is a plain "<"; no brace belongs after it.
        return "<" . MDUtils::typename($this) . " type={$this->type->name} "
            . "content=\"{$this->content}\">";
    }

    /**
     * Attempts to parse a label token from the beginning of `$line`. A label is
     * of the form `[content]`. Nested bracket pairs are allowed inside the
     * content, a backslash skips the character after it, and a `)` without a
     * preceding `(` aborts the parse. If found, returns an array:
     * - `0`: the entire label including brackets
     * - `1`: the content of the label
     *
     * @param string $line
     * @return ?string[] match groups or null if not found
     */
    public static function tokenizeLabel(string $line): ?array
    {
        if (!str_starts_with($line, '[')) {
            return null;
        }
        $chars = mb_str_split($line);
        $length = count($chars);
        $parenDepth = 0;
        $bracketDepth = 0;
        for ($p = 1; $p < $length; $p++) {
            $ch = $chars[$p];
            if ($ch === '\\') {
                // Escaped character: skip whatever follows the backslash.
                $p++;
            } elseif ($ch === '(') {
                $parenDepth++;
            } elseif ($ch === ')') {
                $parenDepth--;
                if ($parenDepth < 0) {
                    return null;
                }
            } elseif ($ch === '[') {
                $bracketDepth++;
            } elseif ($ch === ']') {
                if ($bracketDepth === 0) {
                    // This bracket closes the label itself.
                    return [
                        mb_substr($line, 0, $p + 1),
                        mb_substr($line, 1, $p - 1),
                    ];
                }
                $bracketDepth--;
            }
        }
        return null;
    }

    // Groups: 1=URL, 2=title
    private const urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)';

    // Groups: 1=URL
    private const urlRegex = '^\\((\\S+?)\\)';

    /**
     * Attempts to parse a URL token from the beginning of `$line`. A URL token
     * is of the form `(url)` or `(url "title")`.
If found, returns an array:
     * - `0`: the entire URL token including parentheses
     * - `1`: the URL
     * - `2`: the optional title, or `null`
     *
     * Returns `null` when the text also parses as an email token.
     *
     * @param string $line
     * @return ?array token tuple
     */
    public static function tokenizeURL(string $line): ?array
    {
        $groups = [];
        if (mb_eregi(self::urlWithTitleRegex, $line, $groups)) {
            $result = $groups;
        } elseif (mb_eregi(self::urlRegex, $line, $groups)) {
            $result = [ $groups[0], $groups[1], null ];
        } else {
            return null;
        }
        // Make sure it's not better described as an email address.
        if (self::tokenizeEmail($line) !== null) {
            return null;
        }
        return $result;
    }

    /**
     * Attempts to parse an email address from the beginning of `$line`. An
     * email address is of the form `(user@example.com)` or
     * `(user@example.com "link title")`. If found, returns an array:
     * - `0`: the entire token including parentheses
     * - `1`: the email address
     * - `2`: the optional link title, or `null`
     *
     * @param string $line
     * @return ?string[] token tuple
     */
    public static function tokenizeEmail(string $line): ?array
    {
        // Initialize explicitly; a bare `$groups;` reads an undefined variable.
        $groups = [];
        $address = "(" . MDUtils::baseEmailRegex . ")";
        if (mb_eregi("^\\(\\s*" . $address . "\\s+\"(.*?)\"\\s*\\)", $line, $groups)) {
            return $groups;
        }
        if (mb_eregi("^\\(\\s*" . $address . "\\s*\\)", $line, $groups)) {
            return [ $groups[0], $groups[1], null ];
        }
        return null;
    }

    /**
     * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
     * If found, returns a `MDTokenMatch`, otherwise `null`.
     *
     * Special token types `META_AnyNonWhitespace` and `META_OptionalWhitespace`
     * are special supported token types. Note that `META_OptionalWhitespace`
     * may give a result with a variable number of tokens.
*
     * `META_OptionalWhitespace` may match zero tokens, including when the end
     * of `$tokensToSearch` has been reached.
     *
     * @param (MDToken|MDNode)[] $tokensToSearch mixed array of `MDToken` and
     * `MDNode` elements
     * @param MDTokenType[] $pattern contiguous run of token types to find
     * @param int $startIndex token index to begin searching (defaults to 0)
     * @return ?MDTokenMatch match object, or `null` if not found
     * @throws Error if `$pattern` is empty
     */
    public static function findFirstTokens(array $tokensToSearch, array $pattern, int $startIndex=0): ?MDTokenMatch
    {
        if (sizeof($pattern) == 0) {
            throw new Error("Pattern cannot be empty");
        }
        $tokenCount = sizeof($tokensToSearch);
        for ($t = $startIndex; $t < $tokenCount; $t++) {
            $matched = [];
            $matchedAll = true;
            // Index of the next unconsumed token for this attempt.
            $cursor = $t;
            foreach ($pattern as $elem) {
                $exhausted = $cursor >= $tokenCount;
                $token = $exhausted ? null : $tokensToSearch[$cursor];
                $isWhitespace = $token instanceof MDToken
                    && $token->type === MDTokenType::Whitespace;

                if ($elem === MDTokenType::META_OptionalWhitespace) {
                    // Consume whitespace if present; otherwise match nothing
                    // (also when there are no tokens left at all).
                    if ($isWhitespace) {
                        $matched[] = $token;
                        $cursor++;
                    }
                    continue;
                }

                if ($exhausted) {
                    // A required element remains but the tokens ran out.
                    $matchedAll = false;
                    break;
                }

                if ($elem === MDTokenType::META_AnyNonWhitespace) {
                    $accepted = !$isWhitespace;
                } else {
                    $accepted = $token instanceof MDToken && $token->type === $elem;
                }
                if (!$accepted) {
                    $matchedAll = false;
                    break;
                }
                $matched[] = $token;
                $cursor++;
            }
            if ($matchedAll) {
                return new MDTokenMatch($matched, $t);
            }
        }
        return null;
    }

    /**
     * Searches an array of MDToken for a given starting pattern and ending
     * pattern and returns match info about both and the tokens in between.
     *
     * If `$contentValidator` is specified, it will be called with the content
     * tokens of a potential match. If the validator returns `true`, the result
     * will be accepted and returned by this method. If the validator returns
     * `false`, this method will keep looking for another matching pair. If no
     * validator is given the first match will be returned regardless of content.
*
     * If a match is found, a `MDPairedTokenMatch` is returned with details
     * of the opening tokens, closing tokens, and content tokens between. Otherwise
     * `null` is returned. A candidate pair with no content tokens between its
     * start and end is never accepted.
     *
     * @param MDToken[] $tokensToSearch array of `MDToken` to search in
     * @param MDTokenType[] $startPattern pattern to find first
     * @param MDTokenType[] $endPattern pattern to find positioned after
     * `$startPattern`
     * @param ?callable $contentValidator optional validator function. If
     * provided, will be passed an array of inner `MDToken`, and the function
     * can return `true` to accept the contents or `false` to keep searching
     * @param int $startIndex token index where searching should begin
     * @return ?MDPairedTokenMatch match, or `null`
     */
    public static function findPairedTokens(array $tokensToSearch, array $startPattern, array $endPattern, ?callable $contentValidator=null, int $startIndex=0): ?MDPairedTokenMatch
    {
        $tokenCount = sizeof($tokensToSearch);
        for ($s = $startIndex; $s < $tokenCount; $s++) {
            $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
            if ($startMatch === null) {
                return null;
            }
            $contentStart = $startMatch->index + sizeof($startMatch->tokens);
            $endStart = $contentStart;
            while ($endStart < $tokenCount) {
                $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                if ($endMatch === null) {
                    break;
                }
                $contents = array_slice($tokensToSearch, $contentStart, $endMatch->index - $contentStart);
                if (sizeof($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                    return new MDPairedTokenMatch(
                        $startMatch->tokens,
                        $contents,
                        $endMatch->tokens,
                        $startMatch->index,
                        $contentStart,
                        $endMatch->index,
                        $endMatch->index + sizeof($endMatch->tokens) - $startMatch->index
                    );
                }
                // Contents rejected. Try next end match.
                $endStart = $endMatch->index + 1;
            }
            // No end matches. Resume after this start match (the loop's
            // increment moves one past it).
            $s = $startMatch->index;
        }
        return null;
    }

    /**
     * Tests equality with another value. `original`, `content`, `extra` and
     * `type` must be equal; `tag` is compared by identity and `modifier` with
     * loose (`!=`) comparison.
     *
     * @param mixed $other
     * @return bool
     */
    public function equals($other)
    {
        if (!($other instanceof MDToken)) return false;
        if ($other->original !== $this->original) return false;
        if ($other->type != $this->type) return false;
        if ($other->content !== $this->content) return false;
        if ($other->extra !== $this->extra) return false;
        if ($other->tag !== $this->tag) return false;
        if ($other->modifier != $this->modifier) return false;
        return true;
    }
}

/**
 * Parsing and rendering state. Passed around throughout the parsing process.
 *
 * States are hierarchical. A sub-state can be created by calling `->copy()` with
 * a new array of lines. The sub-state points back to its parent state. This
 * is done to parse inner content of a syntax as its own standalone document.
 *
 * If a custom `MDReader` implementation wants to store data in this object,
 * always do so on `$state->root()` to ensure it's stored on the original state,
 * not a child state. Otherwise data may be lost when the sub-state is discarded.
 */
class MDState
{
    /**
     * Ascends the parent chain to the root `MDState` instance. This should be
     * used when referencing most stored fields except `$lines` and `$p`.
     */
    public function root(): MDState
    {
        $state = $this;
        while ($state->parent !== null) {
            $state = $state->parent;
        }
        return $state;
    }

    /**
     * Lines of the markdown document. The current line index is pointed to by `$p`.
     *
     * @var string[]
     */
    public array $lines;

    /**
     * The current line in `$lines`, or `null` once `$p` has moved past the
     * last line.
     */
    public function currentLine(): ?string
    {
        if ($this->p >= sizeof($this->lines)) {
            return null;
        }
        return $this->lines[$this->p];
    }

    /**
     * Current line pointer into array `$lines`.
     */
    public int $p = 0;

    /**
     * General storage for anything readers need to track during the parsing
     * process.
     */
    public array $userInfo = [];

    /** State this one was copied from, or `null` for the root state. */
    private ?MDState $parent = null;

    /**
     * Array of `MDReader`s sorted by block reading priority.
     * @var MDReader[]
     */
    public array $readersByBlockPriority = [];

    /**
     * Array of `MDReader`s sorted by tokenization priority.
* @var MDReader[]
     */
    public array $readersByTokenPriority = [];

    /**
     * Array of tuples of `pass:number` and `MDReader` sorted by substitution
     * priority.
     * @var array[]
     */
    public array $readersBySubstitutePriority = [];

    /**
     * Prefix to include in any generated `id` attributes on HTML elements.
     * Useful for keeping elements unique in multiple parsed documents in the
     * same HTML page.
     */
    public string $elementIdPrefix = '';

    /**
     * Filter for removing unapproved HTML tags, attributes, and values.
     */
    public MDHTMLFilter $tagFilter;

    /**
     * Stores the lines and records the creation time in `$startTime`.
     *
     * @param string[] $lines lines of markdown text
     */
    public function __construct(array $lines)
    {
        $this->startTime = microtime(true);
        $this->lines = $lines;
    }

    /**
     * Creates a copy of this state with new lines. Useful for parsing nested
     * content. The new state keeps a reference to this one as its parent.
     *
     * @param string[] $lines
     * @return MDState copied sub-state
     */
    public function copy(array $lines): MDState
    {
        $child = new MDState($lines);
        $child->parent = $this;
        return $child;
    }

    /**
     * Tests if there are at least `$minCount` lines available to read. If `$p`
     * is not provided it will be relative to `$this->p`.
     */
    public function hasLines(int $minCount, ?int $p=null): bool
    {
        $from = $p ?? $this->p;
        return sizeof($this->lines) >= $from + $minCount;
    }

    /**
     * Reads and returns an array of blocks from the current line pointer.
     * Stops when no lines remain or no further block can be read.
     *
     * @return MDBlockNode[] parsed blocks
     */
    public function readBlocks(): array
    {
        $blocks = [];
        while ($this->hasLines(1)) {
            $block = $this->readNextBlock();
            if (!$block) {
                break;
            }
            $blocks[] = $block;
        }
        return $blocks;
    }

    /**
     * Creates a simple `MDBlockNode` if no other registered blocks match.
*/
    private function readFallbackBlock(): ?MDBlockNode
    {
        if ($this->p >= sizeof($this->lines)) {
            return null;
        }
        $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
        if (sizeof($lines) == 0) {
            return null;
        }
        // The fallback consumes everything that is left.
        $this->p = sizeof($this->lines);
        return new MDBlockNode($this->inlineMarkdownToNode(implode("\n", $lines)));
    }

    /**
     * Attempts to read one block from the current line pointer. The pointer
     * will be positioned just after the end of the block.
     *
     * Leading blank lines are skipped first. Each reader in the root state's
     * `$readersByBlockPriority` is then tried in order; if none produces a
     * block, the fallback block is used.
     *
     * @throws Error if a reader returns a block without advancing `$p`
     */
    private function readNextBlock(): ?MDBlockNode
    {
        while ($this->hasLines(1) && mb_strlen(trim($this->lines[$this->p])) == 0) {
            $this->p++;
        }
        if (!$this->hasLines(1)) {
            return null;
        }
        foreach ($this->root()->readersByBlockPriority as $reader) {
            $startP = $this->p;
            $block = $reader->readBlock($this);
            if (!$block) {
                continue;
            }
            if ($this->p == $startP) {
                $readerClassName = MDUtils::typename($reader);
                $blockClassName = MDUtils::typename($block);
                throw new Error("{$readerClassName} returned an " .
                    "{$blockClassName} without incrementing MDState.p. " .
                    "This could lead to an infinite loop.");
            }
            return $block;
        }
        return $this->readFallbackBlock();
    }

    /**
     * Splits a line of inline markdown into tokens. Sub-states delegate to
     * their parent. A backslash makes the following character literal text.
     * At every other position each reader in `$readersByTokenPriority` is
     * offered the remainder of the line; characters no reader claims are
     * accumulated as text.
     *
     * @param string $line
     * @return MDToken[]
     * @throws Error if a reader returns a token with an empty `original`
     */
    private function inlineMarkdownToTokens(string $line): array
    {
        if ($this->parent) {
            return $this->parent->inlineMarkdownToTokens($line);
        }
        $tokens = [];
        $text = '';
        $expectLiteral = false;

        /**
         * Flushes accumulated content in `$text` to `$tokens`, splitting off
         * leading and trailing whitespace into their own tokens.
         */
        $endText = function() use (&$tokens, &$text) {
            if (mb_strlen($text) == 0) return;
            $textGroups = [];
            if (mb_eregi('^(\\s+)(.*?)$', $text, $textGroups)) {
                array_push($tokens, new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]));
                $text = is_string($textGroups[2]) ? $textGroups[2] : '';
            }
            if (mb_eregi('^(.*?)(\\s+)$', $text, $textGroups)) {
                array_push($tokens, new MDToken($textGroups[1], MDTokenType::Text, $textGroups[1]));
                array_push($tokens, new MDToken($textGroups[2], MDTokenType::Whitespace, $textGroups[2]));
            } else {
                array_push($tokens, new MDToken($text, MDTokenType::Text, $text));
            }
            $text = '';
        };

        $length = mb_strlen($line);
        for ($p = 0; $p < $length; $p++) {
            $ch = mb_substr($line, $p, 1);
            if ($expectLiteral) {
                $text .= $ch;
                $expectLiteral = false;
                continue;
            }
            if ($ch == '\\') {
                $expectLiteral = true;
                continue;
            }
            $remainder = mb_substr($line, $p);
            $found = false;
            foreach ($this->root()->readersByTokenPriority as $reader) {
                $token = $reader->readToken($this, $remainder);
                if ($token === null) continue;
                $original = $token->original;
                if ($original === null || mb_strlen($original) === 0) {
                    $readerClassName = MDUtils::typename($reader);
                    // Must be a double-quoted string: backticks are PHP's
                    // shell-execution operator.
                    throw new Error("{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.");
                }
                $endText();
                array_push($tokens, $token);
                // Skip what the token consumed; the loop's increment accounts
                // for the final character.
                $p += mb_strlen($original) - 1;
                $found = true;
                break;
            }
            if (!$found) {
                $text .= $ch;
            }
        }
        $endText();
        return $tokens;
    }

    /**
     * Converts a line of markdown to an `MDInlineNode`. When parsing yields
     * exactly one node it is returned directly; otherwise the nodes are
     * wrapped in a new `MDInlineNode`.
     *
     * @param string|string[] $line
     * @return MDInlineNode
     */
    public function inlineMarkdownToNode(string|array $line): MDInlineNode
    {
        $nodes = $this->inlineMarkdownToNodes($line);
        return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
    }

    /**
     * Converts a line of markdown to an array of `MDInlineNode`s. An array of
     * lines is joined with newlines first.
     *
     * @param string|string[] $line
     * @return MDInlineNode[]
     */
    public function inlineMarkdownToNodes(string|array $line): array
    {
        $text = is_array($line) ? implode("\n", $line) : $line;
        return $this->tokensToNodes($this->inlineMarkdownToTokens($text));
    }

    /**
     * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
     * of only `MDInlineNode` via repeated `MDReader` substitution.
*
     * @param (MDToken|MDInlineNode)[] $tokens
     * @return MDInlineNode[]
     * @throws Error if an element is neither an `MDToken` nor an `MDNode`
     */
    public function tokensToNodes(array $tokens): array
    {
        $nodes = $tokens;

        // Perform repeated substitutions, converting sequences of tokens into
        // nodes, until no more substitutions can be made. After any change the
        // scan restarts from the highest-priority reader.
        do {
            $anyChanges = false;
            foreach ($this->root()->readersBySubstitutePriority as $readerTuple) {
                /** @var int */
                $pass = $readerTuple[0];
                /** @var MDReader */
                $reader = $readerTuple[1];
                if ($reader->substituteTokens($this, $pass, $nodes)) {
                    $anyChanges = true;
                    break;
                }
            }
        } while ($anyChanges);

        // Convert any remaining tokens to text nodes. Also apply any inline
        // CSS modifiers to the non-text node immediately preceding them.
        $result = [];
        $lastNode = null;
        foreach ($nodes as $key => $node) {
            if ($node instanceof MDToken) {
                if ($node->type == MDTokenType::Modifier && $lastNode && $node->modifier !== null) {
                    // The tag name of $lastNode is not known here, so pass
                    // null explicitly (scrubModifier takes two arguments).
                    $this->root()->tagFilter->scrubModifier($node->modifier, null);
                    $node->modifier->applyTo($lastNode);
                    $result[$key] = new MDTextNode('');
                } else {
                    $result[$key] = new MDTextNode($node->original);
                }
                $lastNode = null;
            } elseif ($node instanceof MDNode) {
                $lastNode = ($node instanceof MDTextNode) ? null : $node;
                $result[$key] = $node;
            } else {
                $nodeClassName = MDUtils::typename($node);
                throw new Error("Unexpected node type {$nodeClassName}");
            }
        }
        return $result;
    }

    /**
     * Timestamp from `microtime(true)` taken when this state was constructed.
     */
    public $startTime;

    /**
     * Checks if parsing has taken an excessive length of time, measured from
     * the root state's `$startTime`. Because I'm not fully confident in my
     * loops yet. :)
     *
     * @param float $maxSeconds allowed elapsed time in seconds
     * @throws Error if more than `$maxSeconds` have elapsed
     */
    public function checkExecutionTime(float $maxSeconds=1.0)
    {
        $elapsed = microtime(true) - $this->root()->startTime;
        if ($elapsed > $maxSeconds) {
            throw new Error("Markdown parsing taking too long. Infinite loop?");
        }
    }

    /**
     * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
     * and `MDReferencedImageReader`.
     */
    private array $referenceToURL = [];

    /**
     * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
     * and `MDReferencedImageReader`.
*/
    private array $referenceToTitle = [];

    /**
     * Defines a URL by reference symbol. The definition is stored on the root
     * state under the lowercased symbol. A title is stored only when one is
     * given.
     */
    public function defineURL(string $reference, string $url, ?string $title=null)
    {
        $key = mb_strtolower($reference);
        $root = $this->root();
        $root->referenceToURL[$key] = $url;
        if ($title !== null) {
            $root->referenceToTitle[$key] = $title;
        }
    }

    /**
     * Returns the URL associated with a reference symbol (case-insensitive),
     * or `null` if none was defined.
     */
    public function urlForReference(string $reference): ?string
    {
        $key = mb_strtolower($reference);
        return $this->root()->referenceToURL[$key] ?? null;
    }

    /**
     * Returns the link title associated with a reference symbol
     * (case-insensitive), or `null` if none was defined.
     */
    public function urlTitleForReference(string $reference): ?string
    {
        $key = mb_strtolower($reference);
        return $this->root()->referenceToTitle[$key] ?? null;
    }
}

/**
 * Defines a set of allowable HTML tags, attributes, and CSS.
 */
class MDHTMLFilter
{
    /**
     * Mapping of permitted lowercase tag names to objects containing allowable
     * attributes for those tags. Does not need to include those attributes
     * defined in `$allowableGlobalAttributes`.
     *
     * Values are objects with allowable lowercase attribute names mapped to
     * allowable value patterns. A `*` means any value is acceptable. Multiple
     * allowable values can be joined together with `|`. These special symbols
     * represent certain kinds of values and can be used in combination or in
     * place of literal values.
*
     * - `{classlist}`: A list of legal CSS classnames, separated by spaces
     * - `{int}`: An integer
     * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
     * - `{style}`: One or more CSS declarations, separated by semicolons (simple
     *   `key: value;` syntax only)
     * - `{url}`: A URL
     */
    public array $allowableTags = [
        'address' => [
            'cite' => '{url}',
        ],
        'h1' => [],
        'h2' => [],
        'h3' => [],
        'h4' => [],
        'h5' => [],
        'h6' => [],
        'blockquote' => [],
        'dl' => [],
        'dt' => [],
        'dd' => [],
        'div' => [],
        'hr' => [],
        'ul' => [],
        'ol' => [
            'start' => '{int}',
            'type' => 'a|A|i|I|1',
        ],
        'li' => [
            'value' => '{int}',
        ],
        'p' => [],
        'pre' => [],
        'table' => [],
        'thead' => [],
        'tbody' => [],
        'tfoot' => [],
        'tr' => [],
        'td' => [],
        'th' => [],
        'a' => [
            'href' => '{url}',
            'target' => '*',
        ],
        'abbr' => [],
        'b' => [],
        'br' => [],
        'cite' => [],
        'code' => [],
        'data' => [
            'value' => '*',
        ],
        'dfn' => [],
        'em' => [],
        'i' => [],
        'kbd' => [],
        'mark' => [],
        'q' => [
            'cite' => '{url}',
        ],
        's' => [],
        'samp' => [],
        'small' => [],
        'span' => [],
        'strong' => [],
        'sub' => [],
        'sup' => [],
        'time' => [
            'datetime' => '*',
        ],
        'u' => [],
        'var' => [],
        'wbr' => [],
        'img' => [
            'alt' => '*',
            // `href` is kept for backward compatibility; `src` is the
            // attribute an <img> actually loads its image from.
            'href' => '{url}',
            'src' => '{url}',
        ],
        'figure' => [],
        'figcaption' => [],
        'del' => [],
        'ins' => [],
        'details' => [],
        'summary' => [],
    ];

    /**
     * Mapping of allowable lowercase global attributes to their permitted
     * values. Uses same value pattern syntax as described in `$allowableTags`.
     * A name ending in `*` matches any attribute starting with the text
     * before the `*`.
     */
    public array $allowableGlobalAttributes = [
        'class' => '{classlist}',
        'data-*' => '*',
        'dir' => 'ltr|rtl|auto',
        'id' => '*',
        'lang' => '*',
        'style' => '{style}',
        'title' => '*',
        'translate' => 'yes|no|{none}',
    ];

    /**
     * Mapping of allowable CSS style names to their allowable value patterns.
     * Multiple values can be delimited with `|` characters. Limited support
     * so far.
*
     * Recognized special values:
     * - `{color}`: A hex or named color
     */
    public array $allowableStyleKeys = [
        'background-color' => '{color}',
        'color' => '{color}',
    ];

    /**
     * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
     * itself has already been whitelisted. An attribute is removed when
     * either its name or its value is not permitted for the tag.
     *
     * @param MDHTMLTag $tag HTML tag
     */
    public function scrubTag(MDHTMLTag $tag)
    {
        foreach ($tag->attributes as $name => $value) {
            $permitted = $this->isValidAttributeName($tag->tagName, $name)
                && $this->isValidAttributeValue($tag->tagName, $name, $value);
            if (!$permitted) {
                unset($tag->attributes[$name]);
            }
        }
    }

    /**
     * Scrubs all forbidden attributes from an HTML modifier. CSS classes are
     * cleared as a whole if the combined class list is not allowed; the id,
     * individual styles, and individual attributes are removed one by one.
     *
     * @param MDTagModifier $modifier
     * @param ?string $tagName HTML tag name, if known, otherwise only
     * global attributes will be permitted
     */
    public function scrubModifier(MDTagModifier $modifier, ?string $tagName=null)
    {
        if (sizeof($modifier->cssClasses) > 0) {
            $classList = implode(' ', $modifier->cssClasses);
            if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
                $modifier->cssClasses = [];
            }
        }

        if ($modifier->cssId !== null
            && !$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
            $modifier->cssId = null;
        }

        if (!$this->isValidAttributeName($tagName, 'style')) {
            $modifier->cssStyles = [];
        } else {
            foreach ($modifier->cssStyles as $key => $val) {
                if (!$this->isValidStyleValue($key, $val)) {
                    unset($modifier->cssStyles[$key]);
                }
            }
        }

        foreach ($modifier->attributes as $key => $val) {
            if (!$this->isValidAttributeValue($tagName, $key, $val)) {
                unset($modifier->attributes[$key]);
            }
        }
    }

    /**
     * Tests if an HTML tag name is permitted (case-insensitive).
     */
    public function isValidTagName(string $tagName): bool
    {
        return isset($this->allowableTags[mb_strtolower($tagName)]);
    }

    /**
     * Tests if an HTML attribute name is permitted.
*/
    public function isValidAttributeName(?string $tagName, string $attributeName): bool
    {
        $lcAttributeName = mb_strtolower($attributeName);

        // Global attributes: exact name first, then `prefix*` wildcards.
        if (isset($this->allowableGlobalAttributes[$lcAttributeName])) {
            return true;
        }
        foreach ($this->allowableGlobalAttributes as $pattern => $valuePattern) {
            if (!str_ends_with($pattern, '*')) continue;
            $patternPrefix = mb_substr($pattern, 0, mb_strlen($pattern) - 1);
            if (str_starts_with($lcAttributeName, $patternPrefix)) {
                return true;
            }
        }

        // Tag-specific attributes.
        if ($tagName === null) return false;
        // `?? null` avoids an undefined-key warning for unknown tag names.
        $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
        if ($tagAttributes === null) return false;
        return isset($tagAttributes[$lcAttributeName]);
    }

    /**
     * Tests if an attribute value is allowable. The first applicable rule
     * decides: an exact global attribute, then a wildcard global attribute,
     * then the tag-specific attribute list.
     *
     * @param ?string $tagName tag the attribute belongs to, if known
     * @param string $attributeName attribute name (case-insensitive)
     * @param string|bool $attributeValue attribute value; `true` denotes an
     * attribute without a value
     */
    public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool
    {
        $lcAttributeName = mb_strtolower($attributeName);

        $globalPattern = $this->allowableGlobalAttributes[$lcAttributeName] ?? null;
        if ($globalPattern !== null) {
            return $this->attributeValueMatchesPattern($attributeValue, $globalPattern);
        }
        foreach ($this->allowableGlobalAttributes as $namePattern => $valuePattern) {
            if (!str_ends_with($namePattern, '*')) continue;
            $namePrefix = mb_substr($namePattern, 0, mb_strlen($namePattern) - 1);
            if (str_starts_with($lcAttributeName, $namePrefix)) {
                return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
            }
        }

        if ($tagName === null) return false;
        $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
        if ($tagAttributes === null) return false;
        $valuePattern = $tagAttributes[$lcAttributeName] ?? null;
        if ($valuePattern === null) return false;
        return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
    }

    private const permissiveURLRegex = '^\\S+$';

    private const integerRegex = '^[\\-]?\\d+$';

    private const classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';

    /**
     * Tests a value against a `|`-delimited list of pattern options. `*`
     * accepts anything and `{none}` accepts only a valueless attribute
     * (`true`). All other options describe text, so a non-string value never
     * satisfies them.
     */
    private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool
    {
        foreach (explode('|', $pattern) as $option) {
            if ($option === '*') {
                return true;
            }
            if ($option === '{none}') {
                if ($value === true) return true;
                continue;
            }
            // Don't let a boolean be coerced to "1"/"" and regex-matched.
            if (!is_string($value)) {
                continue;
            }
            $matches = match ($option) {
                '{classlist}' => mb_eregi(self::classListRegex, $value),
                '{int}' => mb_eregi(self::integerRegex, $value),
                '{style}' => $this->isValidStyleDeclaration($value),
                '{url}' => mb_eregi(self::permissiveURLRegex, $value),
                default => $value === $option,
            };
            if ($matches) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if a string of one or more style `key: value;` declarations is
     * fully allowable. Empty declarations (e.g. from a trailing `;`) are
     * ignored. A declaration that does not contain exactly one `:` is
     * rejected.
     */
    public function isValidStyleDeclaration(string $styles): bool
    {
        foreach (explode(';', $styles) as $setting) {
            if (mb_strlen(trim($setting)) == 0) continue;
            $parts = explode(':', $setting);
            if (sizeof($parts) != 2) return false;
            $name = trim($parts[0]);
            $value = trim($parts[1]);
            if (!$this->isValidStyleKey($name)) return false;
            if (!$this->isValidStyleValue($name, $value)) return false;
        }
        return true;
    }

    /**
     * Tests if a CSS style key is allowable.
     */
    public function isValidStyleKey(string $key): bool
    {
        return isset($this->allowableStyleKeys[$key]);
    }

    /**
     * Tests if a CSS style value is allowable for the given style key.
     * `{color}` options are checked as colors; every other option must equal
     * the value exactly.
     */
    public function isValidStyleValue(string $key, string $value): bool
    {
        $pattern = $this->allowableStyleKeys[$key] ?? null;
        if ($pattern === null) return false;
        foreach (explode('|', $pattern) as $option) {
            // Handle each option on its own: a `{color}` option must not also
            // be compared literally against the value.
            $accepted = ($option === '{color}')
                ? $this->isValidCSSColor($value)
                : $value === $option;
            if ($accepted) return true;
        }
        return false;
    }

    private const styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';

    /**
     * Tests if a value is a 3- or 6-digit hex color or a purely alphabetic
     * word.
     */
    private function isValidCSSColor(string $value): bool
    {
        return mb_eregi(self::styleColorRegex, $value);
    }
}

/**
 * Represents a single HTML tag. Paired tags are represented separately.
 */
class MDHTMLTag
{
    /**
     * Verbatim string of the original parsed tag. Not modified. Should be
     * considered unsafe for inclusion in the final document. Use `->toString()`
     * instead.
     */
    public string $original;

    public string $tagName;

    /** Whether this is a closing tag (`</name>`). */
    public bool $isCloser;

    /**
     * Map of attribute names to value strings. A value of `true` denotes an
     * attribute without a value.
     */
    public array $attributes;

    /**
     * @param string $original
     * @param string $tagName
     * @param bool $isCloser
     * @param array $attributes
     */
    public function __construct(string $original, string $tagName, bool $isCloser, array $attributes)
    {
        $this->original = $original;
        $this->tagName = $tagName;
        $this->isCloser = $isCloser;
        $this->attributes = $attributes;
    }

    /**
     * Renders the tag as HTML. Closing tags render as `</name>`. Opening tags
     * include their attributes with scrubbed names and entity-escaped values.
     */
    public function __toString(): string
    {
        if ($this->isCloser) {
            return "</{$this->tagName}>";
        }
        $html = '<' . $this->tagName;
        foreach ($this->attributes as $key => $value) {
            $safeName = MDUtils::scrubAttributeName($key);
            if ($value === true) {
                // Valueless attribute.
                $html .= " {$safeName}";
            } else {
                $escapedValue = htmlentities("{$value}");
                $html .= " {$safeName}=\"{$escapedValue}\"";
            }
        }
        return $html . '>';
    }

    /**
     * Tests equality with another value by tag name, closer flag, and
     * attributes. `original` is not compared.
     */
    public function equals($other): bool
    {
        if (!($other instanceof MDHTMLTag)) return false;
        if ($other->tagName != $this->tagName) return false;
        if ($other->isCloser != $this->isCloser) return false;
        return MDUtils::equal($other->attributes, $this->attributes);
    }

    private const htmlTagNameFirstRegex = '[a-z]';

    private const htmlTagNameMedialRegex = '[a-z0-9]';

    private const htmlAttributeNameFirstRegex = '[a-z]';

    private
const htmlAttributeNameMedialRegex = '[a-z0-9-]'; private const whitespaceCharRegex = '\\s'; /** * Checks the start of the given string for presence of an HTML tag. */ public static function fromLineStart(string $line): ?MDHTMLTag { $expectOpenBracket = 0; $expectCloserOrName = 1; $expectName = 2; $expectAttributeNameOrEnd = 3; $expectEqualsOrAttributeOrEnd = 4; $expectAttributeValue = 5; $expectCloseBracket = 6; $isCloser = false; $tagName = ''; $attributeName = ''; $attributeValue = ''; $attributeQuote = null; $attributes = []; $fullTag = null; $endAttribute = function(bool $unescape=false) use (&$attributes, &$attributeName, &$attributeValue, &$attributeQuote) { if (mb_strlen($attributeName) > 0) { if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) { $attributes[$attributeName] = $unescape ? html_entity_decode($attributeValue, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') : $attributeValue; } else { $attributes[$attributeName] = true; } } $attributeName = ''; $attributeValue = ''; $attributeQuote = null; }; $expect = $expectOpenBracket; for ($p = 0; $p < mb_strlen($line) && $fullTag === null; $p++) { $ch = mb_substr($line, $p, 1); $isWhitespace = mb_eregi(self::whitespaceCharRegex, $ch); switch ($expect) { case $expectOpenBracket: if ($ch != '<') return null; $expect = $expectCloserOrName; break; case $expectCloserOrName: if ($ch == '/') { $isCloser = true; } else { $p--; } $expect = $expectName; break; case $expectName: if (mb_strlen($tagName) == 0) { if (!mb_eregi(self::htmlTagNameFirstRegex, $ch)) return null; $tagName .= $ch; } else { if (mb_eregi(self::htmlTagNameMedialRegex, $ch)) { $tagName .= $ch; } else { $p--; $expect = ($isCloser) ? 
$expectCloseBracket : $expectAttributeNameOrEnd; } } break; case $expectAttributeNameOrEnd: if (mb_strlen($attributeName) == 0) { if ($isWhitespace) { // skip whitespace } elseif ($ch == '/') { $expect = $expectCloseBracket; } elseif ($ch == '>') { $fullTag = mb_substr($line, 0, $p + 1); break; } elseif (mb_eregi(self::htmlAttributeNameFirstRegex, $ch)) { $attributeName .= $ch; } else { return null; } } elseif ($isWhitespace) { $expect = $expectEqualsOrAttributeOrEnd; } elseif ($ch == '/') { $endAttribute(); $expect = $expectCloseBracket; } elseif ($ch == '>') { $endAttribute(); $fullTag = mb_substr($line, 0, $p + 1); break; } elseif ($ch == '=') { $expect = $expectAttributeValue; } elseif (mb_eregi(self::htmlAttributeNameMedialRegex, $ch)) { $attributeName .= $ch; } else { return null; } break; case $expectEqualsOrAttributeOrEnd: if ($ch == '=') { $expect = $expectAttributeValue; } elseif ($isWhitespace) { // skip whitespace } elseif ($ch == '/') { $expect = $expectCloseBracket; } elseif ($ch == '>') { $fullTag = mb_substr($line, 0, $p + 1); break; } elseif (mb_eregi(self::htmlAttributeNameFirstRegex, $ch)) { $endAttribute(); $expect = $expectAttributeNameOrEnd; $p--; } break; case $expectAttributeValue: if (mb_strlen($attributeValue) == 0) { if ($attributeQuote === null) { if ($isWhitespace) { // skip whitespace } elseif ($ch == '"' || $ch == "'") { $attributeQuote = $ch; } else { $attributeQuote = ''; // explicitly unquoted $p--; } } else { if ($ch === $attributeQuote) { // Empty string $endAttribute($attributeQuote != ''); $expect = $expectAttributeNameOrEnd; } elseif ($attributeQuote === '' && ($ch == '/' || $ch == '>')) { return null; } else { $attributeValue .= $ch; } } } else { if ($ch === $attributeQuote) { $endAttribute($attributeQuote != ''); $expect = $expectAttributeNameOrEnd; } elseif ($attributeQuote === '' && $isWhitespace) { $endAttribute(); $expect = $expectAttributeNameOrEnd; } else { $attributeValue .= $ch; } } break; case $expectCloseBracket: 
if ($isWhitespace) { // ignore whitespace } elseif ($ch == '>') { $fullTag = mb_substr($line, 0, $p + 1); break; } break; } } if ($fullTag === null) return null; $endAttribute(); return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes); } } /** * Represents HTML modifications to a node, such as CSS classes to add or * additional attributes. See `MDHTMLFilter->scrubModifier()` to remove disallowed * values. */ class MDTagModifier { /** * Verbatim markdown syntax. Unmodified by changes to other properties. */ public string $original; /** @var string[] */ public array $cssClasses = []; public ?string $cssId = null; public array $cssStyles = []; public array $attributes = []; private const leadingClassRegex = '^\\{([^}]+?)}'; private const trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$'; private const classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname private const idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id private const attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value public function applyTo(MDNode $node) { if ($node instanceof MDNode) { foreach ($this->cssClasses as $cssClass) { $node->addClass($cssClass); } if ($this->cssId) $node->cssId = $this->cssId; foreach ($this->attributes as $name => $value) { $node->attributes[$name] = $value; } foreach ($this->cssStyles as $name => $value) { $node->cssStyles[$name] = $value; } } } /** * Adds a CSS class. If already present it will not be duplicated. */ public function addClass(string $cssClass): bool { if (array_search($cssClass, $this->cssClasses) !== false) return false; array_push($this->cssClasses, $cssClass); return true; } /** * Removes a CSS class. 
*/
    public function removeClass(string $cssClass): bool {
        $beforeLength = sizeof($this->cssClasses);
        // array_diff preserves keys; reindex so `cssClasses` stays a list
        $this->cssClasses = array_values(array_diff($this->cssClasses, [ $cssClass ]));
        return sizeof($this->cssClasses) != $beforeLength;
    }

    /**
     * Tests whether another value is an `MDTagModifier` with the same CSS
     * classes, id, attributes, and styles. `$original` is not compared.
     */
    public function equals($other): bool {
        if (!($other instanceof MDTagModifier)) return false;
        if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
        if ($other->cssId !== $this->cssId) return false;
        if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
        // Styles are part of the modifier's state too
        if (!MDUtils::equal($other->cssStyles, $this->cssStyles)) return false;
        return true;
    }

    /**
     * Returns the verbatim markdown syntax this modifier was parsed from.
     */
    public function __toString(): string {
        return $this->original;
    }

    /**
     * Parses a `name:value;name:value` style string into a map of style
     * names to values. Pairs that do not split into exactly two parts on
     * `:` are skipped.
     *
     * @return array map of style name to style value
     */
    private static function styleToObject(string $styleValue): array {
        $styles = [];
        foreach (explode(';', $styleValue) as $pair) {
            $keyAndValue = explode(':', $pair);
            if (sizeof($keyAndValue) != 2) continue;
            $styles[$keyAndValue[0]] = $keyAndValue[1];
        }
        return $styles;
    }

    /**
     * Builds a modifier from the text found between curly braces. Tokens are
     * whitespace separated and may be `.class`, `#id`, or `name=value`
     * (`style=...` is parsed into `cssStyles`).
     *
     * @return ?MDTagModifier the modifier, or `null` if any token is not
     * recognized
     */
    private static function fromContents(string $contents): ?MDTagModifier {
        $modifierTokens = mb_split('\\s+', $contents);
        $mod = new MDTagModifier();
        $mod->original = "{{$contents}}";
        foreach ($modifierTokens as $token) {
            if (trim($token) == '') continue;
            if (mb_eregi(self::classRegex, $token, $groups)) {
                $mod->addClass($groups[1]);
            } elseif (mb_eregi(self::idRegex, $token, $groups)) {
                $mod->cssId = $groups[1];
            } elseif (mb_eregi(self::attributeRegex, $token, $groups)) {
                if ($groups[1] == 'style') {
                    $mod->cssStyles = self::styleToObject($groups[2]);
                } else {
                    $mod->attributes[$groups[1]] = $groups[2];
                }
            } else {
                return null;
            }
        }
        return $mod;
    }

    /**
     * Extracts block modifier from end of a line. Always returns a 2-element
     * tuple array:
     * - `0`: the line without the modifier
     * - `1`: an `MDTagModifier` if found or `null` if not
     *
     * When `$state` is given and none of its root's block readers is an
     * `MDModifierReader`, the line is returned untouched with a `null`
     * modifier.
     *
     * @param string $line
     * @param ?MDState $state
     * @return array tuple with remaining line and `MDTagModifier` or `null`
     */
    public static function fromLine(string $line, ?MDState $state): array {
        if ($state) {
            $found = false;
            foreach ($state->root()->readersByBlockPriority as $reader) {
                if ($reader instanceof MDModifierReader) {
                    $found = true;
                    break;
                }
            }
            if (!$found) return [ $line, null ];
        }
        if (!mb_eregi(self::trailingClassRegex, $line, $groups)) return [ $line, null ];
        $bareLine = $groups[1];
        // NOTE(review): when the braces do not hold a valid modifier,
        // `fromContents` returns null but the braces are still removed from
        // the returned line — confirm this is intended.
        $mod = self::fromContents($groups[2]);
        return [ $bareLine, $mod ];
    }

    /**
     * Attempts to extract modifier from head of string.
     */
    public static function fromStart(string $line): ?MDTagModifier {
        if (!mb_eregi(self::leadingClassRegex, $line, $groups)) return null;
        return self::fromContents($groups[1]);
    }

    /**
     * Discards any modifiers from a line and returns what remains.
     */
    public static function strip(string $line): string {
        if (!mb_eregi(self::trailingClassRegex, $line, $groups)) return $line;
        return $groups[1];
    }
}

// -- Readers ---------------------------------------------------------------

/**
 * Base class for readers of various markdown syntax. A `Markdown` instance can
 * be created with any combination of subclasses of these to customize the
 * flavor of markdown parsed.
 *
 * @see {@link custom.md} for details on subclassing
 */
class MDReader {
    /**
     * Called before processing begins. `$state->lines` is populated and the
     * line pointer `$state->p` will be at `0`.
     *
     * Default implementation does nothing.
     */
    public function preProcess(MDState $state) {}

    /**
     * Attempts to read an `MDBlockNode` subclass at the current line pointer
     * `$state->p`. Only matches if the block pattern starts at the line pointer,
     * not elsewhere in the `$state->lines` array.
If a block is found, `$state->p`
     * should be incremented to the next line _after_ the block structure and
     * a `MDBlockNode` subclass instance is returned. If no block is found,
     * returns `null`.
     *
     * Default implementation always returns `null`.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        return null;
    }

    /**
     * Attempts to read an inline token from the beginning of `$line`. Only the
     * start of the given `$line` is considered. If a matching token is found, an
     * `MDToken` is returned. Otherwise `null` is returned.
     *
     * Default implementation always returns `null`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return null;
    }

    /**
     * Attempts to find a pattern anywhere in `$tokens` and perform a _single_
     * in-place substitution with one or more `MDNode` subclass instances.
     * If a substitution is performed, must return `true`, otherwise `false`.
     *
     * Default implementation always returns `false`.
     *
     * @param MDState $state
     * @param int $pass what substitution pass this is, starting with 1
     * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and
     * `MDInlineNode` elements
     * @return bool `true` if a substitution was performed, `false` if not
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        return false;
    }

    /**
     * Called after all parsing has completed. An array `$blocks` is passed of
     * all the top-level `MDBlockNode` elements in the document which this
     * method can traverse or alter in-place via `array_splice` operations if
     * necessary.
     *
     * `MDNode->visitChildren` is useful for recursively looking for certain
     * `MDNode` instances. `MDNode::replaceNodes` is useful for swapping in
     * replacements.
     *
     * Default implementation does nothing.
     *
     * @param MDState $state
     * @param MDBlockNode[] $blocks
     */
    public function postProcess(MDState $state, array &$blocks) {}

    /**
     * Can be overridden to influence ordering of this reader with respect to
     * another during the block parsing phase. Return `-1` to be ordered before
     * the given reader, `1` to be ordered after it, or `0` for no preference.
     * Only return non-`0` values to resolve specific conflicts.
     *
     * Default implementation always returns `0` (no preference).
     *
     * @param MDReader $other
     * @return int a negative, positive, or 0 value to be ordered before,
     * after, or anywhere relative to `$other`, respectively
     */
    public function compareBlockOrdering(MDReader $other): int {
        return 0;
    }

    /**
     * Can be overridden to influence ordering of this reader with respect to
     * another during the tokenizing phase. Return `-1` to be ordered before
     * the given reader, `1` to be ordered after it, or `0` for no preference.
     * Only return non-`0` values to resolve specific conflicts.
     *
     * Default implementation always returns `0` (no preference).
     *
     * @param MDReader $other
     * @return int a negative, positive, or 0 value to be ordered before,
     * after, or anywhere relative to `$other`, respectively
     */
    public function compareTokenizeOrdering(MDReader $other): int {
        return 0;
    }

    /**
     * Can be overridden to influence ordering of this reader with respect to
     * another during the substitution phase. Return `-1` to be ordered before
     * the given reader, `1` to be ordered after it, or `0` for no preference.
     * Only return non-`0` values to resolve specific conflicts.
     *
     * Readers are sorted within each substitution pass. All pass 1 readers are
     * processed first, then all pass 2 readers, etc. The number of passes this
     * reader participates in is dictated by `substitutionPassCount()`.
     *
     * Default implementation always returns `0` (no preference).
     *
     * @param MDReader $other
     * @param int $pass substitution pass, with numbering starting at `1`
     * @return int a negative, positive, or 0 value to be ordered before,
     * after, or anywhere relative to `$other`, respectively
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return 0;
    }

    /**
     * How many substitution passes this reader requires. Substitution allows
     * all pass 1 readers to process first, then all pass 2 readers, etc.
     */
    public function substitutionPassCount(): int {
        return 1;
    }

    /**
     * For sorting readers with ordering preferences. The `compare` methods
     * don't have the properties of normal sorting compares so need to sort
     * differently.
     *
     * Every ordered pair of distinct elements is compared; a non-zero result
     * adds a "must come before" edge. Elements are then emitted as their
     * in-degree reaches zero. Elements never released (e.g. caught in a
     * cycle of preferences) are appended at the end.
     *
     * @param MDReader[] $arr array to sort
     * @param callable $compareFn comparison function, taking two array element
     * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
     * respectively
     * @param callable $idFn function for returning a unique hashable id for
     * the array element
     * @return MDReader[] sorted array
     */
    private static function kahnTopologicalSort(array $arr, callable $compareFn, callable $idFn): array {
        $edges = [];     // id => ids that must come after it
        $inDegrees = []; // id => number of unresolved predecessors
        $pending = [];   // id => element not yet emitted
        $ids = [];       // array index => id

        // Register every element as a graph node
        foreach ($arr as $index => $elem) {
            $id = $idFn($elem);
            $ids[$index] = $id;
            $edges[$id] = [];
            $inDegrees[$id] = 0;
            $pending[$id] = $elem;
        }

        // Compare every ordered pair and record the preferences as edges
        $count = sizeof($arr);
        for ($i = 0; $i < $count; $i++) {
            for ($j = 0; $j < $count; $j++) {
                if ($i === $j) continue;
                $order = $compareFn($arr[$i], $arr[$j]);
                if ($order < 0) {
                    $edges[$ids[$i]][] = $ids[$j];
                    $inDegrees[$ids[$j]]++;
                } elseif ($order > 0) {
                    $edges[$ids[$j]][] = $ids[$i];
                    $inDegrees[$ids[$i]]++;
                }
            }
        }

        // Seed the queue with nodes that have no predecessors
        $queue = [];
        foreach ($inDegrees as $id => $degree) {
            if ($degree === 0) $queue[] = $id;
        }

        // Emit nodes in order, releasing successors as they become free
        $sorted = [];
        while (sizeof($queue) > 0) {
            $id = array_shift($queue);
            $sorted[] = $pending[$id];
            unset($pending[$id]);
            foreach ($edges[$id] as $neighbor) {
                $inDegrees[$neighbor]--;
                if ($inDegrees[$neighbor] === 0) $queue[] = $neighbor;
            }
        }

        // Anything left over can go at the end. No ordering dependencies.
        foreach ($pending as $value) {
            $sorted[] = $value;
        }
        return $sorted;
    }

    /**
     * Returns a sorted array of readers by their block priority preferences.
     *
     * @param MDReader[] $readers
     * @return MDReader[] sorted readers
     */
    public static function sortReaderForBlocks(array &$readers): array {
        return self::kahnTopologicalSort(
            $readers,
            fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
            fn($elem) => MDUtils::typename($elem)
        );
    }

    /**
     * Returns a sorted array of readers by their tokenization priority preferences.
     *
     * @param MDReader[] $readers
     * @return MDReader[] sorted readers
     */
    public static function sortReadersForTokenizing(array &$readers): array {
        return self::kahnTopologicalSort(
            $readers,
            fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b),
            fn($elem) => MDUtils::typename($elem)
        );
    }

    /**
     * Returns a sorted array of tuples (arrays) containing the substitution
     * pass number and reader instance, sorted by their substitution priority
     * preferences.
     *
     * For readers with `substitutionPassCount()` > `1`, the same reader will
     * appear multiple times in the resulting array, one per pass.
*
     * @param MDReader[] $readers
     * @return array[] sorted array of tuples with the pass number and
     * reader instance in each
     */
    public static function sortReadersForSubstitution(array &$readers): array {
        // Bucket one [pass, reader] tuple per pass each reader takes part in
        $tuplesByPass = [];
        $maxPass = 1;
        foreach ($readers as $reader) {
            $passCount = $reader->substitutionPassCount();
            $maxPass = max($maxPass, $passCount);
            for ($pass = 1; $pass <= $passCount; $pass++) {
                $tuplesByPass[$pass][] = [ $pass, $reader ];
            }
        }

        // Sort each pass independently, then concatenate in pass order
        $result = [];
        for ($pass = 1; $pass <= $maxPass; $pass++) {
            $passResult = self::kahnTopologicalSort(
                $tuplesByPass[$pass] ?? [],
                fn(array $a, array $b): int => $a[1]->compareSubstituteOrdering($b[1], $pass),
                fn($elem) => MDUtils::typename($elem[1])
            );
            $result = array_merge($result, $passResult);
        }
        return $result;
    }
}

/**
 * Reads markdown blocks for headings denoted with the underline syntax.
 *
 * Supports `MDTagModifier` suffixes.
 */
class MDUnderlinedHeadingReader extends MDReader {
    /**
     * Reads a two-line heading: a non-empty content line followed by a line
     * made only of `=` (level 1) or only of `-` (level 2).
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        if (!$state->hasLines(2)) return null;
        $start = $state->p;
        [$contentLine, $modifier] = MDTagModifier::fromLine(trim($state->lines[$start]), $state);
        $underLine = trim($state->lines[$start + 1]);
        if ($contentLine == '') return null;

        if (mb_eregi('^=+$', $underLine)) {
            $level = 1;
        } elseif (mb_eregi('^\-+$', $underLine)) {
            $level = 2;
        } else {
            return null;
        }

        // Consume both lines before parsing the inline content
        $state->p = $start + 2;
        $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($contentLine));
        if ($modifier) $modifier->applyTo($block);
        return $block;
    }
}

/**
 * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
 * to 6 are supported.
 *
 * Supports `MDTagModifier` suffixes.
*/
class MDHashHeadingReader extends MDReader {
    private const hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content

    /**
     * Reads a single-line heading. The number of leading hashes gives the
     * heading level; trailing hashes are discarded.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $next = $state->p + 1;
        [$line, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
        if (!mb_eregi(self::hashHeadingRegex, $line, $groups)) return null;

        $state->p = $next;
        $level = mb_strlen($groups[1]);
        $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($groups[2]));
        if ($modifier) $modifier->applyTo($block);
        return $block;
    }
}

/**
 * Reads subtext blocks. Subtext is smaller, fainter text for things like
 * disclaimers or sources.
 *
 * Supports `MDTagModifier` suffixes.
 */
class MDSubtextReader extends MDReader {
    private const subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content

    /**
     * Reads a single line starting with `-#` as a subtext block.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $next = $state->p + 1;
        [$line, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
        if (!mb_eregi(self::subtextRegex, $line, $groups)) return null;

        $state->p = $next;
        $block = new MDSubtextNode($state->inlineMarkdownToNodes($groups[1]));
        if ($modifier) $modifier->applyTo($block);
        return $block;
    }

    /**
     * Orders this reader before the unordered list reader, whose `-` bullet
     * syntax would otherwise also match a `-#` line's first character.
     */
    public function compareBlockOrdering(MDReader $other): int {
        return ($other instanceof MDUnorderedListReader) ? -1 : 0;
    }
}

/**
 * Reads markdown blocks for blockquoted text.
*/
class MDBlockQuoteReader extends MDReader {
    /**
     * Reads a run of consecutive lines starting with `>`, strips the marker
     * (plus up to three spaces and an optional tab) from each, and parses the
     * remainder as nested blocks.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $blockquoteLines = [];
        $p = $state->p;
        $lineCount = sizeof($state->lines);
        // Stop *at* the first non-quoted line rather than after it, so the
        // line following the blockquote is left for the next reader.
        while ($p < $lineCount && str_starts_with($state->lines[$p], ">")) {
            $blockquoteLines[] = $state->lines[$p];
            $p++;
        }
        if (sizeof($blockquoteLines) == 0) return null;

        $contentLines = array_map(
            fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)),
            $blockquoteLines
        );
        $substate = $state->copy($contentLines);
        $quotedBlocks = $substate->readBlocks();
        $state->p = $p;
        return new MDBlockquoteNode($quotedBlocks);
    }
}

/**
 * Internal abstract base class for ordered and unordered lists.
 */
class _MDListReader extends MDReader {
    /**
     * Collects the lines belonging to the list item that starts at
     * `$state->p`, without moving the line pointer. Collection stops at the
     * next list item marker, or at an unindented line that follows a blank
     * line. Trailing blank lines are dropped and one indent level is
     * stripped from every line.
     *
     * @param MDState $state
     * @param int $firstLineStartPos character offset of the item's content
     * on its first line (i.e. the length of the bullet or number prefix)
     * @return string[] the item's content lines
     */
    private static function readItemLines(MDState $state, int $firstLineStartPos): array {
        $p = $state->p;
        $lines = [];
        $seenBlankLine = false;
        // NOTE(review): this flag is assigned but never read, so trailing
        // blank lines are always stripped below — confirm whether the strip
        // was meant to be conditional on it.
        $stripTrailingBlankLines = true;
        while ($state->hasLines(1, $p)) {
            $isFirstLine = ($p == $state->p);
            $line = $state->lines[$p++];
            if ($isFirstLine) {
                $line = mb_substr($line, $firstLineStartPos);
            }
            if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
                // Found next list item
                $stripTrailingBlankLines = false; // because this signals extra spacing intended
                break;
            }
            $isBlankLine = trim($line) == '';
            $isIndented = mb_eregi('^\\s+\\S', $line);
            if ($isBlankLine) {
                $seenBlankLine = true;
            } elseif (!$isIndented && $seenBlankLine) {
                // Post-list content
                break;
            }
            $lines[] = $line;
        }
        $lines = MDUtils::withoutTrailingBlankLines($lines);
        return MDUtils::stripIndent($lines);
    }

    /**
     * Reads the content of the list item at `$state->p`, advancing the line
     * pointer past it (always by at least one line).
     *
     * @param MDState $state
     * @param int $firstLineStartPos character offset of the item's content
     * on its first line
     * @return MDBlockNode the item's content
     */
    protected function readListItemContent(MDState $state, int $firstLineStartPos): MDBlockNode {
        $itemLines = self::readItemLines($state, $firstLineStartPos);
        $state->p += max(sizeof($itemLines), 1);

        if (sizeof($itemLines) == 1) {
            return new MDBlockNode($state->inlineMarkdownToNodes($itemLines[0]));
        }

        $hasBlankLines = sizeof(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
        if ($hasBlankLines) {
            $substate = $state->copy($itemLines);
            $blocks = $substate->readBlocks();
            return (sizeof($blocks) == 1) ? $blocks[0] : new MDBlockNode($blocks);
        }

        // Multiline content with no blank lines. Search for new block
        // boundaries without the benefit of a blank line to demarcate it.
        for ($p = 1; $p < sizeof($itemLines); $p++) {
            $line = $itemLines[$p];
            if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
                // Nested list found
                $firstBlock = new MDBlockNode($state->inlineMarkdownToNodes(
                    implode("\n", array_slice($itemLines, 0, $p))));
                $substate = $state->copy(array_slice($itemLines, $p));
                $blocks = $substate->readBlocks();
                return new MDBlockNode(array_merge([ $firstBlock ], $blocks));
            }
        }

        // Ok, give up and just do a standard block read
        $substate = $state->copy($itemLines);
        $blocks = $substate->readBlocks();
        return (sizeof($blocks) == 1) ? $blocks[0] : new MDBlockNode($blocks);
    }

    /**
     * Must be overridden by subclasses.
     *
     * @throws Error always
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $className = MDUtils::typename($this);
        throw new Error("Abstract readBlock must be overridden in {$className}");
    }
}

/**
 * Block reader for unordered (bulleted) lists.
 */
class MDUnorderedListReader extends _MDListReader {
    private const unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content

    /**
     * Reads one bulleted item at the line pointer, or returns `null` if the
     * current line is not a bulleted item.
     */
    private function readUnorderedListItem(MDState $state): ?MDListItemNode {
        if (!$state->hasLines(1)) return null;
        $p = $state->p;
        $line = $state->lines[$p];
        if (!mb_eregi(self::unorderedListRegex, $line, $groups)) return null;
        $firstLineOffset = mb_strlen($groups[1]);
        return new MDListItemNode($this->readListItemContent($state, $firstLineOffset));
    }

    /**
     * Reads consecutive bulleted items into a single list node.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $items = [];
        $item = null;
        do {
            $item = $this->readUnorderedListItem($state);
            if ($item) $items[] = $item;
        } while ($item);
        if (sizeof($items) == 0) return null;
        return new MDUnorderedListNode($items);
    }
}

/**
 * Block reader for ordered (numbered) lists. The number of the first item is
 * used to begin counting.
The subsequent items increase by 1, regardless of
 * their value.
 */
class MDOrderedListReader extends _MDListReader {
    private const orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content

    /**
     * Reads one numbered item at the line pointer, or returns `null` if the
     * current line is not a numbered item.
     */
    private function readOrderedListItem(MDState $state): ?MDListItemNode {
        if (!$state->hasLines(1)) return null;
        $currentLine = $state->lines[$state->p];
        if (!mb_eregi(self::orderedListRegex, $currentLine, $groups)) return null;

        $ordinal = intval($groups[1]);
        // Content starts after both the digits and the ". " separator
        $firstLineOffset = mb_strlen($groups[1]) + mb_strlen($groups[2]);
        $content = $this->readListItemContent($state, $firstLineOffset);
        return new MDListItemNode($content, $ordinal);
    }

    /**
     * Reads consecutive numbered items into a single list node that starts
     * counting from the first item's number.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $items = [];
        while ($item = $this->readOrderedListItem($state)) {
            $items[] = $item;
        }
        if (sizeof($items) == 0) return null;
        return new MDOrderedListNode($items, $items[0]->ordinal);
    }
}

/**
 * Block reader for code blocks denoted by pairs of triple tickmarks. If
 * a programming language name, _xyz_, immediately follows the backticks, it
 * is passed to the resulting `MDCodeBlockNode` as its language.
 *
 * Supports `MDTagModifier` suffix.
 */
class MDFencedCodeBlockReader extends MDReader {
    /**
     * Reads an opening fence line, then every line up to a closing line that
     * is exactly three backticks (ignoring surrounding whitespace). Returns
     * `null`, leaving the line pointer untouched, if the fence is never
     * closed.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        if (!$state->hasLines(2)) return null;

        $p = $state->p;
        [$openFenceLine, $modifier] = MDTagModifier::fromLine($state->lines[$p++], $state);
        if (!mb_eregi('```\\s*([a-z0-9]*)\\s*$', $openFenceLine, $groups)) return null;
        $language = mb_strlen($groups[1]) > 0 ? $groups[1] : null;

        $codeLines = [];
        while ($state->hasLines(1, $p)) {
            $line = $state->lines[$p++];
            if (trim($line) != '```') {
                $codeLines[] = $line;
                continue;
            }
            // Closing fence found
            $state->p = $p;
            $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
            if ($modifier) $modifier->applyTo($block);
            return $block;
        }
        return null;
    }
}

/**
 * Block reader for code blocks denoted by indenting text.
*/
class MDIndentedCodeBlockReader extends MDReader {
    /**
     * Reads consecutive lines that each begin with at least one full indent,
     * strips one indent level from each, and joins them as a code block.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        $codeLines = [];
        while ($state->hasLines(1, $p)) {
            $line = $state->lines[$p];
            if (MDUtils::countIndents($line, true) < 1) break;
            $codeLines[] = MDUtils::stripIndent($line);
            $p++;
        }
        if (sizeof($codeLines) == 0) return null;
        $state->p = $p;
        return new MDCodeBlockNode(implode("\n", $codeLines));
    }
}

/**
 * Block reader for horizontal rules. Composed of three or more hyphens or
 * asterisks on a line by themselves, with or without intermediate whitespace.
 */
class MDHorizontalRuleReader extends MDReader {
    private const horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

    /**
     * Reads a single horizontal rule line. A trailing `MDTagModifier` on the
     * line is applied to the resulting node.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        $line = $state->lines[$p++];
        [$line, $modifier] = MDTagModifier::fromLine($line, $state);
        if (!mb_eregi(self::horizontalRuleRegex, $line)) return null;

        $state->p = $p;
        $block = new MDHorizontalRuleNode();
        if ($modifier) $modifier->applyTo($block);
        return $block;
    }

    /**
     * Orders this reader before the unordered list reader, since a rule such
     * as `- - -` or `* * *` also looks like a bulleted item.
     */
    public function compareBlockOrdering(MDReader $other): int {
        if ($other instanceof MDUnorderedListReader) {
            return -1;
        }
        return 0;
    }
}

/**
 * Block reader for tables.
 *
 * Supports `MDTagModifier` suffix.
 */
class MDTableReader extends MDReader {
    /**
     * Reads one table row at the line pointer. A row is any line containing
     * a pipe; one leading and one trailing pipe are dropped before the line
     * is split into cells. Any trailing modifier on the line is discarded.
     *
     * @param MDState $state
     * @param bool $isHeader whether to create header cells instead of body
     * cells
     * @return ?MDTableRowNode the row, or `null` (line pointer untouched) if
     * the current line is not a table row
     */
    private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
        if (!$state->hasLines(1)) return null;
        $p = $state->p;
        $line = MDTagModifier::strip(trim($state->lines[$p++]));
        if (!mb_eregi('.*\\|.*', $line)) return null;
        if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
        if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
        $cellTokens = explode('|', $line);
        $cells = array_map(function($token) use ($isHeader, $state) {
            $content = $state->inlineMarkdownToNode(trim($token));
            return $isHeader
                ? new MDTableHeaderCellNode($content)
                : new MDTableCellNode($content);
        }, $cellTokens);
        $state->p = $p;
        return new MDTableRowNode($cells);
    }

    /**
     * Derives per-column alignment from a divider line such as
     * `| :--- | :---: | ---: |`.
     *
     * @param string $line
     * @return string[] one entry per column: `'left'`, `'center'`,
     * `'right'`, or `null` when no alignment is specified
     */
    private function parseColumnAlignments(string $line): array {
        $line = trim($line);
        if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
        if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
        return array_map(function($token) {
            // The split pattern only eats whitespace adjacent to interior
            // pipes, so the first and last tokens can still carry padding
            // that would hide their colons.
            $token = trim($token);
            if (str_starts_with($token, ':')) {
                if (str_ends_with($token, ':')) {
                    return 'center';
                }
                return 'left';
            } elseif (str_ends_with($token, ':')) {
                return 'right';
            }
            return null;
        }, mb_split('\\s*\\|\\s*', $line));
    }

    private const tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

    /**
     * Reads a table: a header row, a divider line, then zero or more body
     * rows. A modifier at the end of the header line is applied to the
     * table. The line pointer is restored if no table is found.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        if (!$state->hasLines(2)) return null;
        $startP = $state->p;
        $firstLine = $state->lines[$startP];
        $modifier = MDTagModifier::fromLine($firstLine, $state)[1];

        $headerRow = $this->readTableRow($state, true);
        if ($headerRow === null) {
            $state->p = $startP;
            return null;
        }

        $dividerLine = $state->lines[$state->p++];
        if (!mb_eregi(self::tableDividerRegex, $dividerLine)) {
            $state->p = $startP;
            return null;
        }
        $columnAlignments = $this->parseColumnAlignments($dividerLine);

        $bodyRows = [];
        while ($state->hasLines(1)) {
            $row = $this->readTableRow($state, false);
            if ($row === null) break;
            $bodyRows[] = $row;
        }

        $table = new MDTableNode($headerRow, $bodyRows);
        $table->columnAlignments = $columnAlignments;
        if ($modifier) $modifier->applyTo($table);
        return $table;
    }
}

/**
 * Block reader for definition lists. Definitions go directly under terms starting
 * with a colon.
*/
class MDDefinitionListReader extends MDReader {
    /**
     * Reads lines up to the next blank line as a definition list. Lines
     * starting with `:` and whitespace are definitions, indented lines
     * continue the previous entry, and everything else is a term. At least
     * one term and one definition are required.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        $termCount = 0;
        $definitionCount = 0;
        $defLines = [];
        while ($state->hasLines(1, $p)) {
            $line = $state->lines[$p++];
            if (trim($line) === '') {
                break;
            }
            if (mb_eregi('^\\s+', $line)) {
                // Continuation line; there must be an entry to continue
                if (sizeof($defLines) == 0) return null;
                $defLines[sizeof($defLines) - 1] .= "\n" . $line;
            } elseif (mb_eregi('^:\\s+', $line)) {
                $defLines[] = $line;
                $definitionCount++;
            } else {
                $defLines[] = $line;
                $termCount++;
            }
        }
        if ($termCount == 0 || $definitionCount == 0) return null;

        $blocks = array_map(function($line) use ($state) {
            if (mb_eregi('^:\\s+(.*?)$', $line, $groups)) {
                return new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($groups[1]));
            } else {
                return new MDDefinitionListTermNode($state->inlineMarkdownToNodes($line));
            }
        }, $defLines);
        $state->p = $p;
        return new MDDefinitionListNode($blocks);
    }
}

/**
 * Block reader for defining footnote contents. Footnotes can be defined anywhere
 * in the document but will always be rendered at the end of a page or end of
 * the document.
 */
class MDFootnoteReader extends MDReader {
    private const footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
    private const footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol

    /**
     * Stores a footnote's content under its symbol in the root state's
     * `userInfo['footnotes']`.
     *
     * @param MDState $state
     * @param string $symbol
     * @param MDNode[] $footnote
     */
    private function defineFootnote(MDState $state, string $symbol, array $footnote) {
        $footnotes = $state->root()->userInfo['footnotes'] ?? [];
        $footnotes[$symbol] = $footnote;
        $state->root()->userInfo['footnotes'] = $footnotes;
    }

    /**
     * Records one occurrence id of a footnote reference under its symbol in
     * the root state's `userInfo['footnoteInstances']`.
     */
    private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
        // Default to an empty map, matching the other userInfo accessors
        $footnoteInstances = $state->root()->userInfo['footnoteInstances'] ?? [];
        $instances = $footnoteInstances[$symbol] ?? [];
        $instances[] = $unique;
        $footnoteInstances[$symbol] = $instances;
        $state->root()->userInfo['footnoteInstances'] = $footnoteInstances;
    }

    /**
     * Returns the numeric id for a footnote symbol, assigning the next
     * sequential id the first time a symbol is seen.
     */
    private function idForFootnoteSymbol(MDState $state, string $symbol): int {
        $footnoteIds = $state->root()->userInfo['footnoteIds'] ?? [];
        $existing = $footnoteIds[$symbol] ?? null;
        if ($existing !== null) return $existing;
        $nextFootnoteId = $state->root()->userInfo['nextFootnoteId'] ?? 1;
        $id = $nextFootnoteId++;
        $footnoteIds[$symbol] = $id;
        $state->root()->userInfo['nextFootnoteId'] = $nextFootnoteId;
        $state->root()->userInfo['footnoteIds'] = $footnoteIds;
        return $id;
    }

    /**
     * Resets all footnote bookkeeping on the root state.
     */
    public function preProcess(MDState $state) {
        $state->root()->userInfo['footnoteInstances'] = [];
        $state->root()->userInfo['footnotes'] = [];
        $state->root()->userInfo['footnoteIds'] = [];
        $state->root()->userInfo['nextFootnoteId'] = 1;
    }

    /**
     * Reads a footnote definition of the form `[^symbol]: content`, along
     * with any following lines that begin with whitespace. The definition is
     * recorded and an empty block is returned in its place.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
        $symbol = $groups[1];
        $def = $groups[2];
        while ($state->hasLines(1, $p)) {
            $line = $state->lines[$p++];
            if (mb_eregi('^\\s+', $line)) {
                $def .= "\n" . $line;
            } else {
                $p--; // leave the non-continuation line for the next reader
                break;
            }
        }
        $content = $state->inlineMarkdownToNodes($def);
        $this->defineFootnote($state, $symbol, $content);
        $state->p = $p;
        return new MDBlockNode(); // empty
    }

    /**
     * Reads a footnote reference token (`[^symbol]` or `[^symbol "title"]`)
     * from the start of the line.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if (mb_eregi(self::footnoteWithTitleRegex, $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
        }
        if (mb_eregi(self::footnoteRegex, $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
        }
        return null;
    }

    /**
     * Replaces the first footnote token found with an `MDFootnoteNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
            $symbol = $match->tokens[0]->content;
            array_splice($tokens, $match->index, 1, [new MDFootnoteNode($symbol)]);
            return true;
        }
        return false;
    }

    /**
     * Numbers every footnote reference in the document and, if any footnotes
     * were defined, appends an `MDFootnoteListNode` to the blocks.
     *
     * @param MDState $state
     * @param MDBlockNode[] $blocks
     */
    public function postProcess(MDState $state, array &$blocks) {
        $nextOccurrenceId = 1;
        foreach ($blocks as $block) {
            $block->visitChildren(function($node) use (&$nextOccurrenceId, $state) {
                if (!($node instanceof MDFootnoteNode)) return;
                $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
                $node->occurrenceId = $nextOccurrenceId++;
                $node->displaySymbol = strval($node->footnoteId);
                $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
            });
        }
        // Definitions are stored on the root state (see defineFootnote), so
        // read them from there as every other accessor in this class does.
        $footnotes = $state->root()->userInfo['footnotes'] ?? [];
        if (sizeof($footnotes) == 0) return;
        $blocks[] = new MDFootnoteListNode();
    }

    /**
     * Orders this reader before the link and image readers.
     */
    public function compareBlockOrdering(MDReader $other): int {
        if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
            return -1;
        }
        return 0;
    }

    /**
     * Orders this reader before the link and image readers.
     */
    public function compareTokenizeOrdering(MDReader $other): int {
        if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
            return -1;
        }
        return 0;
    }

    /**
     * Orders this reader before the link and image readers.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
            return -1;
        }
        return 0;
    }
}

/**
 *
Block reader for abbreviation definitions. Anywhere the abbreviation appears
 * in plain text will have its definition available when hovering over it.
 * Definitions can appear anywhere in the document. Their content should only
 * contain simple text, not markdown.
 */
class MDAbbreviationReader extends MDReader {
    /**
     * Records an abbreviation and its definition on the root state.
     */
    private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
        $abbrevs = $state->root()->userInfo['abbreviations'] ?? [];
        $abbrevs[$abbreviation] = $definition;
        $state->root()->userInfo['abbreviations'] = $abbrevs;
    }

    /**
     * Clears any previously collected abbreviations.
     */
    public function preProcess(MDState $state) {
        $state->root()->userInfo['abbreviations'] = [];
    }

    /**
     * Reads a definition line of the form `*[ABBR]: definition`. The
     * definition is stored and an empty block node is returned in its place.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        $line = $state->lines[$p++];
        if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
        $this->defineAbbreviation($state, $groups[1], $groups[2]);
        $state->p = $p;
        return new MDBlockNode(); // empty
    }

    /**
     * Splits every text node that contains a defined abbreviation into plain
     * text and `MDAbbreviationNode` pieces.
     *
     * @param MDState $state
     * @param MDNode[] $blocks
     */
    public function postProcess(MDState $state, array &$blocks) {
        $abbreviations = $state->root()->userInfo['abbreviations'] ?? [];
        if (sizeof($abbreviations) == 0) return; // nothing to substitute
        MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
            if (!($original instanceof MDTextNode)) return null;
            $changed = false;
            $elems = [ $original->text ]; // mix of strings and MDNodes
            for ($i = 0; $i < sizeof($elems); $i++) {
                $text = $elems[$i];
                if (!is_string($text)) continue;
                foreach ($abbreviations as $key => $definition) {
                    // PHP turns numeric-looking array keys into ints; restore
                    // the string before using it as a search needle.
                    $abbreviation = (string)$key;
                    $index = strpos($text, $abbreviation);
                    if ($index === false) continue;
                    $prefix = substr($text, 0, $index);
                    $suffix = substr($text, $index + strlen($abbreviation));
                    array_splice($elems, $i, 1, [$prefix, new MDAbbreviationNode($abbreviation, $definition), $suffix]);
                    $i = -1; // start over
                    $changed = true;
                    break;
                }
            }
            if (!$changed) return null;
            $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
            return new MDNode($nodes);
        });
    }
}

/**
 * Block reader for simple paragraphs. Paragraphs are separated by a blank (or
 * whitespace-only) line. This reader is prioritized after every other reader
 * since there is no distinguishing syntax.
 */
class MDParagraphReader extends MDReader {
    /**
     * Collects lines up to the next blank line into an `MDParagraphNode`.
     * Returns `null` when there are no lines to collect, or when the
     * paragraph would span the entire document.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $paragraphLines = [];
        $p = $state->p;
        while ($state->hasLines(1, $p)) {
            $line = $state->lines[$p++];
            if (trim($line) === '') break;
            $paragraphLines[] = $line;
        }
        if ($state->p == 0 && $p >= sizeof($state->lines)) {
            // If it's the entire document don't wrap it in a paragraph
            return null;
        }
        if (sizeof($paragraphLines) == 0) return null;
        $state->p = $p;
        $content = implode("\n", $paragraphLines);
        return new MDParagraphNode($state->inlineMarkdownToNodes($content));
    }

    public function compareBlockOrdering(MDReader $other): int {
        return 1; // always dead last
    }
}

/**
 * Abstract base class for readers that look for one or two delimiting tokens
 * on either side of some content. E.g. `**strong**`.
 */
class MDSimplePairInlineReader extends MDReader {
    // Passes:
    // 1. Syntaxes with two delimiting tokens, interior tokens of the same
    //    kind must be even in number
    // 2. Syntaxes with one delimiting token, interior tokens of the same
    //    kind must be even in number
    // 3. Syntaxes with two delimiting tokens, any tokens inside
    // 4. Syntaxes with one delimiting token, any tokens inside
    public function substitutionPassCount(): int {
        return 4;
    }

    /**
     * Attempts a substitution of a matched pair of delimiting token types.
     * If successful, the substitution is performed on `$tokens` and `true` is
     * returned, otherwise `false` is returned and the array is untouched.
     *
     * If `$this->substitutionPassCount()` is greater than 1, the first pass
     * will reject matches with an odd number of the delimiting token inside
     * the content tokens. If the reader uses a single pass, or on any later
     * pass, any contents will be accepted.
     *
     * @param MDState $state
     * @param int $pass pass number, starting with `1`
     * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
     * @param string $nodeClass class of the node to return if matched
     * @param MDTokenType $delimiter delimiting token
     * @param int $count how many times the token is repeated to form the delimiter
     * @param bool $plaintext whether to create `$nodeClass` with a verbatim
     *   content string instead of parsed `MDNode`s
     * @return bool `true` if substitution was performed, `false` if not
     */
    public function attemptPair(MDState $state, int $pass, array &$tokens, string $nodeClass, MDTokenType $delimiter, int $count=1, bool $plaintext=false): bool {
        // Multi-token delimiters are handled on passes 1 and 3, single-token
        // delimiters on passes 2 and 4.
        if ($count == 1 && $pass != 2 && $pass != 4) return false;
        if ($count > 1 && $pass != 1 && $pass != 3) return false;
        $delimiters = array_fill(0, $count, $delimiter);
        // NOTE(review): the pass list on this class says pass 2 should apply
        // the even-count restriction as well, but only pass 1 does. Behavior
        // is left as-is here; confirm which is intended.
        $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
        $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters, function($content) use ($nodeClass, $isFirstOfMultiplePasses, $delimiter) {
            // `??` keeps empty content from raising undefined-key warnings
            $first = $content[0] ?? null;
            $last = $content[sizeof($content) - 1] ?? null;
            if ($first instanceof MDToken && $first->type === MDTokenType::Whitespace) return false;
            if ($last instanceof MDToken && $last->type === MDTokenType::Whitespace) return false;
            foreach ($content as $token) {
                // Don't allow nesting
                if (MDUtils::typename($token) == $nodeClass) return false;
            }
            if ($isFirstOfMultiplePasses) {
                $innerCount = 0;
                foreach ($content as $token) {
                    if ($token instanceof MDToken && $token->type === $delimiter) $innerCount++;
                }
                if (($innerCount % 2) != 0) return false;
            }
            return true;
        });
        if ($match === null) return false;
        $state->checkExecutionTime();
        if ($plaintext) {
            $content = implode('', array_map(fn($token) => $token instanceof MDToken ? $token->original : $token->toPlaintext($state), $match->contentTokens));
        } else {
            $content = $state->tokensToNodes($match->contentTokens);
        }
        $node = new $nodeClass($content);
        array_splice($tokens, $match->startIndex, $match->totalLength, [$node]);
        return true;
    }
}

/**
 * Reader for emphasis syntax. Denoted with a single underscore on either side of
 * some text (preferred) or a single asterisk on either side.
 */
class MDEmphasisReader extends MDSimplePairInlineReader {
    /**
     * Emits an underscore or asterisk token if `$line` starts with one.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
        if (str_starts_with($line, '*')) return new MDToken('*', MDTokenType::Asterisk);
        return null;
    }

    /**
     * Tries single-underscore pairs first, then single-asterisk pairs.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', MDTokenType::Underscore)) return true;
        if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', MDTokenType::Asterisk)) return true;
        return false;
    }

    /**
     * Sorts this reader after `MDStrongReader`.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        if ($other instanceof MDStrongReader) {
            return 1;
        }
        return 0;
    }
}

/**
 * Reader for strong syntax. 
Denoted with two asterisks on either side of some
 * text (preferred) or two underscores on either side. Note that if
 * `MDUnderlineReader` is in use, it will replace the double-underscore syntax.
 */
class MDStrongReader extends MDSimplePairInlineReader {
    /**
     * Emits an asterisk or underscore token if `$line` starts with one.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if (str_starts_with($line, '*')) return new MDToken('*', MDTokenType::Asterisk);
        if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
        return null;
    }

    /**
     * Tries double-asterisk pairs first, then double-underscore pairs.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', MDTokenType::Asterisk, 2)) return true;
        if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', MDTokenType::Underscore, 2)) return true;
        return false;
    }

    /**
     * Sorts this reader before `MDEmphasisReader`.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        if ($other instanceof MDEmphasisReader) {
            return -1;
        }
        return 0;
    }
}

/**
 * Reader for strikethrough syntax. Consists of two tildes on either side of
 * some text (preferred) or single tildes on either side. Note that if
 * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
 *
 * The number of recognized tildes can be configured.
 */
class MDStrikethroughReader extends MDSimplePairInlineReader {
    /** Whether `~text~` is recognized as strikethrough. */
    public bool $singleTildeEnabled = true;
    /** Whether `~~text~~` is recognized as strikethrough. */
    public bool $doubleTildeEnabled = true;

    /**
     * Emits a tilde token if `$line` starts with one.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
        return null;
    }

    /**
     * Tries the double-tilde form and then the single-tilde form, each only
     * if its flag is enabled. Each flag gates the delimiter length it is
     * named for: `$doubleTildeEnabled` controls the two-tilde attempt and
     * `$singleTildeEnabled` the one-tilde attempt.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        if ($this->doubleTildeEnabled) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
        }
        if ($this->singleTildeEnabled) {
            if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde)) return true;
        }
        return false;
    }
}

/**
 * Reader for underline syntax. 
Written as a pair of underscores on each side of the
 * text. `MDStrongReader` also claims double underscores; when both readers
 * are in use this one wins.
 */
class MDUnderlineReader extends MDSimplePairInlineReader {
    /**
     * Produces an underscore token when `$line` begins with `_`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '_')
            ? new MDToken('_', MDTokenType::Underscore)
            : null;
    }

    /**
     * Substitutes a double-underscore pair with an `MDUnderlineNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $substituted = $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, 2);
        return $substituted;
    }

    /**
     * Places this reader ahead of `MDStrongReader`.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return ($other instanceof MDStrongReader) ? -1 : 0;
    }
}

/**
 * Reader for highlight syntax. Written as two equal signs on each side of the
 * highlighted text.
 */
class MDHighlightReader extends MDSimplePairInlineReader {
    /**
     * Produces an equal-sign token when `$line` begins with `=`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '=')
            ? new MDToken('=', MDTokenType::Equal)
            : null;
    }

    /**
     * Substitutes a double-equal pair with an `MDHighlightNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $substituted = $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, 2);
        return $substituted;
    }
}

/**
 * Reader for inline code syntax. Consists of one or two delimiting backticks
 * around text. The contents between the backticks will be rendered verbatim,
 * ignoring any inner markdown syntax. To include a backtick inside, escape it
 * with a backslash. 
*/
class MDCodeSpanReader extends MDSimplePairInlineReader {
    /**
     * Produces a backtick token when `$line` begins with a backtick.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '`')
            ? new MDToken('`', MDTokenType::Backtick)
            : null;
    }

    /**
     * Tries double-backtick delimiters, then single-backtick delimiters. The
     * resulting `MDCodeNode` is built from the verbatim content string.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        return $this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 2, true)
            || $this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 1, true);
    }
}

/**
 * Reader for subscript syntax. Written as a single tilde on each side of the
 * text. When combined with `MDStrikethroughReader` this reader goes first,
 * leaving only the double-tilde form for strikethrough.
 */
class MDSubscriptReader extends MDSimplePairInlineReader {
    /**
     * Produces a tilde token when `$line` begins with `~`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '~')
            ? new MDToken('~', MDTokenType::Tilde)
            : null;
    }

    /**
     * Substitutes a single-tilde pair with an `MDSubscriptNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
        return $substituted;
    }

    /**
     * Places this reader ahead of `MDStrikethroughReader`.
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        return ($other instanceof MDStrikethroughReader) ? -1 : 0;
    }
}

/**
 * Reader for superscript syntax. Written as a single caret on each side of
 * the text.
 */
class MDSuperscriptReader extends MDSimplePairInlineReader {
    /**
     * Produces a caret token when `$line` begins with `^`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        return str_starts_with($line, '^')
            ? new MDToken('^', MDTokenType::Caret)
            : null;
    }

    /**
     * Substitutes a single-caret pair with an `MDSuperscriptNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
        return $substituted;
    }
}

/**
 * Reads a hypertext link. Consists of link text between square brackets
 * followed immediately by a URL in parentheses. 
*/
class MDLinkReader extends MDReader {
    /**
     * Tokenizes, in order of preference: a bracketed label, an email target,
     * a URL target, an angle-bracketed email address, or an angle-bracketed
     * URL.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        if ($label = MDToken::tokenizeLabel($line)) {
            return new MDToken($label[0], MDTokenType::Label, $label[1]);
        }
        if ($email = MDToken::tokenizeEmail($line)) {
            return new MDToken($email[0], MDTokenType::Email, $email[1], $email[2]);
        }
        if ($url = MDToken::tokenizeURL($line)) {
            return new MDToken($url[0], MDTokenType::URL, $url[1], $url[2]);
        }
        if (mb_eregi("^<(" . MDUtils::baseEmailRegex . ")>", $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::SimpleEmail, $groups[1]);
        }
        if (mb_eregi("^<(" . MDUtils::baseURLRegex . ")>", $line, $groups)) {
            return new MDToken($groups[0], MDTokenType::SimpleLink, $groups[1]);
        }
        return null;
    }

    /**
     * Replaces the first recognized link token sequence with an `MDLinkNode`.
     * Label-plus-URL is tried first, then label-plus-email, then bare
     * `<email>` and bare `<url>` forms.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        // Label followed by a URL target
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ]);
        if ($match) {
            $matched = $match->tokens;
            $target = $matched[sizeof($matched) - 1];
            $text = $matched[0]->content;
            // NOTE(review): this branch calls inlineMarkdownToNode (singular)
            // while the email branch below calls inlineMarkdownToNodes;
            // confirm the difference is intentional.
            $node = new MDLinkNode($target->content, $state->inlineMarkdownToNode($text), $target->extra);
            array_splice($tokens, $match->index, sizeof($matched), [$node]);
            return true;
        }

        // Label followed by an email target
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Email ]);
        if ($match) {
            $matched = $match->tokens;
            $target = $matched[sizeof($matched) - 1];
            $text = $matched[0]->content;
            $node = new MDLinkNode("mailto:{$target->content}", $state->inlineMarkdownToNodes($text), $target->extra);
            array_splice($tokens, $match->index, sizeof($matched), [$node]);
            return true;
        }

        // Bare email address in angle brackets
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ]);
        if ($match) {
            $address = $match->tokens[0]->content;
            $node = new MDLinkNode("mailto:{$address}", new MDObfuscatedTextNode($address));
            array_splice($tokens, $match->index, 1, [$node]);
            return true;
        }

        // Bare URL in angle brackets
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ]);
        if ($match) {
            $href = $match->tokens[0]->content;
            $node = new MDLinkNode($href, new MDTextNode($href));
            array_splice($tokens, $match->index, 1, [$node]);
            return true;
        }

        return false;
    }
}

/**
 * Reader for referential URL definitions. Consists of link text between square
 * brackets followed immediately by a reference symbol also in square brackets.
 * The URL can be defined elsewhere on a line by itself with the symbol in square
 * brackets, colon, and the URL (and optional title in quotes).
 */
class MDReferencedLinkReader extends MDLinkReader {
    /**
     * Reads a `[symbol]: url` or `[symbol]: url "title"` definition line and
     * registers it with the state. Returns an empty block node in its place.
     */
    public function readBlock(MDState $state): ?MDBlockNode {
        $p = $state->p;
        $line = $state->lines[$p++];
        if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $line, $groups)) {
            $title = $groups[3];
        } elseif (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $line, $groups)) {
            $title = null;
        } else {
            return null;
        }
        $symbol = $groups[1];
        $url = $groups[2];
        $state->defineURL($symbol, $url, $title);
        $state->p = $p;
        return new MDBlockNode([]); // empty
    }

    /**
     * Replaces a label followed by a second label with an
     * `MDReferencedLinkNode`; the second label is the reference symbol.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ]);
        if (!$match) return false;
        $matched = $match->tokens;
        $text = $matched[0]->content;
        $ref = $matched[sizeof($matched) - 1]->content;
        $node = new MDReferencedLinkNode($ref, $state->inlineMarkdownToNodes($text));
        array_splice($tokens, $match->index, sizeof($matched), [$node]);
        return true;
    }
}

/**
 * Reader for images. Consists of an exclamation, alt text in square brackets,
 * and image URL in parentheses. 
*/
class MDImageReader extends MDLinkReader {
    /**
     * Defers to the link tokenizer first; otherwise emits a bang token when
     * `$line` begins with `!`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $linkToken = parent::readToken($state, $line);
        if ($linkToken !== null) return $linkToken;
        return str_starts_with($line, '!')
            ? new MDToken('!', MDTokenType::Bang)
            : null;
    }

    /**
     * Replaces a bang, label, and URL sequence with an `MDImageNode`. The
     * label supplies the alt text, and the URL token's title, when present,
     * becomes the `title` attribute.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ]);
        if (!$match) return false;
        $matched = $match->tokens;
        $target = $matched[sizeof($matched) - 1];
        $node = new MDImageNode($target->content, $matched[1]->content);
        if ($target->extra !== null) {
            $node->attributes['title'] = $target->extra;
        }
        array_splice($tokens, $match->index, sizeof($matched), [$node]);
        return true;
    }

    /**
     * Places this reader ahead of readers whose class is exactly
     * `MDLinkReader` or `MDReferencedLinkReader` (subclasses do not count).
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        $otherClass = get_class($other);
        return in_array($otherClass, [ 'MDLinkReader', 'MDReferencedLinkReader' ], true) ? -1 : 0;
    }
}

/**
 * Reader for images with referential URL definitions. Consists of an
 * exclamation, alt text in square brackets, and link symbol in square brackets.
 * URL is defined the same as for `MDReferencedLinkReader`. 
*/
class MDReferencedImageReader extends MDReferencedLinkReader {
    /**
     * Defers to the parent tokenizer first; otherwise emits a bang token when
     * `$line` begins with `!`.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $linkToken = parent::readToken($state, $line);
        if ($linkToken !== null) return $linkToken;
        return str_starts_with($line, '!')
            ? new MDToken('!', MDTokenType::Bang)
            : null;
    }

    /**
     * Replaces a bang, label, and second label sequence with an
     * `MDReferencedImageNode`. The first label is the alt text and the
     * second is the reference symbol.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ]);
        if (!$match) return false;
        $matched = $match->tokens;
        $alt = $matched[1]->content;
        $ref = $matched[sizeof($matched) - 1]->content;
        array_splice($tokens, $match->index, sizeof($matched), [new MDReferencedImageNode($ref, $alt)]);
        return true;
    }

    /**
     * Places this reader ahead of readers whose class is exactly
     * `MDLinkReader` or `MDReferencedLinkReader` (subclasses do not count).
     */
    public function compareSubstituteOrdering(MDReader $other, int $pass): int {
        $otherClass = get_class($other);
        return in_array($otherClass, [ 'MDLinkReader', 'MDReferencedLinkReader' ], true) ? -1 : 0;
    }
}

/**
 * Converts line breaks within blocks into line breaks in the HTML. Not
 * included in any of the default reader sets since most flavors ignore
 * line breaks within blocks.
 */
class MDLineBreakReader extends MDReader {
    /**
     * Splits every multi-line text node into one text node per line, with an
     * `MDLineBreakNode` between each pair of lines.
     */
    public function postProcess(MDState $state, array &$blocks) {
        MDNode::replaceNodes($state, $blocks, function(MDNode $original) {
            if (!($original instanceof MDTextNode)) return null;
            $pieces = explode("\n", $original->text);
            if (sizeof($pieces) == 1) return null;
            $nodes = [ new MDTextNode(array_shift($pieces)) ];
            foreach ($pieces as $piece) {
                $nodes[] = new MDLineBreakNode();
                $nodes[] = new MDTextNode($piece);
            }
            return new MDNode($nodes);
        });
    }
}

/**
 * Reads a verbatim HTML tag, and if it passes validation by `MDState->$tagFilter`,
 * will be rendered in the final HTML document. Disallowed tags will be rendered
 * as plain text in the resulting document. 
*/
class MDHTMLTagReader extends MDReader {
    /**
     * Parses an HTML tag at the start of `$line`. Tags whose name the root
     * state's tag filter rejects are not tokenized; accepted tags are
     * scrubbed by the filter before being wrapped in a token.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $tag = MDHTMLTag::fromLineStart($line, $state);
        if ($tag === null) return null;
        $filter = $state->root()->tagFilter;
        if (!$filter->isValidTagName($tag->tagName)) return null;
        $filter->scrubTag($tag);
        return new MDToken($tag->original, MDTokenType::HTMLTag, $tag);
    }

    /**
     * Replaces the first HTML tag token with an `MDHTMLTagNode`.
     */
    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        $match = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ]);
        if (!$match) return false;
        $tag = $match->tokens[0]->tag;
        array_splice($tokens, $match->index, 1, [new MDHTMLTagNode($tag)]);
        return true;
    }
}

/**
 * Reads tag modifiers. Consists of curly braces with one or more CSS classes,
 * IDs, or custom attributes separated by spaces to apply to the preceding
 * node. Validation is performed on modifiers and only acceptable values are
 * applied.
 */
class MDModifierReader extends MDReader {
    /**
     * Emits a modifier token if `$line` starts with modifier syntax.
     */
    public function readToken(MDState $state, string $line): ?MDToken {
        $modifier = MDTagModifier::fromStart($line);
        if ($modifier) return new MDToken($modifier->original, MDTokenType::Modifier, $modifier);
        return null;
    }

    public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
        // Modifiers are applied elsewhere, and if they're not it's fine if they're
        // rendered as the original syntax.
        return false;
    }
}

// -- Nodes -----------------------------------------------------------------

/**
 * Base class for nodes in the assembled document tree.
 */
class MDNode {
    /**
     * Array of CSS classes to add to the node when rendered as HTML.
     * @var string[]
     */
    public array $cssClasses = [];

    /**
     * Value of the `id` attribute when rendered as HTML, or `null` for none.
     */
    public ?string $cssId = null;

    /**
     * Mapping of CSS attributes to values.
     * @var string[]
     */
    public array $cssStyles = [];

    /**
     * Mapping of arbitrary attributes and values to add to this node's top-level
     * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
     * `$cssClasses`, `$cssId`, and `$cssStyles` instead.
     * @var array
     */
    public array $attributes = [];

    /**
     * All child nodes in this node.
     * @var MDNode[]
     */
    public array $children = [];

    /**
     * @param MDNode|MDNode[] $children
     * @throws Error if an array element is not an `MDNode`
     */
    public function __construct(MDNode|array $children=[]) {
        if ($children instanceof MDNode) {
            $this->children = [ $children ];
            return;
        }
        foreach ($children as $elem) {
            if ($elem instanceof MDNode) continue;
            $thisClassName = MDUtils::typename($this);
            $elemClassName = MDUtils::typename($elem);
            throw new Error("{$thisClassName} expects children of type MDNode[] or MDNode, got array with {$elemClassName} element");
        }
        $this->children = $children;
    }

    /**
     * Debug representation: the class name followed by each child's string
     * form, wrapped in angle brackets.
     */
    public function __toString(): string {
        $s = "<" . get_class($this);
        foreach ($this->children as $child) {
            $s .= " {$child}";
        }
        $s .= ">";
        return $s;
    }

    /**
     * Adds a CSS class. If already present it will not be duplicated.
     *
     * @return bool whether the class was added
     */
    public function addClass(string $cssClass): bool {
        if (in_array($cssClass, $this->cssClasses, true)) return false;
        $this->cssClasses[] = $cssClass;
        return true;
    }

    /**
     * Removes a CSS class.
     *
     * @param string $cssClass
     * @return bool whether the class was present and removed
     */
    public function removeClass(string $cssClass): bool {
        $beforeLength = sizeof($this->cssClasses);
        // array_diff preserves keys; reindex so the property stays a list
        $this->cssClasses = array_values(array_diff($this->cssClasses, [ $cssClass ]));
        return sizeof($this->cssClasses) != $beforeLength;
    }

    /**
     * Renders this node and any children as an HTML string. If the node has no
     * content an empty string should be returned.
     */
    public function toHTML(MDState $state): string {
        return MDNode::arrayToHTML($this->children, $state);
    }

    /**
     * Renders this node and any children as a plain text string. The conversion
     * should only render ordinary text, not attempt markdown-like formatting
     * (e.g. list items should not be prefixed with asterisks, only have their
     * content text returned). If the node has no renderable content an empty
     * string should be returned.
     */
    public function toPlaintext(MDState $state): string {
        return MDNode::arrayToPlaintext($this->children, $state);
    }

    /**
     * Protected helper method that renders an HTML fragment of the attributes
     * to apply to the root HTML tag representation of this node.
     *
     * Example result with a couple `$cssClasses`, a `$cssId`, and a custom
     * `$attributes` key-value pair:
     *
     * ```
     *  class="foo bar" id="baz" lang="en"
     * ```
     *
     * The value includes a leading space if it's non-empty so that it can be
     * concatenated directly after the tag name and before the closing `>`.
     */
    protected function htmlAttributes(): string {
        $html = '';
        if (sizeof($this->cssClasses) > 0) {
            $classlist = htmlentities(implode(' ', $this->cssClasses));
            $html .= " class=\"{$classlist}\"";
        }
        if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
            $html .= " id=\"" . htmlentities($this->cssId) . "\"";
        }
        $styles = [];
        foreach ($this->cssStyles as $key => $value) {
            $styles[] = "{$key}: {$value};";
        }
        if (sizeof($styles) > 0) {
            $escaped = htmlentities(implode(' ', $styles));
            $html .= " style=\"{$escaped}\"";
        }
        foreach ($this->attributes as $key => $value) {
            // These three are rendered from their dedicated properties above
            if ($key === 'class' || $key === 'id' || $key === 'style') continue;
            $cleanKey = MDUtils::scrubAttributeName($key);
            if (mb_strlen($cleanKey) == 0) continue;
            // `$attributes` is an untyped array; cast so non-string scalars
            // and null don't trip htmlentities
            $cleanValue = htmlentities((string)$value);
            $html .= " {$cleanKey}=\"{$cleanValue}\"";
        }
        return $html;
    }

    /**
     * Protected helper that renders and concatenates the HTML of all children
     * of this node. Mostly for use by subclasses in their `toHTML`
     * implementations.
     */
    protected function childHTML(MDState $state): string {
        return MDNode::arrayToHTML($this->children, $state);
    }

    /**
     * Protected helper that renders and concatenates the plaintext of all
     * children of this node.
     */
    protected function childPlaintext(MDState $state): string {
        return MDNode::arrayToPlaintext($this->children, $state);
    }

    /**
     * Protected helper for rendering nodes represented by simple paired HTML
     * tags. Custom CSS classes and attributes will be included in the result,
     * and child content will be rendered between the tags. Nodes without
     * children render as an empty tag pair.
     */
    protected function simplePairedTagHTML(MDState $state, string $tagName): string {
        // `??` keeps childless nodes from raising undefined-key warnings
        $firstChild = $this->children[0] ?? null;
        $lastChild = $this->children[sizeof($this->children) - 1] ?? null;
        $openTagSuffix = $firstChild instanceof MDBlockNode ? "\n" : '';
        $closeTagPrefix = $lastChild instanceof MDBlockNode ? "\n" : '';
        $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
        $attr = $this->htmlAttributes();
        $childHTML = $this->childHTML($state);
        return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
    }

    /**
     * Calls the given callback function with every child node, recursively.
     * Nodes are visited depth-first.
     */
    public function visitChildren(callable $fn) {
        foreach ($this->children as $child) {
            $fn($child);
            $child->visitChildren($fn);
        }
    }

    /**
     * Helper for rendering and concatenating HTML from an array of `MDNode`s.
     * A newline is appended after every block node.
     *
     * @param MDNode[] $nodes
     * @param MDState $state
     * @return string HTML string
     */
    public static function arrayToHTML(array $nodes, MDState $state): string {
        $html = '';
        foreach ($nodes as $node) {
            $html .= $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : '');
        }
        return $html;
    }

    /**
     * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
     *
     * @param MDNode[] $nodes
     * @param MDState $state
     * @return string plaintext
     */
    public static function arrayToPlaintext(array $nodes, MDState $state): string {
        $text = '';
        foreach ($nodes as $node) {
            $text .= $node->toPlaintext($state);
        }
        return $text;
    }

    /**
     * Recursively searches and replaces nodes in a tree. The given `$replacer`
     * is passed every node in the tree. If `$replacer` returns a new `MDNode`
     * the original will be replaced with it. If the function returns `null` no
     * change will be made to that node. Traversal is depth-first. The children
     * of a replacement node are not themselves visited.
     *
     * @param MDState $state
     * @param MDNode[] $nodes
     * @param callable $replacer takes a node as an argument, returns either
     *   a new node or `null` to leave it unchanged
     */
    public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
        for ($i = 0; $i < sizeof($nodes); $i++) {
            $originalNode = $nodes[$i];
            $replacement = $replacer($originalNode);
            if ($replacement instanceof MDNode) {
                array_splice($nodes, $i, 1, [$replacement]);
            } else {
                self::replaceNodes($state, $originalNode->children, $replacer);
            }
        }
    }
}

/**
 * Marker subclass that indicates a node represents block syntax.
 */
class MDBlockNode extends MDNode {}

/**
 * Paragraph block.
 */
class MDParagraphNode extends MDBlockNode {
    public function toHTML(MDState $state): string {
        return $this->simplePairedTagHTML($state, 'p');
    }
}

/**
 * A heading block with a level from 1 to 6.
 */
class MDHeadingNode extends MDBlockNode {
    public int $level;

    /**
     * @param int $level
     * @param MDNode|MDNode[] $children
     * @throws Error if `$level` is outside 1 to 6
     */
    public function __construct(int $level, MDNode|array $children) {
        parent::__construct($children);
        if ($level < 1 || $level > 6) {
            $thisClassName = MDUtils::typename($this);
            throw new Error("{$thisClassName} requires heading level 1 to 6");
        }
        $this->level = $level;
    }

    public function toHTML(MDState $state): string {
        return $this->simplePairedTagHTML($state, "h{$this->level}");
    }
}

/**
 * A sub-text block with smaller, less prominent text.
 */
class MDSubtextNode extends MDBlockNode {
    public function toHTML(MDState $state): string {
        $this->addClass('subtext');
        return $this->simplePairedTagHTML($state, 'div');
    }
}

/**
 * Node for a horizontal dividing line.
 */
class MDHorizontalRuleNode extends MDBlockNode {
    public function toHTML(MDState $state): string {
        return "<hr" . $this->htmlAttributes() . ">";
    }
}

/**
 * A block quote, usually rendered indented from other text. 
*/
class MDBlockquoteNode extends MDBlockNode {
    public function toHTML(MDState $state): string {
        return $this->simplePairedTagHTML($state, 'blockquote');
    }
}

/**
 * A bulleted list. Contains `MDListItemNode` children.
 */
class MDUnorderedListNode extends MDBlockNode {
    /** @var MDListItemNode[] $children */

    public function toHTML(MDState $state): string {
        return $this->simplePairedTagHTML($state, 'ul');
    }
}

/**
 * A numbered list. Contains `MDListItemNode` children.
 */
class MDOrderedListNode extends MDBlockNode {
    /** @var MDListItemNode[] $children */

    /**
     * Number of the first item, or `null` to use the default numbering.
     */
    public ?int $startOrdinal;

    /**
     * @param MDListItemNode[] $children
     * @param ?int $startOrdinal
     */
    public function __construct(array $children, ?int $startOrdinal=null) {
        parent::__construct($children);
        $this->startOrdinal = $startOrdinal;
    }

    /**
     * Renders an `ol` element, adding a `start` attribute when the list does
     * not begin at 1.
     */
    public function toHTML(MDState $state): string {
        if ($this->startOrdinal !== null && $this->startOrdinal != 1) {
            $this->attributes['start'] = strval($this->startOrdinal);
        }
        return $this->simplePairedTagHTML($state, 'ol');
    }
}

/**
 * An item in a bulleted or numbered list.
 */
class MDListItemNode extends MDBlockNode {
    public ?int $ordinal;

    /**
     * @param MDNode|MDNode[] $children
     * @param ?int $ordinal
     */
    public function __construct(MDNode|array $children, ?int $ordinal=null) {
        parent::__construct($children);
        $this->ordinal = $ordinal;
    }

    public function toHTML(MDState $state): string {
        return $this->simplePairedTagHTML($state, 'li');
    }
}

/**
 * A block of preformatted computer code. Inner markdown is ignored.
 */
class MDCodeBlockNode extends MDBlockNode {
    public string $text;

    /**
     * The programming language of the content.
     */
    public ?string $language;

    public function __construct(string $text, ?string $language=null) {
        parent::__construct([]);
        $this->text = $text;
        $this->language = $language;
    }

    /**
     * Renders the escaped text inside `pre` and `code` elements. When a
     * language is set, the `code` element gets a `language-…` class.
     */
    public function toHTML(MDState $state): string {
        // NOTE(review): the tag literals were missing from this copy of the
        // file; the pre/code structure is reconstructed from the surviving
        // fragments - confirm against the canonical source.
        // The language is escaped before being placed in the attribute value.
        $languageModifier = ($this->language !== null)
            ? " class=\"language-" . htmlentities($this->language) . "\""
            : '';
        return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
            htmlentities($this->text) . "</code></pre>\n";
    }
}

/**
 * A table node with a single header row and any number of body rows.
 */
class MDTableNode extends MDBlockNode {
    /** @var MDTableRowNode[] $children */

    /**
     * The first child row, or `null` if there are no rows.
     */
    public function headerRow(): ?MDTableRowNode {
        return $this->children[0] ?? null;
    }

    /**
     * All rows after the header row.
     *
     * @return MDTableRowNode[]
     */
    public function bodyRows(): array {
        return array_slice($this->children, 1);
    }

    /**
     * How to align each column. Columns beyond the length of the array or with
     * corresponding `null` elements will have no alignment set. Values should
     * be valid CSS `text-align` values.
     *
     * @var string[]
     */
    public array $columnAlignments = [];

    /**
     * @param MDTableRowNode $headerRow
     * @param MDTableRowNode[] $bodyRows
     */
    public function __construct(MDTableRowNode $headerRow, array $bodyRows) {
        parent::__construct(array_merge([ $headerRow ], $bodyRows));
    }

    /**
     * Applies `$columnAlignments` to the cells of every row.
     */
    public function applyAlignments() {
        foreach ($this->children as $child) {
            $this->applyAlignmentsToRow($child);
        }
    }

    /**
     * Applies the alignment for each column index to the matching cell.
     */
    private function applyAlignmentsToRow(MDTableRowNode $row) {
        foreach ($row->children as $columnIndex => $cell) {
            $alignment = $this->columnAlignments[$columnIndex] ?? null;
            $this->applyAlignmentToCell($cell, $alignment);
        }
    }

    /**
     * Sets or clears the `text-align` style on a cell. A `null` or empty
     * alignment removes the style.
     */
    public function applyAlignmentToCell(MDTableCellNode $cell, ?string $alignment) {
        if ($alignment) {
            $cell->cssStyles['text-align'] = $alignment;
        } else {
            unset($cell->cssStyles['text-align']);
        }
    }

    /**
     * Renders the table with the header row in `thead` and the remaining
     * rows in `tbody`.
     */
    public function toHTML(MDState $state): string {
        // NOTE(review): the tag literals were missing from this copy of the
        // file; the table/thead/tbody structure is reconstructed from the
        // surviving fragments - confirm against the canonical source.
        $this->applyAlignments();
        $html = '';
        $html .= "<table" . $this->htmlAttributes() . ">\n";
        $html .= "<thead>\n";
        $html .= $this->headerRow()->toHTML($state) . "\n";
        $html .= "</thead>\n";
        $html .= "<tbody>\n";
        $html .= MDNode::arrayToHTML($this->bodyRows(), $state) . "\n";
        $html .= "</tbody>\n";
        $html .= "</table>\n";
        return $html;
    }
}

/**
 * Node for one row (header or body) in a table.
 */
class MDTableRowNode extends MDBlockNode {
    /** @var MDTableCellNode[] $children */

    public function toHTML(MDState $state): string {
        return $this->simplePairedTagHTML($state, 'tr');
    }
}

/**
 * Node for one cell in a table row. 
*/
class MDTableCellNode extends MDBlockNode
{
    /**
     * Renders this node as a `td` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'td');
    }
}

/**
 * Node for a header cell in a header table row.
 */
class MDTableHeaderCellNode extends MDTableCellNode
{
    /**
     * Renders this node as a `th` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'th');
    }
}

/**
 * Definition list with `MDDefinitionListTermNode` and
 * `MDDefinitionListDefinitionNode` children.
 */
class MDDefinitionListNode extends MDBlockNode
{
    /**
     * Renders this node as a `dl` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'dl');
    }
}

/**
 * A word or term in a definition list.
 */
class MDDefinitionListTermNode extends MDBlockNode
{
    /**
     * Renders this node as a `dt` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'dt');
    }
}

/**
 * The definition of a word or term in a definition list. Should follow a
 * definition term, or another definition to serve as an alternate.
 */
class MDDefinitionListDefinitionNode extends MDBlockNode
{
    /**
     * Renders this node as a `dd` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'dd');
    }
}

/**
 * Block at the bottom of a document listing all the footnotes with their
 * content.
 */
class MDFootnoteListNode extends MDBlockNode
{
    /**
     * Looks up the id recorded for a footnote symbol in the root state's
     * `footnoteIds` user info. Returns `null` when the lookup table or the
     * symbol is absent.
     */
    private function footnoteId(MDState $state, string $symbol): ?int
    {
        return $state->root()->userInfo['footnoteIds'][$symbol] ?? null;
    }

    /**
     * Renders every footnote stored in the root state's `footnotes` user
     * info as a list item, followed by one back-reference link per recorded
     * occurrence (`footnoteInstances`). Returns an empty string when there
     * are no footnotes.
     *
     * NOTE(review): the tag literals were missing from the reviewed copy of
     * this method (markup stripped). The wrapper elements, their class names
     * and the `value` attribute on `li` are a reconstruction — confirm
     * against the canonical source. The `footnote_`/`footnoteref_` id
     * fragments and the `footnote-backref` class were intact.
     */
    public function toHTML(MDState $state): string
    {
        $root = $state->root();
        // Missing keys mean "no footnotes" rather than a warning/TypeError.
        $footnotes = $root->userInfo['footnotes'] ?? [];
        if (count($footnotes) === 0) {
            return '';
        }
        $footnoteUniques = $root->userInfo['footnoteInstances'] ?? [];
        $idPrefix = $root->elementIdPrefix;

        $html = '<div class="footnotes">';
        $html .= '<ol class="footnotes">';
        foreach ($footnotes as $symbolRaw => $content) {
            if (!$content) {
                continue;
            }
            // Numeric-looking array keys come back as ints; the lookups
            // below want the symbol as a string.
            $symbol = "{$symbolRaw}";
            $footnoteId = $this->footnoteId($state, $symbol);
            $contentHTML = MDNode::arrayToHTML($content, $state);
            $html .= "<li value=\"{$footnoteId}\" id=\"{$idPrefix}footnote_{$footnoteId}\">{$contentHTML}";
            foreach (($footnoteUniques[$symbol] ?? null) ?: [] as $unique) {
                $html .= " <a href=\"#{$idPrefix}footnoteref_{$unique}\" class=\"footnote-backref\">↩︎</a>";
            }
            $html .= "</li>\n";
        }
        $html .= '</ol>';
        $html .= '</div>';
        return $html;
    }

    /**
     * Renders one "symbol. text" line per footnote that has content, with
     * surrounding whitespace trimmed. Returns an empty string when there are
     * no footnotes.
     */
    public function toPlaintext(MDState $state): string
    {
        // Read from the root state, as toHTML() and footnoteId() do.
        $footnotes = $state->root()->userInfo['footnotes'] ?? [];
        if (count($footnotes) === 0) {
            return '';
        }
        $text = '';
        foreach ($footnotes as $symbolRaw => $content) {
            if (!$content) {
                continue;
            }
            // NOTE(review): this emits the plaintext of this node's own
            // children for every footnote, not the plaintext of `$content`.
            // That looks unintended, but is kept as-is; only the undefined
            // `state` constant (missing `$`) is corrected here.
            $text .= "{$symbolRaw}. " . $this->childPlaintext($state) . "\n";
        }
        return trim($text);
    }
}

/**
 * Marker subclass that indicates a node represents inline syntax.
 */
class MDInlineNode extends MDNode
{
}

/**
 * Contains plain text. Special HTML characters are escaped when rendered.
 */
class MDTextNode extends MDInlineNode
{
    /**
     * Raw, unescaped text.
     */
    public string $text;

    public function __construct(string $text)
    {
        parent::__construct([]);
        $this->text = $text;
    }

    /**
     * Returns the text with HTML entities escaped.
     */
    public function toHTML(MDState $state): string
    {
        return htmlentities($this->text);
    }

    /**
     * Returns the text unchanged.
     */
    public function toPlaintext(MDState $state): string
    {
        return $this->text;
    }
}

/**
 * Contains plain text which is rendered with HTML entities when rendered to
 * be marginally more difficult for web scrapers to decipher. Used for
 * semi-sensitive info like email addresses.
 */
class MDObfuscatedTextNode extends MDTextNode
{
    /**
     * Returns the text encoded by `MDUtils::escapeObfuscated`.
     */
    public function toHTML(MDState $state): string
    {
        return MDUtils::escapeObfuscated($this->text);
    }
}

/**
 * Emphasized (italicized) content.
 */
class MDEmphasisNode extends MDInlineNode
{
    /**
     * Renders this node as an `em` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'em');
    }
}

/**
 * Strong (bold) content.
 */
class MDStrongNode extends MDInlineNode
{
    /**
     * Renders this node as a `strong` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'strong');
    }
}

/**
 * Content rendered with a line through it.
 */
class MDStrikethroughNode extends MDInlineNode
{
    /**
     * Renders this node as an `s` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 's');
    }
}

/**
 * Underlined content.
*/
class MDUnderlineNode extends MDInlineNode
{
    /**
     * Renders this node as a `u` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'u');
    }
}

/**
 * Highlighted content. Usually rendered with a bright colored background.
 */
class MDHighlightNode extends MDInlineNode
{
    /**
     * Renders this node as a `mark` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'mark');
    }
}

/**
 * Superscripted content.
 */
class MDSuperscriptNode extends MDInlineNode
{
    /**
     * Renders this node as a `sup` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'sup');
    }
}

/**
 * Subscripted content.
 */
class MDSubscriptNode extends MDInlineNode
{
    /**
     * Renders this node as a `sub` element via `simplePairedTagHTML`.
     */
    public function toHTML(MDState $state): string
    {
        return $this->simplePairedTagHTML($state, 'sub');
    }
}

/**
 * Inline plaintext indicating computer code.
 */
class MDCodeNode extends MDInlineNode
{
    /**
     * Raw, unescaped code text.
     */
    public string $text;

    public function __construct(string $text)
    {
        parent::__construct([]);
        $this->text = $text;
    }

    /**
     * Renders the HTML-escaped text inside a `code` element.
     *
     * NOTE(review): the tag literals were missing from the reviewed copy of
     * this method (markup stripped); `<code…>…</code>` is a reconstruction —
     * confirm against the canonical source.
     */
    public function toHTML(MDState $state): string
    {
        return '<code' . $this->htmlAttributes() . '>' . htmlentities($this->text) . '</code>';
    }
}

/**
 * A footnote symbol in a document. Denoted as a superscripted number that can
 * be clicked to go to its content at the bottom of the document.
 */
class MDFootnoteNode extends MDInlineNode
{
    /**
     * Symbol the author used to match up the footnote to its content definition.
     */
    public string $symbol;

    /**
     * The superscript symbol rendered in HTML. May be the same or different
     * than `$symbol`.
     */
    public ?string $displaySymbol = null;

    /**
     * Unique ID for the footnote definition.
     */
    public ?int $footnoteId = null;

    /**
     * Unique number for backlinking to a footnote occurrence. Populated by
     * `MDFootnoteReader->postProcess()`.
     */
    public ?int $occurrenceId = null;

    /**
     * @param string $symbol
     * @param ?string $title stored as the `title` attribute when non-empty
     */
    public function __construct(string $symbol, ?string $title = null)
    {
        parent::__construct([]);
        $this->symbol = $symbol;
        if ($title) {
            $this->attributes['title'] = $title;
        }
    }

    /**
     * Renders a link to the footnote definition (`…footnote_{id}`), wrapped
     * in an element whose own id (`…footnoteref_{occurrence}`) is the target
     * of the definition's back-reference. Nothing is linked when no footnote
     * id has been assigned.
     *
     * NOTE(review): the tag literals were missing from the reviewed copy of
     * this method (markup stripped). The `sup`/`a` elements and the
     * `footnote` class are a reconstruction — confirm against the canonical
     * source. The unassigned-id branch is shown as an empty string in the
     * reviewed copy; a placeholder (e.g. an HTML comment) may have been
     * stripped there as well.
     */
    public function toHTML(MDState $state): string
    {
        if ($this->footnoteId === null) {
            return '';
        }
        $idPrefix = $state->root()->elementIdPrefix;
        return "<sup class=\"footnote\" id=\"{$idPrefix}footnoteref_{$this->occurrenceId}\""
            . $this->htmlAttributes() . '>'
            . "<a href=\"#{$idPrefix}footnote_{$this->footnoteId}\">"
            . htmlentities($this->displaySymbol ?? $this->symbol)
            . '</a></sup>';
    }
}

/**
 * A clickable hypertext link.
 */
class MDLinkNode extends MDInlineNode
{
    /**
     * Link target, unescaped.
     */
    public string $href;

    /**
     * @param string $href
     * @param MDNode|MDNode[] $children
     * @param ?string $title
     */
    public function __construct(string $href, MDNode|array $children, ?string $title = null)
    {
        parent::__construct($children);
        $this->href = $href;
        if ($title !== null) {
            $this->attributes['title'] = $title;
        }
    }

    /**
     * Renders the children inside an anchor. `mailto:` targets are encoded
     * with `MDUtils::escapeObfuscated`; every other target is HTML-escaped.
     *
     * NOTE(review): the tag literals were missing from the reviewed copy of
     * this method (markup stripped); `<a href="…"…>…</a>` is a
     * reconstruction — confirm against the canonical source.
     */
    public function toHTML(MDState $state): string
    {
        $escapedLink = str_starts_with($this->href, 'mailto:')
            ? MDUtils::escapeObfuscated($this->href)
            : htmlentities($this->href);
        return "<a href=\"{$escapedLink}\"" . $this->htmlAttributes() . '>'
            . $this->childHTML($state) . '</a>';
    }
}

/**
 * A clickable hypertext link where the URL is defined elsewhere by reference.
 */
class MDReferencedLinkNode extends MDLinkNode
{
    /**
     * Reference name used to look up the URL and title in the state.
     */
    public string $reference;

    /**
     * @param string $reference
     * @param MDNode|MDNode[] $children
     */
    public function __construct(string $reference, MDNode|array $children)
    {
        parent::__construct('', $children);
        $this->reference = $reference;
    }

    /**
     * Resolves the URL and title for `$reference` from the state the first
     * time it is rendered with an empty `href`, then renders like a regular
     * link.
     */
    public function toHTML(MDState $state): string
    {
        if ($this->href === '') {
            $url = $state->urlForReference($this->reference);
            if ($url) {
                $this->href = $url;
            }
            $title = $state->urlTitleForReference($this->reference);
            if ($title) {
                $this->attributes['title'] = $title;
            }
        }
        return parent::toHTML($state);
    }
}

/**
 * An inline image.
*/
class MDImageNode extends MDInlineNode
{
    /**
     * Image URL, unescaped.
     */
    public string $src;

    /**
     * Alternate text, unescaped.
     */
    public ?string $alt;

    public function __construct(string $src, ?string $alt)
    {
        parent::__construct([]);
        $this->src = $src;
        $this->alt = $alt;
    }

    /**
     * Renders an image element with an escaped `src` and, when alt text is
     * present, an escaped `alt`.
     *
     * NOTE(review): the opening of the tag literal was missing from the
     * reviewed copy of this method (markup stripped); `<img src="…"` is a
     * reconstruction — confirm against the canonical source.
     */
    public function toHTML(MDState $state): string
    {
        $html = '<img src="' . htmlentities($this->src) . '"';
        // Explicit emptiness test so that alt text "0" is not dropped.
        if ($this->alt !== null && $this->alt !== '') {
            $html .= ' alt="' . htmlentities($this->alt) . '"';
        }
        return $html . $this->htmlAttributes() . '>';
    }
}

/**
 * An inline image where the URL is defined elsewhere by reference.
 */
class MDReferencedImageNode extends MDImageNode
{
    /**
     * Reference name used to look up the URL and title in the state.
     */
    public string $reference;

    public function __construct(string $reference, ?string $alt = null)
    {
        // The parent constructor takes exactly (src, alt).
        parent::__construct('', $alt);
        $this->reference = $reference;
    }

    /**
     * Resolves the URL and title for `$reference` from the state the first
     * time it is rendered with an empty `src`, then renders like a regular
     * image.
     */
    public function toHTML(MDState $state): string
    {
        if ($this->src === '') {
            $url = $state->urlForReference($this->reference);
            if ($url !== null) {
                $this->src = $url;
            }
            $title = $state->urlTitleForReference($this->reference);
            if ($title !== null) {
                $this->attributes['title'] = $title;
            }
        }
        return parent::toHTML($state);
    }
}

/**
 * An abbreviation that can be hovered over to see its full expansion.
 */
class MDAbbreviationNode extends MDInlineNode
{
    /**
     * The abbreviated text, unescaped. Its definition is stored as the
     * `title` attribute.
     */
    public string $abbreviation;

    /**
     * @param string $abbreviation
     * @param string $definition
     */
    public function __construct(string $abbreviation, string $definition)
    {
        parent::__construct([]);
        $this->abbreviation = $abbreviation;
        $this->attributes['title'] = $definition;
    }

    /**
     * Renders the escaped abbreviation inside an `abbr` element.
     *
     * NOTE(review): the tag literals were missing from the reviewed copy of
     * this method (markup stripped); `<abbr…>…</abbr>` is a reconstruction —
     * confirm against the canonical source.
     */
    public function toHTML(MDState $state): string
    {
        return '<abbr' . $this->htmlAttributes() . '>' . htmlentities($this->abbreviation) . '</abbr>';
    }
}

/**
 * A line break that is preserved when rendered to HTML.
 */
class MDLineBreakNode extends MDInlineNode
{
    /**
     * NOTE(review): the literal was missing from the reviewed copy of this
     * method (markup stripped); `<br>` is a reconstruction — confirm the
     * exact form against the canonical source.
     */
    public function toHTML(MDState $state): string
    {
        return '<br>';
    }

    /**
     * A line break is a newline in plaintext.
     */
    public function toPlaintext(MDState $state): string
    {
        return "\n";
    }
}

/**
 * A verbatim HTML tag. May be altered to strip out disallowed attributes or
 * CSS values.
 */
class MDHTMLTagNode extends MDInlineNode
{
    public MDHTMLTag $tag;

    public function __construct(MDHTMLTag $tag)
    {
        parent::__construct([]);
        $this->tag = $tag;
    }

    /**
     * Returns the string form of the wrapped tag.
     */
    public function toHTML(MDState $state): string
    {
        return "{$this->tag}";
    }
}


// -- Main class ------------------------------------------------------------

/**
 * Markdown parser.
 */
class Markdown
{
    private static ?array $sharedStandardReaders = null;

    private static ?array $sharedAllReaders = null;

    private static ?Markdown $sharedStandardMarkdown = null;

    public static ?Markdown $sharedCompleteParser = null;

    /**
     * Filter for what non-markdown HTML is permitted. HTML generated as a
     * result of markdown is unaffected.
     */
    public MDHTMLFilter $tagFilter;

    /** @var MDReader[] */
    private array $readers;

    /** @var MDReader[] */
    private array $readersByBlockPriority;

    /** @var MDReader[] */
    private array $readersByTokenPriority;

    /** @var MDReader[] */
    private array $readersBySubstitutePriority;

    /**
     * Set of standard readers to handle common syntax. The array is built on
     * first use and the same instances are returned afterwards.
     *
     * @return MDReader[]
     */
    public static function standardReaders(): array
    {
        return self::$sharedStandardReaders ??= [
            new MDUnderlinedHeadingReader(),
            new MDHashHeadingReader(),
            new MDBlockQuoteReader(),
            new MDHorizontalRuleReader(),
            new MDUnorderedListReader(),
            new MDOrderedListReader(),
            new MDFencedCodeBlockReader(),
            new MDIndentedCodeBlockReader(),
            new MDParagraphReader(),

            new MDStrongReader(),
            new MDEmphasisReader(),
            new MDCodeSpanReader(),
            new MDImageReader(),
            new MDLinkReader(),
            new MDHTMLTagReader(),
        ];
    }

    /**
     * All supported readers except `MDLineBreakReader`. The array is built
     * on first use and the same instances are returned afterwards.
     *
     * @return MDReader[]
     */
    public static function allReaders(): array
    {
        // Assign to the static property (not a local) so the result is
        // actually cached between calls.
        return self::$sharedAllReaders ??= array_merge(self::standardReaders(), [
            new MDSubtextReader(),
            new MDTableReader(),
            new MDDefinitionListReader(),
            new MDFootnoteReader(),
            new MDAbbreviationReader(),

            new MDUnderlineReader(),
            new MDSubscriptReader(),
            new MDStrikethroughReader(),
            new MDHighlightReader(),
            new MDSuperscriptReader(),
            new MDReferencedImageReader(),
            new MDReferencedLinkReader(),
            new MDModifierReader(),
        ]);
    }

    /**
     * Shared instance of a parser with standard syntax.
     */
    public static function standardParser(): Markdown
    {
        return self::$sharedStandardMarkdown ??= new Markdown(self::standardReaders());
    }

    /**
     * Shared instance of a parser with all supported syntax.
     */
    public static function completeParser(): Markdown
    {
        return self::$sharedCompleteParser ??= new Markdown(self::allReaders());
    }

    /**
     * Creates a Markdown parser with the given syntax readers.
     *
     * @param ?MDReader[] $readers defaults to `allReaders()` when `null`
     */
    public function __construct(?array $readers = null)
    {
        $this->readers = $readers ?? self::allReaders();
        $this->readersByBlockPriority = MDReader::sortReaderForBlocks($this->readers);
        $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($this->readers);
        $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($this->readers);
        $this->tagFilter = new MDHTMLFilter();
    }

    /**
     * Converts a markdown string to an HTML string.
     *
     * If parsing throws an `Error`, a diagnostic snippet is printed by
     * `investigateException()` and the error is rethrown.
     *
     * @param string $markdown
     * @param string $elementIdPrefix Optional prefix for generated element
     * `id`s and links to them. For differentiating multiple markdown docs in
     * the same HTML page.
     * @return string HTML
     */
    public function toHTML(string $markdown, string $elementIdPrefix = '')
    {
        // `\r\n` must come first: alternation is ordered, so a leading `\r`
        // branch would split a CRLF pair into two breaks and fabricate a
        // blank line.
        $lines = mb_split('\\r\\n|\\n|\\r', $markdown);
        try {
            return $this->parse($lines, $elementIdPrefix);
        } catch (Error $e) {
            $this->investigateException($lines, $elementIdPrefix);
            throw $e;
        }
    }

    /**
     * Builds a fresh state for the lines, runs every reader's `preProcess`,
     * reads the blocks, runs every reader's `postProcess` on the resulting
     * nodes and renders them to HTML.
     *
     * @param string[] $lines
     * @param string $elementIdPrefix
     */
    private function parse(array $lines, string $elementIdPrefix)
    {
        $state = new MDState($lines);
        $state->readersByBlockPriority = $this->readersByBlockPriority;
        $state->readersByTokenPriority = $this->readersByTokenPriority;
        $state->readersBySubstitutePriority = $this->readersBySubstitutePriority;
        $state->tagFilter = $this->tagFilter;
        $state->elementIdPrefix = $elementIdPrefix;
        foreach ($this->readers as $reader) {
            $reader->preProcess($state);
        }
        $nodes = $state->readBlocks();
        foreach ($this->readers as $reader) {
            $reader->postProcess($state, $nodes);
        }
        return MDNode::arrayToHTML($nodes, $state);
    }

    /**
     * Keeps removing first and last lines of markdown to locate the source of
     * an exception and prints the minimal snippet.
     *
     * @param string[] $lines
     * @param string $elementIdPrefix
     */
    private function investigateException(array $lines, string $elementIdPrefix): void
    {
        print("Investigating error...\n");
        $lineCount = count($lines);
        $startIndex = 0;
        $endIndex = $lineCount;

        // Keep stripping away first line until an exception stops being
        // thrown; $startIndex ends on the last start that still failed.
        for ($i = 0; $i < $lineCount; $i++) {
            try {
                $this->parse(array_slice($lines, $i), $elementIdPrefix);
                break;
            } catch (Error) {
                $startIndex = $i;
            }
        }

        // Keep stripping away last line until an exception stops being
        // thrown. array_slice() takes a length, not an end index, so the
        // window [$startIndex, $i) has length $i - $startIndex.
        for ($i = $lineCount; $i > $startIndex; $i--) {
            try {
                $this->parse(array_slice($lines, $startIndex, $i - $startIndex), $elementIdPrefix);
                break;
            } catch (Error) {
                $endIndex = $i;
            }
        }

        $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex - $startIndex));
        print("This portion of markdown caused an unexpected exception:\n{$problematicMarkdown}\n");
    }
}
?>