| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611 |
- <?php
- declare(strict_types=1);
-
/**
 * Static utilities.
 */
class MDUtils {
    /**
     * Regex source (no delimiters) matching fully qualified URLs only.
     * Modified from https://urlregex.com/ to remove capture groups.
     */
    public static string $baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';

    /**
     * Regex source (no delimiters) matching email addresses. Modified from
     * https://emailregex.com/ to remove capture groups.
     */
    public static string $baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

    /**
     * Encodes every character as an HTML decimal numeric entity (`&#NN;`) to
     * make it marginally more difficult for web scrapers to grab sensitive
     * info. If `text` starts with `mailto:`, that prefix is kept verbatim and
     * only the remainder is obfuscated.
     *
     * @param string $text text to obfuscate
     * @return string entity-encoded text
     */
    public static function escapeObfuscated(string $text): string {
        $prefix = 'mailto:';
        if (str_starts_with($text, $prefix)) {
            return $prefix . self::escapeObfuscated(mb_substr($text, mb_strlen($prefix)));
        }
        $html = '';
        foreach (mb_str_split($text) as $ch) {
            // A numeric character reference is "&#" + code point + ";".
            $html .= '&#' . mb_ord($ch) . ';';
        }
        return $html;
    }

    /**
     * Removes illegal characters from an HTML attribute name (whitespace,
     * `/`, `>`, quotes, and `=`).
     */
    public static function scrubAttributeName(string $name): string {
        return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
    }

    /**
     * Strips one or more leading indents from a line or lines of markdown. An
     * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
     * spaces) are treated like one indent level.
     *
     * The argument is not modified; the stripped value is returned.
     *
     * @param string|string[] $line
     * @param int $levels number of indent levels to remove
     * @return string|string[] same shape as `$line`
     */
    public static function stripIndent(string|array $line, int $levels=1): string|array {
        // e.g. `^(?: {1,4}|\t){2}` for two levels
        $regex = '^(?: {1,4}|\\t){' . $levels . '}';
        $strip = static fn(string $l): string => mb_ereg_replace($regex, '', $l);
        return is_array($line) ? array_map($strip, $line) : $strip($line);
    }

    /**
     * Counts the number of indent levels in a line of text. Partial indents
     * (1 to 3 spaces) are counted as one indent level unless `fullIndentsOnly`
     * is `true`.
     */
    public static function countIndents(string $line, bool $fullIndentsOnly=false): int {
        // Normalize every indent-sized run to a tab...
        $indentPattern = $fullIndentsOnly ? '(?: {4}|\\t)' : '(?: {1,4}|\\t)';
        $normalized = mb_ereg_replace($indentPattern, "\t", $line);
        // ...drop everything after the leading tabs...
        $leadingTabs = mb_ereg_replace('^(\\t*)(.*?)$', '\\1', $normalized);
        // ...and count what is left.
        return mb_strlen($leadingTabs);
    }

    /**
     * Returns a copy of an array without any whitespace-only lines at the end.
     *
     * @param string[] $lines
     * @return string[]
     */
    public static function withoutTrailingBlankLines(array $lines): array {
        $stripped = $lines;
        while (count($stripped) > 0 && trim($stripped[count($stripped) - 1]) === '') {
            array_pop($stripped);
        }
        return $stripped;
    }

    /**
     * Tests if an array of lines contains at least one blank. A blank line
     * can contain whitespace.
     *
     * @param string[] $lines
     */
    public static function containsBlankLine(array $lines): bool {
        foreach ($lines as $line) {
            if (trim($line) === '') return true;
        }
        return false;
    }

    /**
     * Tests whether two associative arrays hold the same key/value pairs.
     * Values are compared by their string representation (the
     * `array_diff_assoc` rule), so this is meant for flat arrays of scalars.
     */
    public static function equalAssocArrays(array $a, array $b): bool {
        // array_diff_assoc() only reports entries of $a missing from $b, so
        // the size check is needed to catch extra entries in $b.
        return count($a) === count($b) && array_diff_assoc($a, $b) === [];
    }
}
-
/**
 * Kinds of lexical token an `MDToken` can represent.
 */
enum MDTokenType {
    // --- Plain content ---

    case Text;

    /**
     * Only used for the leading and trailing whitespace around a run of text,
     * not every single whitespace character.
     */
    case Whitespace;

    // --- Single punctuation characters ---

    case Underscore;
    case Asterisk;
    case Slash;
    case Tilde;
    case Bang;
    case Backtick;
    case Equal;
    case Caret;

    // --- Compound tokens (which `MDToken` fields carry the payload) ---

    /** content=label */
    case Label;
    /** content=URL, extra=title */
    case URL;
    /** content=email address, extra=title */
    case Email;
    /** content=URL */
    case SimpleLink;
    /** content=email address */
    case SimpleEmail;
    /** content=symbol */
    case Footnote;
    /** modifier=MDTagModifier */
    case Modifier;

    /** tag=MDHTMLTag */
    case HTMLTag;

    // --- Pattern-only wildcards ---

    /** Wildcard for `MDToken::findFirstTokens` */
    case META_AnyNonWhitespace;
    /** Wildcard for `MDToken::findFirstTokens` */
    case META_OptionalWhitespace;
}
-
/**
 * Search results from `MDToken::findFirstTokens`.
 */
class MDTokenMatch {
    /**
     * @param MDToken[] $tokens the elements that matched the pattern, in order
     * @param int $index index in the searched array where the match begins
     */
    public function __construct(
        public array $tokens,
        public int $index,
    ) {}
}
-
/**
 * Search results from `MDToken::findPairedTokens`.
 */
class MDPairedTokenMatch {
    /**
     * @param MDToken[] $startTokens tokens that matched the opening pattern
     * @param MDToken[] $contentTokens tokens between the opening and closing matches
     * @param MDToken[] $endTokens tokens that matched the closing pattern
     * @param int $startIndex index of the first opening token
     * @param int $contentIndex index of the first content token
     * @param int $endIndex index of the first closing token
     * @param int $totalLength number of elements spanned from the first
     *     opening token through the last closing token
     */
    public function __construct(
        public array $startTokens,
        public array $contentTokens,
        public array $endTokens,
        public int $startIndex,
        public int $contentIndex,
        public int $endIndex,
        public int $totalLength,
    ) {}
}
-
/**
 * One lexical unit in inline markdown syntax parsing.
 */
class MDToken {
    /**
     * The original verbatim token string. Required as a plaintext fallback if
     * the token remains unresolved.
     */
    public string $original;
    public MDTokenType $type;
    public ?string $content = null;
    public ?string $extra = null;
    public ?MDHTMLTag $tag = null;
    public ?MDTagModifier $modifier = null;

    /** Pattern for `(url "title")`. Groups: 1=URL, 2=title. */
    private static string $urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)';
    /** Pattern for `(url)`. Groups: 1=URL. */
    private static string $urlRegex = '^\\((\\S+?)\\)';

    /**
     * Creates a token.
     *
     * `$content` is routed by type: an `MDTagModifier` is stored in
     * `modifier`, an `MDHTMLTag` in `tag`, and anything else in `content`.
     *
     * @param string $original verbatim token string
     * @param MDTokenType $type token type
     * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
     *     the token
     * @param string|null $extra additional content
     */
    public function __construct(string $original, MDTokenType $type,
        string|MDTagModifier|MDHTMLTag|null $content=null,
        ?string $extra=null) {
        $this->original = $original;
        $this->type = $type;
        if ($content instanceof MDTagModifier) {
            $this->modifier = $content;
        } elseif ($content instanceof MDHTMLTag) {
            $this->tag = $content;
        } else {
            $this->content = $content;
        }
        $this->extra = $extra;
    }

    /**
     * Debug representation, e.g. `(MDToken type=Text content=hello)`.
     */
    public function __toString(): string {
        $classname = static::class;
        // Pure enums cannot be cast to string; use the case name instead.
        return "({$classname} type={$this->type->name} content={$this->content})";
    }

    /**
     * Attempts to parse a label token from the beginning of `line`. A label is
     * of the form `[content]`. Nested brackets are allowed if balanced, a
     * backslash escapes the following character, and an unmatched `)` aborts
     * the parse. If found, returns an array:
     * - `0`: the entire label including brackets
     * - `1`: the content of the label
     *
     * @param string $line
     * @return ?string[] match groups or null if not found
     */
    public static function tokenizeLabel(string $line): ?array {
        if (!str_starts_with($line, '[')) return null;
        // Split once instead of calling mb_substr() for every character.
        $chars = mb_str_split($line);
        $length = count($chars);
        $parenCount = 0;
        $bracketCount = 0;
        for ($p = 1; $p < $length; $p++) {
            $ch = $chars[$p];
            if ($ch === '\\') {
                $p++; // skip the escaped character
            } elseif ($ch === '(') {
                $parenCount++;
            } elseif ($ch === ')') {
                $parenCount--;
                if ($parenCount < 0) return null;
            } elseif ($ch === '[') {
                $bracketCount++;
            } elseif ($ch === ']') {
                if ($bracketCount > 0) {
                    $bracketCount--;
                } else {
                    return [ mb_substr($line, 0, $p + 1), mb_substr($line, 1, $p - 1) ];
                }
            }
        }
        return null;
    }

    /**
     * Attempts to parse a URL token from the beginning of `line`. A URL token
     * is of the form `(url)` or `(url "title")`. Returns `null` when the text
     * also parses as an email token (see `tokenizeEmail`). If found, returns
     * an array:
     * - `0`: the entire URL token including parentheses
     * - `1`: the URL
     * - `2`: the optional title, or `null`
     *
     * @param string $line
     * @return ?array token tuple
     */
    public static function tokenizeURL(string $line): ?array {
        $groups = [];
        if (mb_eregi(self::$urlWithTitleRegex, $line, $groups)) {
            // make sure it's not better described as an email address
            if (self::tokenizeEmail($line)) return null;
            return $groups;
        }
        if (mb_eregi(self::$urlRegex, $line, $groups)) {
            if (self::tokenizeEmail($line)) return null;
            return [ $groups[0], $groups[1], null ];
        }
        return null;
    }

    /**
     * Attempts to parse an email address from the beginning of `line`. An
     * email address is of the form `(user@example.com)` or
     * `(user@example.com "link title")`. If found, returns an array:
     * - `0`: the entire token including parentheses
     * - `1`: the email address
     * - `2`: the optional link title, or `null`
     *
     * @param string $line
     * @return ?string[] token tuple
     */
    public static function tokenizeEmail(string $line): ?array {
        $groups = [];
        $withTitle = '^\\(\\s*(' . MDUtils::$baseEmailRegex . ')\\s+"(.*?)"\\s*\\)';
        if (mb_eregi($withTitle, $line, $groups)) {
            return $groups;
        }
        $plain = '^\\(\\s*(' . MDUtils::$baseEmailRegex . ')\\s*\\)';
        if (mb_eregi($plain, $line, $groups)) {
            return [ $groups[0], $groups[1], null ];
        }
        return null;
    }

    /**
     * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
     * If found, returns a `MDTokenMatch`, otherwise `null`.
     *
     * `META_AnyNonWhitespace` matches any element that is not a whitespace
     * token (including non-token nodes). `META_OptionalWhitespace` matches
     * zero or one whitespace token, so a result may contain fewer tokens than
     * the pattern has elements.
     *
     * NOTE(review): the search stops with `null` as soon as the pattern runs
     * past the end of the array, even if the remaining pattern elements are
     * all `META_OptionalWhitespace`. Preserved as-is; confirm it is intended.
     *
     * @param (MDToken|MDNode)[] $tokensToSearch - mixed array of `MDToken` and
     *     `MDNode` elements
     * @param MDTokenType[] $pattern - contiguous run of token types to find
     * @param int $startIndex - token index to begin searching (defaults to 0)
     * @return ?MDTokenMatch match object, or `null` if not found
     */
    public static function findFirstTokens(array $tokensToSearch, array $pattern, int $startIndex=0): ?MDTokenMatch {
        $tokenCount = count($tokensToSearch);
        $patternLength = count($pattern);
        for ($t = $startIndex; $t < $tokenCount; $t++) {
            $matchedAll = true;
            $matched = [];
            // Shifts left each time an optional pattern element consumes nothing.
            $patternOffset = 0;
            for ($p = 0; $p < $patternLength; $p++) {
                $t0 = $t + $p + $patternOffset;
                if ($t0 >= $tokenCount) return null;
                $token = $tokensToSearch[$t0];
                $elem = $pattern[$p];
                $isWhitespace = $token instanceof MDToken && $token->type === MDTokenType::Whitespace;
                if ($elem === MDTokenType::META_OptionalWhitespace) {
                    if ($isWhitespace) {
                        $matched[] = $token;
                    } else {
                        $patternOffset--;
                    }
                } elseif ($elem === MDTokenType::META_AnyNonWhitespace) {
                    if ($isWhitespace) {
                        $matchedAll = false;
                        break;
                    }
                    $matched[] = $token;
                } else {
                    if (!($token instanceof MDToken) || $token->type !== $elem) {
                        $matchedAll = false;
                        break;
                    }
                    $matched[] = $token;
                }
            }
            if ($matchedAll) {
                return new MDTokenMatch($matched, $t);
            }
        }
        return null;
    }

    /**
     * Searches an array of MDToken for a given starting pattern and ending
     * pattern and returns match info about both and the tokens in between.
     *
     * If `contentValidator` is specified, it will be called with the content
     * tokens of a potential match. If the validator returns `true`, the result
     * will be accepted and returned by this method. If the validator returns
     * `false`, this method will keep looking for another matching pair. If no
     * validator is given the first match with non-empty content is returned.
     *
     * If a match is found, a `MDPairedTokenMatch` is returned with details
     * of the opening tokens, closing tokens, and content tokens between. Otherwise
     * `null` is returned.
     *
     * @param MDToken[] $tokensToSearch - array of `MDToken` to search in
     * @param MDTokenType[] $startPattern - array of `MDTokenType` to find first
     * @param MDTokenType[] $endPattern - array of `MDTokenType` to find positioned after `startPattern`
     * @param ?callable $contentValidator - optional validator function. If provided, will be passed an array of inner `MDToken`, and the function can return `true` to accept the contents or `false` to keep searching
     * @param int $startIndex - token index where searching should begin
     * @return ?MDPairedTokenMatch match, or `null`
     */
    public static function findPairedTokens(array $tokensToSearch,
        array $startPattern, array $endPattern, ?callable $contentValidator=null,
        int $startIndex=0): ?MDPairedTokenMatch {
        $tokenCount = count($tokensToSearch);
        for ($s = $startIndex; $s < $tokenCount; $s++) {
            $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
            if ($startMatch === null) return null;
            $contentStart = $startMatch->index + count($startMatch->tokens);
            $endStart = $contentStart;
            while ($endStart < $tokenCount) {
                $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                if ($endMatch === null) break;
                $contentLength = $endMatch->index - $contentStart;
                $contents = array_slice($tokensToSearch, $contentStart, $contentLength);
                if (count($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                    return new MDPairedTokenMatch($startMatch->tokens,
                        $contents,
                        $endMatch->tokens,
                        $startMatch->index,
                        $contentStart,
                        $endMatch->index,
                        $endMatch->index + count($endMatch->tokens) - $startMatch->index);
                }
                // Contents rejected. Try next end match.
                $endStart = $endMatch->index + 1;
            }
            // No end matches. Resume after this start match (the loop's $s++
            // moves one past it).
            $s = $startMatch->index;
        }
        return null;
    }

    /**
     * Tests whether `$other` is an `MDToken` with the same field values.
     * `tag` is compared by identity and `modifier` by loose object equality.
     */
    public function equals(mixed $other): bool {
        if (!($other instanceof MDToken)) return false;
        if ($other->original !== $this->original) return false;
        if ($other->type !== $this->type) return false;
        if ($other->content !== $this->content) return false;
        if ($other->extra !== $this->extra) return false;
        if ($other->tag !== $this->tag) return false;
        if ($other->modifier != $this->modifier) return false;
        return true;
    }
}
-
/**
 * Parsing and rendering state. Passed around throughout the parsing process.
 *
 * States are hierarchical. A sub-state can be created by calling `copy()` with
 * a new array of lines. The sub-state points back to its parent state. This
 * is done to parse inner content of a syntax as its own standalone document.
 *
 * If a custom `MDReader` implementation wants to store data in this object,
 * always do so on `$state->root()` to ensure it's stored on the original state,
 * not a child state. Otherwise data may be lost when the sub-state is discarded.
 */
class MDState {
    /**
     * Lines of the markdown document. The current line index is pointed to by `p`.
     *
     * @var string[]
     */
    public array $lines;

    /**
     * Current line pointer into array `lines`.
     */
    public int $p = 0;

    private ?MDState $parent = null;

    /**
     * Array of `MDReader`s sorted by block reading priority.
     * @var MDReader[]
     */
    public array $readersByBlockPriority = [];

    /**
     * Array of `MDReader`s sorted by tokenization priority.
     * @var MDReader[]
     */
    public array $readersByTokenPriority = [];

    /**
     * Array of tuples of `[ int $pass, MDReader $reader ]` sorted by
     * substitution priority.
     * @var array[]
     */
    public array $readersBySubstitutePriority = [];

    /**
     * Prefix to include in any generated `id` attributes on HTML elements.
     * Useful for keeping elements unique in multiple parsed documents in the
     * same HTML page.
     */
    public string $elementIdPrefix = '';

    /**
     * Filter for removing unapproved HTML tags, attributes, and values.
     */
    public MDHTMLFilter $tagFilter;

    /** Groups: 1=leading whitespace, 2=text, 3=trailing whitespace. */
    private static string $textWhitespaceRegex = '^(\\s*)(?:(\\S|\\S.*\\S)(\\s*?))?$';

    /**
     * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
     * and `MDReferencedImageReader`.
     * @var array<string, string> lowercased symbol -> URL
     */
    private array $referenceToURL = [];

    /**
     * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
     * and `MDReferencedImageReader`.
     * @var array<string, string> lowercased symbol -> title string
     */
    private array $referenceToTitle = [];

    /**
     * @param string[] $lines - lines of markdown text
     */
    public function __construct(array $lines) {
        $this->lines = $lines;
    }

    /**
     * Ascends the parent chain to the root `MDState` instance. This should be
     * used when referencing most stored fields except `lines` and `p`.
     */
    public function root(): MDState {
        $state = $this;
        while ($state->parent !== null) {
            $state = $state->parent;
        }
        return $state;
    }

    /**
     * The current line in `lines`, or `null` when `p` is past the end.
     */
    public function currentLine(): ?string {
        return $this->hasLines(1) ? $this->lines[$this->p] : null;
    }

    /**
     * Creates a copy of this state with new lines. Useful for parsing nested
     * content. The copy's parent is this state.
     *
     * @param string[] $lines
     * @return MDState copied sub-state
     */
    public function copy(array $lines): MDState {
        $cp = new MDState($lines);
        $cp->parent = $this;
        return $cp;
    }

    /**
     * Tests if there are at least `minCount` lines available to read. If `p`
     * is not provided it will be relative to `$this->p`.
     */
    public function hasLines(int $minCount, ?int $p=null): bool {
        $relativeTo = $p ?? $this->p;
        return $relativeTo + $minCount <= count($this->lines);
    }

    /**
     * Reads and returns an array of blocks from the current line pointer.
     *
     * @return MDBlockNode[] parsed blocks
     */
    public function readBlocks(): array {
        $blocks = [];
        while ($this->hasLines(1)) {
            $block = $this->readNextBlock();
            if ($block === null) break;
            $blocks[] = $block;
        }
        return $blocks;
    }

    /**
     * Consumes all remaining lines as a single inline node if no registered
     * block reader matches.
     *
     * The return type admits `MDInlineNode` because that is what
     * `inlineMarkdownToNode` is declared to return.
     */
    private function readFallbackBlock(): MDBlockNode|MDInlineNode|null {
        if (!$this->hasLines(1)) return null;
        $remaining = array_slice($this->lines, $this->p);
        $lines = MDUtils::withoutTrailingBlankLines($remaining);
        if (count($lines) === 0) return null;
        $this->p = count($this->lines);
        return $this->inlineMarkdownToNode(implode("\n", $lines));
    }

    /**
     * Attempts to read one block from the current line pointer. The pointer
     * will be positioned just after the end of the block. Leading blank lines
     * are skipped first.
     *
     * @throws Error if a reader returns a block without advancing `p`
     */
    private function readNextBlock(): MDBlockNode|MDInlineNode|null {
        while ($this->hasLines(1) && trim($this->lines[$this->p]) === '') {
            $this->p++;
        }
        if (!$this->hasLines(1)) return null;
        foreach ($this->root()->readersByBlockPriority as $reader) {
            $startP = $this->p;
            $block = $reader->readBlock($this);
            if (!$block) continue;
            if ($this->p == $startP) {
                $readerClassName = get_class($reader);
                $blockClassName = get_class($block);
                throw new Error("{$readerClassName} returned an " .
                    "{$blockClassName} without incrementing MDState.p. " .
                    "This could lead to an infinite loop.");
            }
            return $block;
        }
        return $this->readFallbackBlock();
    }

    /**
     * Splits a line of inline markdown into tokens. Always runs on the root
     * state. A backslash makes the next character literal text; the backslash
     * itself is dropped.
     *
     * @param string $line
     * @return MDToken[]
     * @throws Error if a reader returns a token with an empty `original`
     */
    private function inlineMarkdownToTokens(string $line): array {
        if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);

        $tokens = [];
        $text = '';
        $expectLiteral = false;

        // Flushes accumulated content in `$text` to `$tokens`, splitting off
        // leading and trailing whitespace into their own tokens. Both
        // variables are captured by reference so the closure can mutate them.
        $endText = static function() use (&$tokens, &$text): void {
            if ($text === '') return;
            $textGroups = [];
            if (mb_eregi(self::$textWhitespaceRegex, $text, $textGroups)) {
                $parts = [
                    [ $textGroups[1] ?? null, MDTokenType::Whitespace ],
                    [ $textGroups[2] ?? null, MDTokenType::Text ],
                    [ $textGroups[3] ?? null, MDTokenType::Whitespace ],
                ];
                foreach ($parts as [ $part, $type ]) {
                    // Groups that did not take part in the match are not
                    // strings. Compare against '' rather than testing
                    // truthiness so the text "0" is kept.
                    if (is_string($part) && $part !== '') {
                        $tokens[] = new MDToken($part, $type, $part);
                    }
                }
            } else {
                $tokens[] = new MDToken($text, MDTokenType::Text, $text);
            }
            $text = '';
        };

        $readers = $this->root()->readersByTokenPriority;
        $chars = mb_str_split($line);
        $length = count($chars);
        for ($p = 0; $p < $length; $p++) {
            $ch = $chars[$p];
            if ($expectLiteral) {
                $text .= $ch;
                $expectLiteral = false;
                continue;
            }
            if ($ch === '\\') {
                $expectLiteral = true;
                continue;
            }
            $remainder = mb_substr($line, $p);
            $found = false;
            foreach ($readers as $reader) {
                $token = $reader->readToken($this, $remainder);
                if ($token === null) continue;
                if ($token->original === null || $token->original === '') {
                    $readerClassName = get_class($reader);
                    throw new Error("{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.");
                }
                $endText();
                $tokens[] = $token;
                // -1 because the loop's $p++ accounts for one character.
                $p += mb_strlen($token->original) - 1;
                $found = true;
                break;
            }
            if (!$found) {
                $text .= $ch;
            }
        }
        $endText();
        return $tokens;
    }

    /**
     * Converts a line of markdown to an `MDInlineNode`. If parsing yields
     * exactly one node it is returned directly; otherwise the nodes are
     * wrapped in a new `MDInlineNode`.
     *
     * @param string|string[] $line
     * @return MDInlineNode
     */
    public function inlineMarkdownToNode(string|array $line): MDInlineNode {
        $nodes = $this->inlineMarkdownToNodes($line);
        return (count($nodes) === 1) ? $nodes[0] : new MDInlineNode($nodes);
    }

    /**
     * Converts a line of markdown to an array of `MDInlineNode`s. An array of
     * lines is joined with newlines first.
     *
     * @param string|string[] $line
     * @return MDInlineNode[]
     */
    public function inlineMarkdownToNodes(string|array $line): array {
        $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
        return $this->tokensToNodes($tokens);
    }

    /**
     * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
     * of only `MDInlineNode` via repeated `MDReader` substitution.
     *
     * @param (MDToken|MDInlineNode)[] $tokens
     * @return MDInlineNode[]
     * @throws Error if an element is neither an `MDToken` nor an `MDNode`
     */
    public function tokensToNodes(array $tokens): array {
        $nodes = $tokens;
        $root = $this->root();

        // Perform repeated substitutions, converting sequences of tokens into
        // nodes, until no more substitutions can be made. After any change
        // the reader list is restarted from the highest priority.
        do {
            $anyChanges = false;
            foreach ($root->readersBySubstitutePriority as [ $pass, $reader ]) {
                if ($reader->substituteTokens($this, $pass, $nodes)) {
                    $anyChanges = true;
                    break;
                }
            }
        } while ($anyChanges);

        // Convert any remaining tokens to text nodes. Also apply any inline
        // CSS modifiers to the non-text node immediately before them.
        // `$lastNode` is captured by reference so it carries over from one
        // element to the next.
        $lastNode = null;
        return array_map(function($node) use (&$lastNode, $root) {
            if ($node instanceof MDToken) {
                if ($node->type === MDTokenType::Modifier && $node->modifier !== null && $lastNode) {
                    // Tag name is not known here, so only global attributes apply.
                    $root->tagFilter->scrubModifier($node->modifier, null);
                    $node->modifier->applyTo($lastNode);
                    $lastNode = null;
                    return new MDTextNode('');
                }
                $lastNode = null;
                return new MDTextNode($node->original);
            }
            if ($node instanceof MDNode) {
                $lastNode = ($node instanceof MDTextNode) ? null : $node;
                return $node;
            }
            $nodeClassName = is_object($node) ? get_class($node) : gettype($node);
            throw new Error("Unexpected node type {$nodeClassName}");
        }, $nodes);
    }

    /**
     * Defines a URL by reference symbol. Symbols are case-insensitive. The
     * mapping is stored on the root state.
     */
    public function defineURL(string $reference, string $url, ?string $title=null): void {
        $root = $this->root();
        $key = mb_strtolower($reference);
        $root->referenceToURL[$key] = $url;
        if ($title !== null) $root->referenceToTitle[$key] = $title;
    }

    /**
     * Returns the URL associated with a reference symbol, or `null`.
     */
    public function urlForReference(string $reference): ?string {
        return $this->root()->referenceToURL[mb_strtolower($reference)] ?? null;
    }

    /**
     * Returns the link title associated with a reference symbol, or `null`.
     */
    public function urlTitleForReference(string $reference): ?string {
        return $this->root()->referenceToTitle[mb_strtolower($reference)] ?? null;
    }
}
-
- /**
- * Defines a set of allowable HTML tags, attributes, and CSS.
- */
- class MDHTMLFilter {
/**
 * Mapping of permitted lowercase tag names to arrays of allowable
 * attributes for those tags. Does not need to include those attributes
 * defined in `allowableGlobalAttributes`.
 *
 * Values are arrays with allowable lowercase attribute names mapped to
 * allowable value patterns. A `*` means any value is acceptable. Multiple
 * allowable values can be joined together with `|`. These special symbols
 * represent certain kinds of values and can be used in combination or in
 * place of literal values.
 *
 * - `{classlist}`: A list of legal CSS classnames, separated by spaces
 * - `{int}`: An integer
 * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
 * - `{style}`: One or more CSS declarations, separated by semicolons (simple
 *   `key: value;` syntax only)
 * - `{url}`: A URL
 *
 * @var array<string, array<string, string>>
 */
public array $allowableTags = [
    'address' => [
        'cite' => '{url}',
    ],
    'h1' => [],
    'h2' => [],
    'h3' => [],
    'h4' => [],
    'h5' => [],
    'h6' => [],
    'blockquote' => [],
    'dl' => [],
    'dt' => [],
    'dd' => [],
    'div' => [],
    'hr' => [],
    'ul' => [],
    'ol' => [
        'start' => '{int}',
        'type' => 'a|A|i|I|1',
    ],
    'li' => [
        'value' => '{int}',
    ],
    'p' => [],
    'pre' => [],
    'table' => [],
    'thead' => [],
    'tbody' => [],
    'tfoot' => [],
    'tr' => [],
    'td' => [],
    'th' => [],
    'a' => [
        'href' => '{url}',
        'target' => '*',
    ],
    'abbr' => [],
    'b' => [],
    'br' => [],
    'cite' => [],
    'code' => [],
    'data' => [
        'value' => '*',
    ],
    'dfn' => [],
    'em' => [],
    'i' => [],
    'kbd' => [],
    'mark' => [],
    'q' => [
        'cite' => '{url}',
    ],
    's' => [],
    'samp' => [],
    'small' => [],
    'span' => [],
    'strong' => [],
    'sub' => [],
    'sup' => [],
    'time' => [
        'datetime' => '*',
    ],
    'u' => [],
    'var' => [],
    'wbr' => [],
    'img' => [
        'alt' => '*',
        // `href` is not an <img> attribute; kept only so nothing that was
        // previously permitted becomes forbidden.
        'href' => '{url}',
        // The image location. Without this entry every <img> would have its
        // source scrubbed.
        'src' => '{url}',
    ],
    'figure' => [],
    'figcaption' => [],
    'del' => [],
    'ins' => [],
    'details' => [],
    'summary' => [],
];
-
/**
 * Attributes permitted on every tag, keyed by lowercase attribute name and
 * mapped to the value pattern each one accepts. Patterns use the same
 * syntax as the per-tag entries in `allowableTags`.
 *
 * @var array<string, string>
 */
public array $allowableGlobalAttributes = [
    'class'     => '{classlist}',
    'data-*'    => '*',
    'dir'       => 'ltr|rtl|auto',
    'id'        => '*',
    'lang'      => '*',
    'style'     => '{style}',
    'title'     => '*',
    'translate' => 'yes|no|{none}',
];
-
/**
 * CSS property names permitted in `style` values, mapped to the value
 * pattern each one accepts. Alternatives are separated by `|`. Support is
 * limited so far.
 *
 * Recognized special values:
 * - `{color}`: A hex or named color
 *
 * @var array<string, string>
 */
public array $allowableStyleKeys = [
    'background-color' => '{color}',
    'color'            => '{color}',
];
-
/**
 * Prunes a tag's attribute map in place, dropping every attribute whose name
 * is not permitted for the tag or whose value fails the permitted pattern.
 * The tag name itself is not examined; it is assumed to have been
 * whitelisted by the caller.
 *
 * @param MDHTMLTag $tag tag whose `attributes` are filtered
 */
public function scrubTag(MDHTMLTag $tag) {
    $owner = $tag->tagName;
    foreach ($tag->attributes as $attrName => $attrValue) {
        $isAllowed = $this->isValidAttributeName($owner, $attrName)
            && $this->isValidAttributeValue($owner, $attrName, $attrValue);
        if ($isAllowed) {
            continue;
        }
        unset($tag->attributes[$attrName]);
    }
}
-
/**
 * Scrubs all forbidden values from a tag modifier, in place:
 * - the whole class list is cleared if it is not a valid `class` value
 * - the id is cleared if it is not a valid `id` value
 * - styles are cleared entirely if `style` is not a permitted attribute,
 *   otherwise only the individual disallowed declarations are removed
 * - every other attribute with a disallowed name or value is removed
 *
 * @param MDTagModifier $modifier modifier to clean
 * @param ?string $tagName HTML tag name, if known, otherwise only
 *   global attributes will be permitted
 */
public function scrubModifier(MDTagModifier $modifier, ?string $tagName) {
    // The parameter was previously typed `MDHTMLModifier`, a class that is
    // not declared; the modifier class is `MDTagModifier`.
    if (count($modifier->cssClasses) > 0) {
        $classList = implode(' ', $modifier->cssClasses);
        if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
            $modifier->cssClasses = [];
        }
    }
    if ($modifier->cssId !== null) {
        if (!$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
            $modifier->cssId = null;
        }
    }
    if (!$this->isValidAttributeName($tagName, 'style')) {
        $modifier->cssStyles = [];
    } else {
        foreach ($modifier->cssStyles as $key => $val) {
            if (!$this->isValidStyleValue($key, $val)) {
                unset($modifier->cssStyles[$key]);
            }
        }
    }
    foreach ($modifier->attributes as $key => $val) {
        // The value check also rejects attribute names that are not allowed.
        if (!$this->isValidAttributeValue($tagName, $key, $val)) {
            unset($modifier->attributes[$key]);
        }
    }
}
-
/**
 * Reports whether a tag name has an entry in `allowableTags`. The lookup is
 * case-insensitive.
 *
 * @param string $tagName tag name in any letter case
 * @return bool `true` if the tag is permitted
 */
public function isValidTagName(string $tagName): bool {
    $lookupKey = mb_strtolower($tagName);
    return isset($this->allowableTags[$lookupKey]);
}
-
/**
 * Tests if an HTML attribute name is permitted. Global attributes (exact
 * names and `prefix*` wildcards) are accepted for any tag; other attributes
 * are accepted only if listed for the given tag in `allowableTags`.
 *
 * @param ?string $tagName tag the attribute belongs to, or `null` to
 *   consider global attributes only
 * @param string $attributeName attribute name in any letter case
 * @return bool `true` if the attribute name is permitted
 */
public function isValidAttributeName(?string $tagName, string $attributeName): bool {
    $lcAttributeName = mb_strtolower($attributeName);
    if (($this->allowableGlobalAttributes[$lcAttributeName] ?? null) !== null) {
        return true;
    }
    foreach ($this->allowableGlobalAttributes as $pattern => $valuePattern) {
        if (!str_ends_with($pattern, '*')) continue;
        // Drop the trailing `*` and treat what remains as a name prefix.
        $patternPrefix = mb_substr($pattern, 0, mb_strlen($pattern) - 1);
        if (str_starts_with($lcAttributeName, $patternPrefix)) {
            return true;
        }
    }
    if ($tagName === null) return false;
    $lcTagName = mb_strtolower($tagName);
    // `?? null` avoids an "Undefined array key" warning for tags that are
    // not in the whitelist.
    $tagAttributes = $this->allowableTags[$lcTagName] ?? null;
    if ($tagAttributes === null) return false;
    return ($tagAttributes[$lcAttributeName] ?? null) !== null;
}
-
/**
 * Decides whether a value is acceptable for an attribute. The pattern to
 * match against is looked up in this order: an exact global attribute name,
 * a global `prefix*` wildcard, then the attribute list of the given tag. An
 * attribute with no pattern in any of those places is rejected.
 *
 * @param ?string $tagName owning tag, or `null` to consult global
 *   attributes only
 * @param string $attributeName attribute name in any letter case
 * @param string|bool $attributeValue value to test
 * @return bool `true` if the value matches the attribute's pattern
 */
public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
    $name = mb_strtolower($attributeName);

    // 1. Exact global attribute.
    if (isset($this->allowableGlobalAttributes[$name])) {
        return $this->attributeValueMatchesPattern($attributeValue, $this->allowableGlobalAttributes[$name]);
    }

    // 2. Wildcard global attribute: the first matching prefix decides.
    foreach ($this->allowableGlobalAttributes as $globalName => $globalPattern) {
        if (!str_ends_with($globalName, '*')) {
            continue;
        }
        $prefix = mb_substr($globalName, 0, mb_strlen($globalName) - 1);
        if (!str_starts_with($name, $prefix)) {
            continue;
        }
        return $this->attributeValueMatchesPattern($attributeValue, $globalPattern);
    }

    // 3. Tag-specific attribute.
    if ($tagName === null) {
        return false;
    }
    $tagPattern = $this->allowableTags[mb_strtolower($tagName)][$name] ?? null;
    return $tagPattern !== null
        && $this->attributeValueMatchesPattern($attributeValue, $tagPattern);
}
-
// NOTE(review): `{url}` accepts any run of non-whitespace characters,
// which includes `javascript:` URLs. Confirm whether a scheme check is
// wanted before this filter is relied on for untrusted input.
private static string $permissiveURLRegex = '^\\S+$';
private static string $integerRegex = '^[\\-]?\\d+$';
private static string $classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';

/**
 * Tests a value against a `|`-delimited value pattern. The value is accepted
 * as soon as any one alternative matches.
 *
 * Alternatives may be `*` (anything), one of the special tokens
 * `{classlist}`, `{int}`, `{none}`, `{style}`, `{url}`, or a literal string
 * that must equal the value exactly.
 *
 * @param string|bool $value attribute value; `true` represents a bare
 *   attribute with no value
 * @param string $pattern value pattern
 * @return bool `true` if any alternative accepts the value
 */
private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
    // Regex and style checks only make sense for string values; a bare
    // attribute (`true`) can only satisfy `*` or `{none}`.
    $isString = is_string($value);
    foreach (explode('|', $pattern) as $option) {
        switch ($option) {
            case '*':
                return true;
            case '{classlist}':
                // Static properties need `self::$name`; `self::name` would be
                // resolved as an (undefined) class constant.
                if ($isString && mb_eregi(self::$classListRegex, $value)) return true;
                break;
            case '{int}':
                if ($isString && mb_eregi(self::$integerRegex, $value)) return true;
                break;
            case '{none}':
                if ($value === true) return true;
                break;
            case '{style}':
                if ($isString && $this->isValidStyleDeclaration($value)) return true;
                break;
            case '{url}':
                if ($isString && mb_eregi(self::$permissiveURLRegex, $value)) return true;
                break;
            default:
                if ($value === $option) return true;
                break;
        }
    }
    return false;
}
-
/**
 * Checks a string of `key: value;` CSS declarations. Every non-blank
 * declaration must consist of exactly one key and one value separated by a
 * single colon, and both the key and the value must be allowable. Blank
 * declarations (such as the one after a trailing semicolon) are ignored.
 *
 * @param string $styles semicolon-separated declarations
 * @return bool `true` only if every declaration is allowable
 */
public function isValidStyleDeclaration(string $styles): bool {
    foreach (explode(';', $styles) as $declaration) {
        if (trim($declaration) === '') {
            continue;
        }
        $pieces = explode(':', $declaration);
        if (count($pieces) != 2) {
            return false;
        }
        [$rawKey, $rawValue] = $pieces;
        $styleKey = trim($rawKey);
        if (!$this->isValidStyleKey($styleKey)) {
            return false;
        }
        if (!$this->isValidStyleValue($styleKey, trim($rawValue))) {
            return false;
        }
    }
    return true;
}
-
/**
 * Reports whether a CSS property name has an entry in `allowableStyleKeys`.
 * The key is looked up exactly as given.
 *
 * @param string $key CSS property name
 * @return bool `true` if the property is permitted
 */
public function isValidStyleKey(string $key): bool {
    return isset($this->allowableStyleKeys[$key]);
}
-
/**
 * Tests if a CSS style value is allowable for the given property. The
 * property's pattern from `allowableStyleKeys` is split on `|`; the value is
 * accepted if any alternative matches. The `{color}` token is checked as a
 * CSS color; any other alternative must equal the value exactly.
 *
 * @param string $key CSS property name
 * @param string $value CSS value, already trimmed
 * @return bool `true` if the value is permitted; `false` if it is not or
 *   the property itself is not allowable
 */
public function isValidStyleValue(string $key, string $value): bool {
    $pattern = $this->allowableStyleKeys[$key] ?? null;
    if ($pattern === null) return false;
    foreach (explode('|', $pattern) as $option) {
        // `match` has no fall-through, so the `{color}` token is never
        // compared against the value as a literal string.
        $accepted = match ($option) {
            '{color}' => $this->isValidCSSColor($value),
            default => $value === $option,
        };
        if ($accepted) return true;
    }
    return false;
}
-
// Either `#` followed by 3 or 6 hex digits, or a purely alphabetic name.
private static string $styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';

/**
 * Reports whether a string looks like a CSS color according to
 * `$styleColorRegex`, matched case-insensitively.
 *
 * @param string $value candidate color
 * @return bool `true` if the value matches
 */
private function isValidCSSColor(string $value): bool {
    $looksLikeColor = mb_eregi(self::$styleColorRegex, $value);
    return $looksLikeColor;
}
- }
-
/**
 * Represents a single HTML tag. Paired tags are represented separately.
 */
class MDHTMLTag {
    /**
     * Verbatim string of the original parsed tag. Not modified. Should be
     * considered unsafe for inclusion in the final document. Cast the tag to
     * a string (`__toString()`) instead.
     */
    public string $original;
    public string $tagName;
    public bool $isCloser;
    /**
     * Map of attribute names to values. A value is a string, or `true` for
     * a bare attribute that was written without a value.
     *
     * @var array<string, string|true>
     */
    public array $attributes;

    /**
     * @param string $original verbatim source text of the tag
     * @param string $tagName tag name
     * @param bool $isCloser `true` for a closing tag such as `</p>`
     * @param array $attributes map of attribute names to string values, or
     *   `true` for bare attributes
     */
    public function __construct(string $original, string $tagName, bool $isCloser,
        array $attributes) {
        $this->original = $original;
        $this->tagName = $tagName;
        $this->isCloser = $isCloser;
        $this->attributes = $attributes;
    }

    /**
     * Rebuilds the tag from its parsed parts. Closing tags never carry
     * attributes. Attribute names are passed through
     * `MDUtils::scrubAttributeName()` and string values through
     * `MDUtils::escapeHTML()`; bare attributes are written without a value.
     */
    public function __toString(): string {
        if ($this->isCloser) {
            return "</{$this->tagName}>";
        }
        $html = '<';
        $html .= $this->tagName;
        foreach ($this->attributes as $key => $value) {
            $safeName = MDUtils::scrubAttributeName($key);
            if ($value === true) {
                $html .= " {$safeName}";
            } else {
                $escapedValue = MDUtils::escapeHTML("{$value}");
                $html .= " {$safeName}=\"{$escapedValue}\"";
            }
        }
        $html .= '>';
        return $html;
    }

    /**
     * Compares tag name, closer flag, and attributes. The `original` source
     * text is not part of the comparison.
     *
     * @param mixed $other value to compare against
     */
    public function equals($other): bool {
        if (!($other instanceof MDHTMLTag)) return false;
        if ($other->tagName !== $this->tagName) return false;
        if ($other->isCloser !== $this->isCloser) return false;
        return MDUtils::equal($other->attributes, $this->attributes);
    }

    // Single-character classes, applied case-insensitively via `mb_eregi`.
    private static string $htmlTagNameFirstRegex = '[a-z]';
    private static string $htmlTagNameMedialRegex = '[a-z0-9]';
    private static string $htmlAttributeNameFirstRegex = '[a-z]';
    private static string $htmlAttributeNameMedialRegex = '[a-z0-9-]';
    private static string $whitespaceCharRegex = '\\s';

    /**
     * Checks the start of the given string for presence of an HTML tag,
     * using a character-by-character state machine.
     *
     * Quoted attribute values have HTML entities decoded; unquoted values
     * are taken verbatim and end at whitespace or at the closing `>`.
     *
     * @param string $line text that may begin with a tag
     * @return ?MDHTMLTag the parsed tag (its `original` is the consumed
     *   prefix of `$line`), or `null` if the line does not start with a
     *   complete, well-formed tag
     */
    public static function fromLineStart(string $line): ?MDHTMLTag {
        // Parser states.
        $expectOpenBracket = 0;
        $expectCloserOrName = 1;
        $expectName = 2;
        $expectAttributeNameOrEnd = 3;
        $expectEqualsOrAttributeOrEnd = 4;
        $expectAttributeValue = 5;
        $expectCloseBracket = 6;

        $isCloser = false;
        $tagName = '';
        $attributeName = '';
        $attributeValue = '';
        // null = no value started, '' = unquoted value, otherwise the quote char
        $attributeQuote = null;
        $attributes = [];
        $fullTag = null;
        // Commits the attribute being accumulated (if any) and resets the
        // accumulators. A name with no value at all is stored as `true`.
        $endAttribute = function(bool $unescape=false) use (&$attributes, &$attributeName, &$attributeValue, &$attributeQuote) {
            if (mb_strlen($attributeName) > 0) {
                if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) {
                    $attributes[$attributeName] = $unescape ? html_entity_decode($attributeValue, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') : $attributeValue;
                } else {
                    $attributes[$attributeName] = true;
                }
            }
            $attributeName = '';
            $attributeValue = '';
            $attributeQuote = null;
        };

        $expect = $expectOpenBracket;
        // The length is loop-invariant; compute it once.
        $length = mb_strlen($line);
        for ($p = 0; $p < $length && $fullTag === null; $p++) {
            $ch = mb_substr($line, $p, 1);
            $isWhitespace = mb_eregi(self::$whitespaceCharRegex, $ch);
            switch ($expect) {
                case $expectOpenBracket:
                    if ($ch != '<') return null;
                    $expect = $expectCloserOrName;
                    break;
                case $expectCloserOrName:
                    if ($ch == '/') {
                        $isCloser = true;
                    } else {
                        // Not a closer; re-read this character as the name start.
                        $p--;
                    }
                    $expect = $expectName;
                    break;
                case $expectName:
                    if (mb_strlen($tagName) == 0) {
                        if (!mb_eregi(self::$htmlTagNameFirstRegex, $ch)) return null;
                        $tagName .= $ch;
                    } else {
                        if (mb_eregi(self::$htmlTagNameMedialRegex, $ch)) {
                            $tagName .= $ch;
                        } else {
                            // End of name; re-read this character in the next state.
                            $p--;
                            $expect = ($isCloser) ? $expectCloseBracket : $expectAttributeNameOrEnd;
                        }
                    }
                    break;
                case $expectAttributeNameOrEnd:
                    if (mb_strlen($attributeName) == 0) {
                        if ($isWhitespace) {
                            // skip whitespace
                        } elseif ($ch == '/') {
                            $expect = $expectCloseBracket;
                        } elseif ($ch == '>') {
                            $fullTag = mb_substr($line, 0, $p + 1);
                        } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                            $attributeName .= $ch;
                        } else {
                            return null;
                        }
                    } elseif ($isWhitespace) {
                        $expect = $expectEqualsOrAttributeOrEnd;
                    } elseif ($ch == '/') {
                        $endAttribute();
                        $expect = $expectCloseBracket;
                    } elseif ($ch == '>') {
                        $endAttribute();
                        $fullTag = mb_substr($line, 0, $p + 1);
                    } elseif ($ch == '=') {
                        $expect = $expectAttributeValue;
                    } elseif (mb_eregi(self::$htmlAttributeNameMedialRegex, $ch)) {
                        $attributeName .= $ch;
                    } else {
                        return null;
                    }
                    break;
                case $expectEqualsOrAttributeOrEnd:
                    // A pending bare attribute is committed either when the
                    // next attribute starts or by the final $endAttribute()
                    // call after the loop.
                    if ($ch == '=') {
                        $expect = $expectAttributeValue;
                    } elseif ($isWhitespace) {
                        // skip whitespace
                    } elseif ($ch == '/') {
                        $expect = $expectCloseBracket;
                    } elseif ($ch == '>') {
                        $fullTag = mb_substr($line, 0, $p + 1);
                    } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                        $endAttribute();
                        $expect = $expectAttributeNameOrEnd;
                        $p--;
                    }
                    break;
                case $expectAttributeValue:
                    if (mb_strlen($attributeValue) == 0) {
                        if ($attributeQuote === null) {
                            if ($isWhitespace) {
                                // skip whitespace
                            } elseif ($ch == '"' || $ch == "'") {
                                $attributeQuote = $ch;
                            } else {
                                $attributeQuote = ''; // explicitly unquoted
                                $p--;
                            }
                        } else {
                            if ($ch === $attributeQuote) {
                                // Empty string
                                $endAttribute($attributeQuote != '');
                                $expect = $expectAttributeNameOrEnd;
                            } elseif ($attributeQuote === '' && ($ch == '/' || $ch == '>')) {
                                // `=` with no value before the tag ends
                                return null;
                            } else {
                                $attributeValue .= $ch;
                            }
                        }
                    } else {
                        if ($ch === $attributeQuote) {
                            $endAttribute($attributeQuote != '');
                            $expect = $expectAttributeNameOrEnd;
                        } elseif ($attributeQuote === '' && $isWhitespace) {
                            $endAttribute();
                            $expect = $expectAttributeNameOrEnd;
                        } elseif ($attributeQuote === '' && $ch == '>') {
                            // An unquoted value ends at the closing bracket
                            // (`<a href=foo>`). Without this the `>` and the
                            // rest of the line were absorbed into the value.
                            $endAttribute();
                            $fullTag = mb_substr($line, 0, $p + 1);
                        } else {
                            $attributeValue .= $ch;
                        }
                    }
                    break;
                case $expectCloseBracket:
                    if ($isWhitespace) {
                        // ignore whitespace
                    } elseif ($ch == '>') {
                        $fullTag = mb_substr($line, 0, $p + 1);
                    }
                    break;
            }
        }
        if ($fullTag === null) return null;
        // Commit an attribute still pending when the tag closed.
        $endAttribute();
        return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
    }
}
-
/**
 * Represents HTML modifications to a node, such as CSS classes to add or
 * additional attributes. See `MDHTMLFilter.scrubModifier()` to remove disallowed
 * values.
 */
class MDTagModifier {
    /**
     * Verbatim markdown syntax. Unmodified by changes to other properties.
     * Defaults to an empty string so a freshly constructed modifier can be
     * cast to a string safely.
     */
    public string $original = '';
    /** @var string[] */
    public array $cssClasses = [];
    public ?string $cssId = null;
    /** @var array<string, string> CSS property name => value */
    public array $cssStyles = [];
    /** @var array<string, string> attribute name => value */
    public array $attributes = [];

    private static string $baseClassRegex = '\\.([a-z_\\-][a-z0-9_\\-]*?)';
    private static string $baseIdRegex = '#([a-z_\\-][a-z0-9_\\-]*?)';
    private static string $baseAttributeRegex = '([a-z0-9]+?)=([^\\s\\}]+?)';
    private static string $baseRegex = '\\{([^}]+?)}';
    private static string $leadingClassRegex = '^\\{([^}]+?)}';
    private static string $trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$';
    private static string $classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
    private static string $idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
    private static string $attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value

    /**
     * Copies this modifier's classes, id, attributes, and styles onto a
     * node. Existing attributes and styles with the same name are
     * overwritten; the node's id is only replaced when this modifier has one.
     *
     * @param MDNode $node node to modify
     */
    public function applyTo(MDNode $node) {
        foreach ($this->cssClasses as $cssClass) {
            $node->addClass($cssClass);
        }
        if ($this->cssId !== null && $this->cssId !== '') $node->cssId = $this->cssId;
        foreach ($this->attributes as $name => $value) {
            $node->attributes[$name] = $value;
        }
        foreach ($this->cssStyles as $name => $value) {
            $node->cssStyles[$name] = $value;
        }
    }

    /**
     * Adds a CSS class. If already present it will not be duplicated.
     *
     * @return bool `true` if the class was added, `false` if already present
     */
    public function addClass(string $cssClass): bool {
        if (in_array($cssClass, $this->cssClasses, true)) return false;
        $this->cssClasses[] = $cssClass;
        return true;
    }

    /**
     * Removes a CSS class.
     *
     * @return bool `true` if the class was present and has been removed
     */
    public function removeClass(string $cssClass): bool {
        $beforeLength = count($this->cssClasses);
        // array_diff preserves keys; reindex so the class list stays a list.
        $this->cssClasses = array_values(array_diff($this->cssClasses, [ $cssClass ]));
        return count($this->cssClasses) !== $beforeLength;
    }

    /**
     * Compares classes, id, attributes, and styles. The `original` source
     * text is not part of the comparison.
     *
     * @param mixed $other value to compare against
     */
    public function equals($other): bool {
        if (!($other instanceof MDTagModifier)) return false;
        if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
        if ($other->cssId !== $this->cssId) return false;
        if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
        if (!MDUtils::equal($other->cssStyles, $this->cssStyles)) return false;
        return true;
    }

    public function __toString(): string {
        return $this->original;
    }

    /**
     * Splits a `key:value;key:value` string into a map. Pieces that do not
     * consist of exactly one key and one value are skipped.
     *
     * @return array<string, string>
     */
    private static function styleToObject(string $styleValue): array {
        $styles = [];
        foreach (explode(';', $styleValue) as $pair) {
            $keyAndValue = explode(':', $pair);
            if (count($keyAndValue) != 2) continue;
            $styles[$keyAndValue[0]] = $keyAndValue[1];
        }
        return $styles;
    }

    /**
     * Parses the text between the braces of a modifier. Tokens are separated
     * by whitespace and must each be `.class`, `#id`, or `name=value`; a
     * `style=...` token populates `cssStyles` instead of `attributes`.
     *
     * @param string $contents text between `{` and `}`
     * @return ?MDTagModifier the modifier, or `null` if any token is not
     *   recognized
     */
    private static function fromContents(string $contents): ?MDTagModifier {
        $modifierTokens = mb_split('\\s+', $contents);
        $mod = new MDTagModifier();
        $mod->original = "{{$contents}}";
        foreach ($modifierTokens as $token) {
            if (trim($token) == '') continue;
            if (mb_eregi(self::$classRegex, $token, $groups)) {
                $mod->addClass($groups[1]);
            } elseif (mb_eregi(self::$idRegex, $token, $groups)) {
                $mod->cssId = $groups[1];
            } elseif (mb_eregi(self::$attributeRegex, $token, $groups)) {
                if ($groups[1] == 'style') {
                    $mod->cssStyles = self::styleToObject($groups[2]);
                } else {
                    $mod->attributes[$groups[1]] = $groups[2];
                }
            } else {
                return null;
            }
        }
        return $mod;
    }

    /**
     * Extracts block modifier from end of a line. Always returns a 2-element
     * tuple array:
     * - `0`: the line without the modifier
     * - `1`: an `MDTagModifier` if found or `null` if not
     *
     * When no valid modifier is found the line is returned untouched, so
     * trailing text in braces that is not modifier syntax is not lost.
     *
     * @param string $line
     * @param ?MDState $state if given, modifiers are only recognized when
     *   an `MDModifierReader` is among the root state's block readers
     * @return array tuple with remaining line and `MDTagModifier` or `null`
     */
    public static function fromLine(string $line, ?MDState $state): array {
        if ($state) {
            $found = false;
            foreach ($state->root()->readersByBlockPriority as $reader) {
                if ($reader instanceof MDModifierReader) {
                    $found = true;
                    break;
                }
            }
            if (!$found) return [ $line, null ];
        }
        if (!mb_eregi(self::$trailingClassRegex, $line, $groups)) return [ $line, null ];
        $mod = self::fromContents($groups[2]);
        if ($mod === null) return [ $line, null ];
        // An empty capture group is reported as `false` by the mbstring
        // regex functions; normalize it to an empty string.
        $bareLine = is_string($groups[1]) ? $groups[1] : '';
        return [ $bareLine, $mod ];
    }

    /**
     * Attempts to extract modifier from head of string.
     *
     * @return ?MDTagModifier the modifier, or `null` if the string does not
     *   start with valid modifier syntax
     */
    public static function fromStart(string $line): ?MDTagModifier {
        if (!mb_eregi(self::$leadingClassRegex, $line, $groups)) return null;
        return self::fromContents($groups[1]);
    }

    /**
     * Discards any modifiers from a line and returns what remains. A
     * trailing `{...}` that is not valid modifier syntax is left in place.
     */
    public static function strip(string $line): string {
        if (!mb_eregi(self::$trailingClassRegex, $line, $groups)) return $line;
        if (self::fromContents($groups[2]) === null) return $line;
        return is_string($groups[1]) ? $groups[1] : '';
    }
}
-
-
- // -- Readers ---------------------------------------------------------------
-
-
// Reader class declarations. Every class in this group has an empty body;
// only the names and the inheritance hierarchy are established here.

/** Root of the reader hierarchy. */
class MDReader
{
}

class MDUnderlinedHeadingReader extends MDReader
{
}

class MDHashHeadingReader extends MDReader
{
}

class MDSubtextReader extends MDReader
{
}

class MDBlockQuoteReader extends MDReader
{
}

/** Common parent of the unordered and ordered list readers. */
class _MDListReader extends MDReader
{
}

class MDUnorderedListReader extends _MDListReader
{
}

class MDOrderedListReader extends _MDListReader
{
}

class MDFencedCodeBlockReader extends MDReader
{
}

class MDIndentedCodeBlockReader extends MDReader
{
}

class MDHorizontalRuleReader extends MDReader
{
}

class MDTableReader extends MDReader
{
}

class MDDefinitionListReader extends MDReader
{
}

class MDFootnoteReader extends MDReader
{
}

class MDAbbreviationReader extends MDReader
{
}

class MDParagraphReader extends MDReader
{
}

/** Common parent of the readers listed directly below it. */
class MDSimplePairInlineReader extends MDReader
{
}

class MDEmphasisReader extends MDSimplePairInlineReader
{
}

class MDStrongReader extends MDSimplePairInlineReader
{
}

class MDStrikethroughReader extends MDSimplePairInlineReader
{
}

class MDUnderlineReader extends MDSimplePairInlineReader
{
}

class MDHighlightReader extends MDSimplePairInlineReader
{
}

class MDCodeSpanReader extends MDSimplePairInlineReader
{
}

class MDSubscriptReader extends MDSimplePairInlineReader
{
}

class MDSuperscriptReader extends MDSimplePairInlineReader
{
}

class MDLinkReader extends MDReader
{
}

class MDReferencedLinkReader extends MDLinkReader
{
}

class MDImageReader extends MDLinkReader
{
}

class MDReferencedImageReader extends MDReferencedLinkReader
{
}

class MDLineBreakReader extends MDReader
{
}

class MDHTMLTagReader extends MDReader
{
}

class MDModifierReader extends MDReader
{
}
-
-
- // -- Nodes -----------------------------------------------------------------
-
-
// Node class declarations. Every class in this group has an empty body;
// only the names and the inheritance hierarchy are established here.

/**
 * Root of the node hierarchy.
 *
 * NOTE(review): `MDTagModifier::applyTo()` calls `addClass()` on a node and
 * assigns its `cssId`, `attributes`, and `cssStyles`; none of those members
 * are declared by this empty class.
 */
class MDNode
{
}

/** Parent of the block-level node classes. */
class MDBlockNode extends MDNode
{
}

class MDParagraphNode extends MDBlockNode
{
}

class MDHeadingNode extends MDBlockNode
{
}

class MDSubtextNode extends MDBlockNode
{
}

class MDHorizontalRuleNode extends MDBlockNode
{
}

class MDBlockquoteNode extends MDBlockNode
{
}

class MDUnorderedListNode extends MDBlockNode
{
}

class MDOrderedListNode extends MDBlockNode
{
}

class MDListItemNode extends MDBlockNode
{
}

class MDCodeBlockNode extends MDBlockNode
{
}

class MDTableNode extends MDBlockNode
{
}

class MDTableRowNode extends MDBlockNode
{
}

class MDTableCellNode extends MDBlockNode
{
}

class MDTableHeaderCellNode extends MDBlockNode
{
}

class MDDefinitionListNode extends MDBlockNode
{
}

class MDDefinitionListTermNode extends MDBlockNode
{
}

class MDDefinitionListDefinitionNode extends MDBlockNode
{
}

class MDFootnoteListNode extends MDBlockNode
{
}

/** Parent of the inline node classes. */
class MDInlineNode extends MDNode
{
}

class MDTextNode extends MDInlineNode
{
}

class MDObfuscatedTextNode extends MDTextNode
{
}

class MDEmphasisNode extends MDInlineNode
{
}

class MDStrongNode extends MDInlineNode
{
}

class MDStrikethroughNode extends MDInlineNode
{
}

class MDUnderlineNode extends MDInlineNode
{
}

class MDHighlightNode extends MDInlineNode
{
}

class MDSuperscriptNode extends MDInlineNode
{
}

class MDSubscriptNode extends MDInlineNode
{
}

class MDCodeNode extends MDInlineNode
{
}

class MDFootnoteNode extends MDInlineNode
{
}

class MDLinkNode extends MDInlineNode
{
}

class MDReferencedLinkNode extends MDLinkNode
{
}

class MDImageNode extends MDInlineNode
{
}

class MDReferencedImageNode extends MDImageNode
{
}

class MDAbbreviationNode extends MDInlineNode
{
}

class MDLineBreakNode extends MDInlineNode
{
}

class MDHTMLTagNode extends MDInlineNode
{
}
-
-
- // -- Main class ------------------------------------------------------------
-
-
/**
 * Main class declaration. The body is empty in this part of the file.
 */
class Markdown
{
}
- ?>
|