| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495 |
- <?php
- declare(strict_types=1);
-
/**
 * Static utilities shared by the markdown parser.
 *
 * None of these helpers modify their arguments, so all parameters are taken
 * by value. This lets callers pass literals and function results directly.
 */
class MDUtils {
    // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
    public static $baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';
    // Modified from https://emailregex.com/ to remove capture groups.
    public static $baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

    /**
     * Encodes every character as an HTML decimal numeric entity (`&#NN;`) to
     * make it marginally more difficult for web scrapers to grab sensitive
     * info. If `text` starts with `mailto:`, that prefix is kept verbatim and
     * only the remainder is obfuscated.
     *
     * @param string $text text to obfuscate
     * @return string HTML-safe obfuscated text
     */
    public static function escapeObfuscated(string $text): string {
        if (str_starts_with($text, 'mailto:')) {
            return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
        }
        $html = '';
        foreach (mb_str_split($text) as $ch) {
            $cp = mb_ord($ch);
            // A numeric character reference is `&#`, the decimal code point,
            // then a terminating semicolon.
            $html .= "&#{$cp};";
        }
        return $html;
    }

    /**
     * Removes illegal characters from an HTML attribute name.
     *
     * @param string $name raw attribute name
     * @return string name with whitespace, `/`, `>`, quotes, and `=` removed
     */
    public static function scrubAttributeName(string $name): string {
        return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
    }

    /**
     * Strips `levels` leading indents from a line or lines of markdown. An
     * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
     * spaces) are treated like one indent level. The pattern requires exactly
     * `levels` indents at the start; a line with fewer is returned unchanged.
     *
     * @param string|string[] $line
     * @param int $levels
     * @return string|string[] same shape as the input
     */
    public static function stripIndent(string|array $line, int $levels=1): string|array {
        // The outer braces are literal regex quantifier braces: `{N}`.
        $regex = "^(?: {1,4}|\\t){{$levels}}";
        if (is_array($line)) {
            return array_map(fn(string $l): string => mb_ereg_replace($regex, '', $l), $line);
        }
        return mb_ereg_replace($regex, '', $line);
    }

    /**
     * Counts the number of indent levels in a line of text. Partial indents
     * (1 to 3 spaces) are counted as one indent level unless `fullIndentsOnly`
     * is `true`.
     *
     * @param string $line line to inspect
     * @param bool $fullIndentsOnly when true only 4 spaces or a tab count
     * @return int number of leading indents
     */
    public static function countIndents(string $line, bool $fullIndentsOnly=false): int {
        // normalize indents to tabs
        $t = mb_ereg_replace($fullIndentsOnly ? "(?: {4}|\\t)" : "(?: {1,4}|\\t)", "\t", $line);
        // remove content after indent
        $t = mb_ereg_replace("^(\\t*)(.*?)$", "\\1", $t);
        // count tabs
        return mb_strlen($t);
    }

    /**
     * Returns a copy of an array without any whitespace-only lines at the end.
     *
     * @param string[] $lines
     * @return string[]
     */
    public static function withoutTrailingBlankLines(array $lines): array {
        $stripped = $lines;
        while (sizeof($stripped) > 0 && trim($stripped[sizeof($stripped) - 1]) === '') {
            array_pop($stripped);
        }
        return $stripped;
    }

    /**
     * Tests if an array of lines contains at least one blank. A blank line
     * can contain whitespace.
     *
     * @param string[] $lines
     * @return bool
     */
    public static function containsBlankLine(array $lines): bool {
        foreach ($lines as $line) {
            if (trim($line) === '') return true;
        }
        return false;
    }

    /**
     * Tests whether two associative arrays hold the same key/value pairs.
     * Values are compared the way `array_diff_assoc` compares them.
     *
     * @param array $a
     * @param array $b
     * @return bool `true` if both arrays contain exactly the same pairs
     */
    public static function equalAssocArrays(array $a, array $b) {
        // array_diff_assoc only reports entries of $a missing from $b, so the
        // sizes must also match for the comparison to be symmetric.
        return sizeof($a) === sizeof($b) && empty(array_diff_assoc($a, $b));
    }
}
-
/**
 * Kinds of lexical token an `MDToken` can represent. The trailing comments
 * note which `MDToken` fields carry the payload for that kind.
 */
enum MDTokenType {
    // --- Plain text ---
    case Text;
    /**
     * Leading or trailing whitespace surrounding a run of text. Whitespace
     * inside a text run is not split out into its own token.
     */
    case Whitespace;

    // --- Single punctuation characters ---
    case Underscore;
    case Asterisk;
    case Slash;
    case Tilde;
    case Bang;
    case Backtick;
    case Equal;
    case Caret;

    // --- Compound tokens ---
    case Label; // content=label
    case URL; // content=URL, extra=title
    case Email; // content=email address, extra=title
    case SimpleLink; // content=URL
    case SimpleEmail; // content=email address
    case Footnote; // content=symbol
    case Modifier; // modifier=MDTagModifier

    case HTMLTag; // tag=MDHTMLTag

    // --- Search-pattern wildcards (never assigned to an actual token) ---
    /** Wildcard for `MDToken::findFirstTokens` */
    case META_AnyNonWhitespace;
    /** Wildcard for `MDToken::findFirstTokens` */
    case META_OptionalWhitespace;
}
-
/**
 * Search results from `MDToken::findFirstTokens`.
 */
class MDTokenMatch {
    /**
     * @param MDToken[] $tokens the tokens that matched the pattern
     * @param int $index index in the searched array where the match begins
     */
    public function __construct(
        public array $tokens,
        public int $index,
    ) {}
}
-
/**
 * Search results from `MDToken::findPairedTokens`: the opening tokens, the
 * closing tokens, the content between them, and where each was found.
 */
class MDPairedTokenMatch {
    /**
     * @param MDToken[] $startTokens tokens matching the opening pattern
     * @param MDToken[] $contentTokens tokens between the opening and closing matches
     * @param MDToken[] $endTokens tokens matching the closing pattern
     * @param int $startIndex index of the first opening token
     * @param int $contentIndex index of the first content token
     * @param int $endIndex index of the first closing token
     * @param int $totalLength token count from first opening token through last closing token
     */
    public function __construct(
        public array $startTokens,
        public array $contentTokens,
        public array $endTokens,
        public int $startIndex,
        public int $contentIndex,
        public int $endIndex,
        public int $totalLength,
    ) {}
}
-
/**
 * One lexical unit in inline markdown syntax parsing.
 */
class MDToken {
    /**
     * The original verbatim token string. Required as a plaintext fallback if
     * the token remains unresolved.
     */
    public string $original;
    public MDTokenType $type;
    public ?string $content = null;
    public ?string $extra = null;
    public ?MDHTMLTag $tag = null;
    public ?MDTagModifier $modifier = null;

    /**
     * Creates a token. The `content` argument is routed to a field by its
     * runtime type: an `MDTagModifier` is stored in `modifier`, an `MDHTMLTag`
     * in `tag`, and anything else (string or null) in `content`.
     *
     * @param string $original verbatim token string
     * @param MDTokenType $type token type
     * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
     *   the token
     * @param string|null $extra additional content
     */
    public function __construct(string $original, MDTokenType $type,
        string|MDTagModifier|MDHTMLTag|null $content=null,
        ?string $extra=null) {
        $this->original = $original;
        $this->type = $type;
        if ($content instanceof MDTagModifier) {
            $this->modifier = $content;
        } elseif ($content instanceof MDHTMLTag) {
            $this->tag = $content;
        } else {
            $this->content = $content;
        }
        $this->extra = $extra;
    }

    /**
     * Debug representation showing the class name, type name, and content.
     */
    public function __toString(): string {
        $classname = get_class($this);
        // A pure enum cannot be interpolated directly; use its case name.
        return "({$classname} type={$this->type->name} content={$this->content})";
    }

    /**
     * Attempts to parse a label token from the beginning of `line`. A label is
     * of the form `[content]`. Nested bracket pairs are allowed inside the
     * content, and a backslash skips the character after it. If found,
     * returns an array:
     * - `0`: the entire label including brackets
     * - `1`: the content of the label
     *
     * @param string $line
     * @return ?string[] match groups or null if not found
     */
    public static function tokenizeLabel(string $line): ?array {
        if (!str_starts_with($line, '[')) return null;
        $parenCount = 0;
        $bracketCount = 0;
        $l = mb_strlen($line);
        for ($p = 1; $p < $l; $p++) {
            $ch = mb_substr($line, $p, 1);
            if ($ch === '\\') {
                // Skip the escaped character.
                $p++;
            } elseif ($ch === '(') {
                $parenCount++;
            } elseif ($ch === ')') {
                $parenCount--;
                // An unbalanced closing paren aborts the label.
                if ($parenCount < 0) return null;
            } elseif ($ch === '[') {
                $bracketCount++;
            } elseif ($ch === ']') {
                if ($bracketCount > 0) {
                    $bracketCount--;
                } else {
                    return [ mb_substr($line, 0, $p + 1), mb_substr($line, 1, $p - 1) ];
                }
            }
        }
        return null;
    }

    private static $urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)'; // 1=URL, 2=title
    private static $urlRegex = '^\\((\\S+?)\\)'; // 1=URL

    /**
     * Attempts to parse a URL token from the beginning of `line`. A URL token
     * is of the form `(url)` or `(url "title")`. Returns `null` when the same
     * text also parses as an email token. If found, returns an array:
     * - `0`: the entire URL token including parentheses
     * - `1`: the URL
     * - `2`: the optional title, or `null`
     *
     * @param string $line
     * @return ?array token tuple
     */
    public static function tokenizeURL(string $line): ?array {
        $groups = [];
        if (mb_eregi(self::$urlWithTitleRegex, $line, $groups)) {
            if (self::tokenizeEmail($line)) return null; // make sure it's not better described as an email address
            return $groups;
        }
        if (mb_eregi(self::$urlRegex, $line, $groups)) {
            if (self::tokenizeEmail($line)) return null;
            return [ $groups[0], $groups[1], null ];
        }
        return null;
    }

    /**
     * Attempts to parse an email address from the beginning of `line`. An
     * email address is of the form `(user@example.com)` or
     * `(user@example.com "link title")`. If found, returns an array:
     * - `0`: the entire token including parentheses
     * - `1`: the email address
     * - `2`: the optional link title, or `null`
     *
     * @param string $line
     * @return ?string[] token tuple
     */
    public static function tokenizeEmail(string $line): ?array {
        $groups = [];
        if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s+\"(.*?)\"\\s*\\)",
            $line, $groups)) {
            return $groups;
        }
        if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s*\\)", $line, $groups)) {
            return [ $groups[0], $groups[1], null ];
        }
        return null;
    }

    /**
     * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
     * If found, returns a `MDTokenMatch`, otherwise `null`.
     *
     * Two wildcard types are supported in `pattern`:
     * - `META_AnyNonWhitespace` matches any element that is not a whitespace
     *   token (including non-token nodes).
     * - `META_OptionalWhitespace` matches a whitespace token if one is present
     *   and otherwise matches nothing, so results may contain a variable
     *   number of tokens. It is also satisfied at the end of the array.
     *
     * @param (MDToken|MDNode)[] $tokensToSearch - mixed array of `MDToken` and
     *   `MDNode` elements
     * @param MDTokenType[] $pattern - contiguous run of token types to find
     * @param int $startIndex - token index to begin searching (defaults to 0)
     * @return ?MDTokenMatch match object, or `null` if not found
     */
    public static function findFirstTokens(array $tokensToSearch, array $pattern, int $startIndex=0): ?MDTokenMatch {
        $tokenCount = sizeof($tokensToSearch);
        $patternLength = sizeof($pattern);
        for ($t = $startIndex; $t < $tokenCount; $t++) {
            $matchedAll = true;
            $matched = [];
            // Becomes negative each time an optional element consumes nothing,
            // so later pattern elements line up with the right token.
            $patternOffset = 0;
            for ($p = 0; $p < $patternLength; $p++) {
                $t0 = $t + $p + $patternOffset;
                $token = ($t0 < $tokenCount) ? $tokensToSearch[$t0] : null;
                $elem = $pattern[$p];
                if ($elem === MDTokenType::META_OptionalWhitespace) {
                    if ($token instanceof MDToken && $token->type === MDTokenType::Whitespace) {
                        array_push($matched, $token);
                    } else {
                        $patternOffset--;
                    }
                    continue;
                }
                if ($t0 >= $tokenCount) {
                    // Ran out of tokens on a required element.
                    $matchedAll = false;
                    break;
                }
                if ($elem === MDTokenType::META_AnyNonWhitespace) {
                    if ($token instanceof MDToken && $token->type === MDTokenType::Whitespace) {
                        $matchedAll = false;
                        break;
                    }
                    array_push($matched, $token);
                } else {
                    if (!($token instanceof MDToken) || $token->type !== $elem) {
                        $matchedAll = false;
                        break;
                    }
                    array_push($matched, $token);
                }
            }
            if ($matchedAll) {
                return new MDTokenMatch($matched, $t);
            }
        }
        return null;
    }

    /**
     * Searches an array of MDToken for a given starting pattern and ending
     * pattern and returns match info about both and the tokens in between.
     *
     * If `contentValidator` is specified, it will be called with the content
     * tokens of a potential match. If the validator returns `true`, the result
     * will be accepted and returned by this method. If the validator returns
     * `false`, this method will keep looking for another matching pair. If no
     * validator is given the first match with non-empty content is returned.
     * Pairs with no tokens between them are never accepted.
     *
     * If a match is found, a `MDPairedTokenMatch` is returned with details
     * of the opening tokens, closing tokens, and content tokens between. Otherwise
     * `null` is returned.
     *
     * @param MDToken[] $tokensToSearch - array of `MDToken` to search in
     * @param MDTokenType[] $startPattern - array of `MDTokenType` to find first
     * @param MDTokenType[] $endPattern - array of `MDTokenType` to find positioned after `startPattern`
     * @param ?callable $contentValidator - optional validator function. If provided, will be passed an array of inner `MDToken`, and the function can return `true` to accept the contents or `false` to keep searching
     * @param int $startIndex - token index where searching should begin
     * @return ?MDPairedTokenMatch match, or `null`
     */
    public static function findPairedTokens(array $tokensToSearch,
        array $startPattern, array $endPattern, ?callable $contentValidator=null,
        int $startIndex=0): ?MDPairedTokenMatch {
        $tokenCount = sizeof($tokensToSearch);
        for ($s = $startIndex; $s < $tokenCount; $s++) {
            $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
            if ($startMatch === null) return null;
            $contentStart = $startMatch->index + sizeof($startMatch->tokens);
            $endStart = $contentStart;
            while ($endStart < $tokenCount) {
                $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                if ($endMatch === null) break;
                $contentLength = $endMatch->index - $contentStart;
                $contents = array_slice($tokensToSearch, $contentStart, $contentLength);
                if (sizeof($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                    return new MDPairedTokenMatch($startMatch->tokens,
                        $contents,
                        $endMatch->tokens,
                        $startMatch->index,
                        $contentStart,
                        $endMatch->index,
                        $endMatch->index + sizeof($endMatch->tokens) - $startMatch->index);
                }
                // Contents rejected. Try next end match.
                $endStart = $endMatch->index + 1;
            }
            // No end matches. Resume after this start match (the loop's
            // increment moves one past it).
            $s = $startMatch->index;
        }
        return null;
    }

    /**
     * Tests field-by-field equality with another value.
     *
     * NOTE(review): `tag` is compared by identity (`!==`) while `modifier` is
     * compared loosely (`!=`); confirm this asymmetry is intended.
     *
     * @param mixed $other
     * @return bool `true` only if `other` is an `MDToken` with equal fields
     */
    public function equals($other) {
        if (!($other instanceof MDToken)) return false;
        if ($other->original !== $this->original) return false;
        if ($other->type != $this->type) return false;
        if ($other->content !== $this->content) return false;
        if ($other->extra !== $this->extra) return false;
        if ($other->tag !== $this->tag) return false;
        if ($other->modifier != $this->modifier) return false;
        return true;
    }
}
-
- /**
- * Parsing and rendering state. Passed around throughout the parsing process.
- *
- * States are hierarchical. A sub-state can be created by calling `.copy()` with
- * a new array of lines. The sub-state points back to its parent state. This
- * is done to parse inner content of a syntax as its own standalone document.
- *
- * If a custom `MDReader` implementation wants to store data in this object,
- * always do so on `state.root` to ensure it's stored on the original state,
- * not a child state. Otherwise data may be lost when the sub-state is discarded.
- */
- class MDState {
- /**
- * Ascends the parent chain to the root `MDState` instance. This should be
- * used when referencing most stored fields except `lines` and `p`.
- */
- public function root(): MDState {
- return $this->parent ? $this->parent->root() : $this;
- }
-
- /**
- * Lines of the markdown document. The current line index is pointed to by `p`.
- *
- * @var string[]
- */
- public array $lines;
-
- /**
- * The current line in `lines`.
- */
- public function currentLine(): ?string {
- return ($this->p < sizeof($this->lines)) ? $this->lines[$this->p] : null;
- }
-
- /**
- * Current line pointer into array `lines`.
- */
- public int $p = 0;
-
- private ?MDState $parent = null;
-
- /**
- * Array of `MDReader`s sorted by block reading priority.
- * @var MDReader[]
- */
- public array $readersByBlockPriority = [];
-
- /**
- * Array of `MDReader`s sorted by tokenization priority.
- * @var MDReader[]
- */
- public array $readersByTokenPriority = [];
-
- /**
- * Array of tuples of `pass:number` and `MDReader` sorted by substitution
- * priority.
- * @var array[]
- */
- public array $readersBySubstitutePriority = [];
-
- /**
- * Prefix to include in any generated `id` attributes on HTML elements.
- * Useful for keeping elements unique in multiple parsed documents in the
- * same HTML page.
- */
- public string $elementIdPrefix = '';
-
- /**
- * Filter for removing unapproved HTML tags, attributes, and values.
- */
- public MDHTMLFilter $tagFilter;
-
- private static string $textWhitespaceRegex = '^(\\s*)(?:(\\S|\\S.*\\S)(\\s*?))?$'; // 1=leading WS, 2=text, 3=trailing WS
-
- /**
- * @param string[] $lines - lines of markdown text
- */
- public function __construct(array $lines) {
- $this->lines = $lines;
- }
-
- /**
- * Creates a copy of this state with new lines. Useful for parsing nested
- * content.
- *
- * @param string[] $lines
- * @return MDState copied sub-state
- */
- public function copy(array $lines) {
- $cp = new MDState($lines);
- $cp->parent = $this;
- return $cp;
- }
-
- /**
- * Tests if there are at least `minCount` lines available to read. If `p`
- * is not provided it will be relative to `this.p`.
- */
- public function hasLines(int $minCount, ?int $p=null): bool {
- $relativeTo = ($p === null) ? $this->p : $p;
- return $relativeTo + $minCount <= sizeof($this->lines);
- }
-
- /**
- * Reads and returns an array of blocks from the current line pointer.
- *
- * @return MDBlockNode[] parsed blocks
- */
- public function readBlocks(): array {
- $blocks = [];
- while ($this->hasLines(1)) {
- $block = $this->readNextBlock();
- if ($block) {
- array_push($blocks, $block);
- } else {
- break;
- }
- }
- return $blocks;
- }
-
- /**
- * Creates a simple `MDBlockNode` if no other registered blocks match.
- */
- private function readFallbackBlock(): ?MDBlockNode {
- if ($this->p >= sizeof($this->lines)) return null;
- $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
- if (sizeof($lines) == 0) return null;
- $this->p = sizeof($this->lines);
- return $this->inlineMarkdownToNode(implode("\n", $lines));
- }
-
- /**
- * Attempts to read one block from the current line pointer. The pointer
- * will be positioned just after the end of the block.
- */
- private function readNextBlock(): ?MDBlockNode {
- while ($this->hasLines(1) && mb_strlen(trim($this->lines[$this->p])) == 0) {
- $this->p++;
- }
- if (!$this->hasLines(1)) return null;
- foreach ($this->root()->readersByBlockPriority as $reader) {
- $startP = $this->p;
- $block = $reader->readBlock($this);
- if ($block) {
- if ($this->p == $startP) {
- $readerClassName = get_class($reader);
- $blockClassName = get_class($block);
- throw new Error("{$readerClassName} returned an " +
- "{$blockClassName} without incrementing MDState.p. " +
- "This could lead to an infinite loop.");
- }
- return $block;
- }
- }
- $fallback = $this->readFallbackBlock();
- return $fallback;
- }
-
- /**
- * @param string $line
- * @return MDToken[]
- */
- private function inlineMarkdownToTokens(string $line): array {
- if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);
-
- $tokens = [];
- $text = '';
- $expectLiteral = false;
-
- /**
- * Flushes accumulated content in `text` to `tokens`.
- */
- function endText() {
- if (mb_strlen($text) == 0) return;
- $textGroups = null;
- if (mb_eregi(MDState::$textWhitespaceRegex, $text, $textGroups)) {
- if (mb_strlen($textGroups[1]) > 0) {
- array_push($tokens, new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]));
- }
- if ($textGroups[2] && mb_strlen($textGroups[2]) > 0) {
- $tokens.push(new MDToken($textGroups[2], MDTokenType::Text, $textGroups[2]));
- }
- if ($textGroups[3] && mb_strlen($textGroups[3]) > 0) {
- $tokens.push(new MDToken($textGroups[3], MDTokenType::Whitespace, $textGroups[3]));
- }
- } else {
- array_push($tokens, new MDToken($text, MDTokenType::Text, $text));
- }
- $text = '';
- }
-
- for ($p = 0; $p < mb_strlen(line); $p++) {
- $ch = mb_substr($line, p, 1);
- $remainder = mb_substr($line, $p);
- if ($expectLiteral) {
- $text .= $ch;
- $expectLiteral = false;
- continue;
- }
- if ($ch == '\\') {
- $expectLiteral = true;
- continue;
- }
- $found = false;
- foreach ($this->root()->readersByTokenPriority as $reader) {
- $token = $reader->readToken($this, $remainder);
- if ($token === null) continue;
- endText();
- array_push($tokens, $token);
- if ($token->original == null || mb_strlen($token->original) == 0) {
- $readerClassName = get_class($reader);
- throw new Error(`{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.`);
- }
- $p += mb_strlen($token->original) - 1;
- $found = true;
- break;
- }
- if (!$found) {
- $text += $ch;
- }
- }
- endText();
- return $tokens;
- }
-
- /**
- * Converts a line of markdown to an `MDInlineNode`.
- *
- * @param string|string[] $line
- * @return MDInlineNode
- */
- public function inlineMarkdownToNode(string|array $line): MDInlineNode {
- $nodes = $this->inlineMarkdownToNodes($line);
- return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
- }
-
- /**
- * Converts a line of markdown to an array of `MDInlineNode`s.
- *
- * @param string|string[] $line
- * @return MDInlineNode[]
- */
- public function inlineMarkdownToNodes(string|array $line): array {
- $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
- return $this->tokensToNodes($tokens);
- }
-
- /**
- * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
- * of only `MDInlineNode` via repeated `MDReader` substition.
- *
- * @param (MDToken|MDInlineNode)[] $tokens
- * @return MDInlineNode[]
- */
- public function tokensToNodes(array $tokens): array {
- $nodes = $tokens;
-
- // Perform repeated substitutions, converting sequences of tokens into
- // nodes, until no more substitutions can be made.
- $anyChanges = false;
- do {
- $anyChanges = false;
- foreach ($this->root->readersBySubstitutePriority as $readerTuple) {
- /** @var int */
- $pass = $readerTuple[0];
- /** @var MDReader */
- $reader = $readerTuple[1];
- $changed = $reader->substituteTokens($this, $pass, $nodes);
- if (!$changed) continue;
- $anyChanges = true;
- break;
- }
- } while ($anyChanges);
-
- // Convert any remaining tokens to text nodes. Also apply any inline
- // CSS modifiers.
- $lastNode = null;
- $me = $this;
- $nodes = array_map(function($node) use ($lastNode, $me) {
- if ($node instanceof MDToken) {
- /** @var MDToken */
- $token = $node;
- if ($token->type == MDTokenType::Modifier && $lastNode) {
- $me->root()->tagFilter->scrubModifier($token->modifier);
- $token->modifier->applyTo($lastNode);
- $lastNode = null;
- return new MDTextNode('');
- }
- $lastNode = null;
- return new MDTextNode($token->original);
- } elseif ($node instanceof MDNode) {
- $lastNode = ($node instanceof MDTextNode) ? null : $node;
- return $node;
- } else {
- $nodeClassName = get_class($node);
- throw new Error("Unexpected node type {$nodeClassName}");
- }
- }, $nodes);
-
- return $nodes;
- }
-
- /**
- * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
- * and `MDReferencedImageReader`.
- * @var array symbol -> URL
- */
- private array $referenceToURL = [];
-
- /**
- * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
- * and `MDReferencedImageReader`.
- * @var array symbol -> title string
- */
- private array $referenceToTitle = [];
-
- /**
- * Defines a URL by reference symbol. The definition is stored on the root
- * state, keyed by the lowercased symbol, so lookups are case-insensitive.
- *
- * @param string $reference reference symbol
- * @param string $url URL the symbol resolves to
- * @param ?string $title optional link title; left untouched when `null`
- */
- public function defineURL(string $reference, string $url, ?string $title=null) {
-     // Always go through root() like the lookup methods do, so the value is
-     // written to the same state it is later read from.
-     $root = $this->root();
-     $key = mb_strtolower($reference);
-     $root->referenceToURL[$key] = $url;
-     if ($title !== null) $root->referenceToTitle[$key] = $title;
- }
-
- /**
- * Looks up the URL defined for a reference symbol on the root state.
- * The symbol is lowercased before lookup. Returns `null` when undefined.
- */
- public function urlForReference(string $reference): ?string {
-     $key = mb_strtolower($reference);
-     $urls = $this->root()->referenceToURL;
-     return $urls[$key] ?? null;
- }
-
- /**
- * Looks up the link title defined for a reference symbol on the root state.
- * The symbol is lowercased before lookup. Returns `null` when undefined.
- */
- public function urlTitleForReference(string $reference): ?string {
-     $key = mb_strtolower($reference);
-     $titles = $this->root()->referenceToTitle;
-     return $titles[$key] ?? null;
- }
- }
-
- /**
- * Defines a set of allowable HTML tags, attributes, and CSS.
- */
- class MDHTMLFilter {
- /**
- * Mapping of permitted lowercase tag names to arrays containing allowable
- * attributes for those tags. Does not need to include those attributes
- * defined in `allowableGlobalAttributes`.
- *
- * Values are arrays with allowable lowercase attribute names mapped to
- * allowable value patterns. A `*` means any value is acceptable. Multiple
- * allowable values can be joined together with `|`. These special symbols
- * represent certain kinds of values and can be used in combination or in
- * place of literal values.
- *
- * - `{classlist}`: A list of legal CSS classnames, separated by spaces
- * - `{int}`: An integer
- * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
- * - `{style}`: One or more CSS declarations, separated by semicolons (simple
- * `key: value;` syntax only)
- * - `{url}`: A URL
- *
- * @var array<string, array<string, string>>
- */
- public array $allowableTags = [
-     'address' => [
-         'cite' => '{url}',
-     ],
-     'h1' => [],
-     'h2' => [],
-     'h3' => [],
-     'h4' => [],
-     'h5' => [],
-     'h6' => [],
-     'blockquote' => [],
-     'dl' => [],
-     'dt' => [],
-     'dd' => [],
-     'div' => [],
-     'hr' => [],
-     'ul' => [],
-     'ol' => [
-         'start' => '{int}',
-         'type' => 'a|A|i|I|1',
-     ],
-     'li' => [
-         'value' => '{int}',
-     ],
-     'p' => [],
-     'pre' => [],
-     'table' => [],
-     'thead' => [],
-     'tbody' => [],
-     'tfoot' => [],
-     'tr' => [],
-     'td' => [],
-     'th' => [],
-     'a' => [
-         'href' => '{url}',
-         'target' => '*',
-     ],
-     'abbr' => [],
-     'b' => [],
-     'br' => [],
-     'cite' => [],
-     'code' => [],
-     'data' => [
-         'value' => '*',
-     ],
-     'dfn' => [],
-     'em' => [],
-     'i' => [],
-     'kbd' => [],
-     'mark' => [],
-     'q' => [
-         'cite' => '{url}',
-     ],
-     's' => [],
-     'samp' => [],
-     'small' => [],
-     'span' => [],
-     'strong' => [],
-     'sub' => [],
-     'sup' => [],
-     'time' => [
-         'datetime' => '*',
-     ],
-     'u' => [],
-     'var' => [],
-     'wbr' => [],
-     'img' => [
-         'alt' => '*',
-         // `href` is kept for backward compatibility; `src` is the attribute
-         // an image actually needs, so it must be allowed too.
-         'href' => '{url}',
-         'src' => '{url}',
-     ],
-     'figure' => [],
-     'figcaption' => [],
-     'del' => [],
-     'ins' => [],
-     'details' => [],
-     'summary' => [],
- ];
-
- /**
- * Mapping of allowable lowercase global attributes to their permitted
- * values. Uses same value pattern syntax as described in `allowableTags`.
- * A name ending in `*` (such as `data-*`) is treated as a prefix pattern by
- * `isValidAttributeName` and `isValidAttributeValue`.
- * @var array<string, string>
- */
- public array $allowableGlobalAttributes = [
- 'class' => '{classlist}',
- 'data-*' => '*',
- 'dir' => 'ltr|rtl|auto',
- 'id' => '*',
- 'lang' => '*',
- 'style' => '{style}',
- 'title' => '*',
- 'translate' => 'yes|no|{none}',
- ];
-
- /**
- * Mapping of allowable CSS style names to their allowable value patterns.
- * Multiple values can be delimited with `|` characters. Limited support
- * so far.
- *
- * Recognized special values:
- * - `{color}`: A hex or named color
- *
- * @var array<string, string>
- */
- public array $allowableStyleKeys = [
- 'background-color' => '{color}',
- 'color' => '{color}',
- ];
-
- /**
- * Removes every attribute from an HTML tag whose name or value is not
- * permitted for that tag. The tag name itself is assumed to have been
- * whitelisted already.
- *
- * @param MDHTMLTag $tag HTML tag, modified in place
- */
- public function scrubTag(MDHTMLTag $tag) {
-     $tagName = $tag->tagName;
-     foreach ($tag->attributes as $attrName => $attrValue) {
-         $nameOk = $this->isValidAttributeName($tagName, $attrName);
-         $valueOk = $this->isValidAttributeValue($tagName, $attrName, $attrValue);
-         if ($nameOk && $valueOk) continue;
-         unset($tag->attributes[$attrName]);
-     }
- }
-
- /**
- * Scrubs all forbidden classes, id, styles, and attributes from a tag
- * modifier. The modifier is changed in place.
- *
- * @param MDTagModifier $modifier
- * @param ?string $tagName HTML tag name, if known, otherwise only
- * global attributes will be permitted
- */
- public function scrubModifier(MDTagModifier $modifier, ?string $tagName = null) {
-     // Classes are validated as one space-separated list; all or nothing.
-     if (sizeof($modifier->cssClasses) > 0) {
-         $classList = implode(' ', $modifier->cssClasses);
-         if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
-             $modifier->cssClasses = [];
-         }
-     }
-     if ($modifier->cssId !== null
-         && !$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
-         $modifier->cssId = null;
-     }
-     if (!$this->isValidAttributeName($tagName, 'style')) {
-         $modifier->cssStyles = [];
-     } else {
-         foreach ($modifier->cssStyles as $key => $val) {
-             if (!$this->isValidStyleValue($key, $val)) {
-                 unset($modifier->cssStyles[$key]);
-             }
-         }
-     }
-     // isValidAttributeValue also rejects attribute names that are not allowed
-     foreach ($modifier->attributes as $key => $val) {
-         if (!$this->isValidAttributeValue($tagName, $key, $val)) {
-             unset($modifier->attributes[$key]);
-         }
-     }
- }
-
- /**
- * Reports whether `allowableTags` has an entry for the tag name
- * (compared lowercased).
- */
- public function isValidTagName(string $tagName): bool {
-     $lcTagName = mb_strtolower($tagName);
-     return isset($this->allowableTags[$lcTagName]);
- }
-
- /**
- * Tests if an HTML attribute name is permitted, either globally (exact
- * name or `prefix*` pattern in `allowableGlobalAttributes`) or for the
- * given tag in `allowableTags`.
- *
- * @param ?string $tagName tag name, or `null` to consider only global attributes
- * @param string $attributeName attribute name, compared lowercased
- */
- public function isValidAttributeName(?string $tagName, string $attributeName): bool {
-     $lcAttributeName = mb_strtolower($attributeName);
-     if (isset($this->allowableGlobalAttributes[$lcAttributeName])) {
-         return true;
-     }
-     foreach ($this->allowableGlobalAttributes as $pattern => $valuePattern) {
-         if (!str_ends_with($pattern, '*')) continue;
-         $patternPrefix = mb_substr($pattern, 0, mb_strlen($pattern) - 1);
-         if (str_starts_with($lcAttributeName, $patternPrefix)) {
-             return true;
-         }
-     }
-     if ($tagName === null) return false;
-     // `??` avoids an undefined-key warning for tags that are not whitelisted
-     $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
-     if ($tagAttributes === null) return false;
-     return isset($tagAttributes[$lcAttributeName]);
- }
-
- /**
- * Tests if an attribute value is allowable. The value pattern is taken
- * from the first of: an exact global attribute, a `prefix*` global
- * attribute, or the tag-specific attribute list. With no pattern found
- * the value is rejected.
- */
- public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
-     $attr = mb_strtolower($attributeName);
-     $pattern = $this->allowableGlobalAttributes[$attr] ?? null;
-     if ($pattern === null) {
-         foreach ($this->allowableGlobalAttributes as $wildcardName => $wildcardPattern) {
-             if (!str_ends_with($wildcardName, '*')) continue;
-             $prefix = mb_substr($wildcardName, 0, mb_strlen($wildcardName) - 1);
-             if (str_starts_with($attr, $prefix)) {
-                 $pattern = $wildcardPattern;
-                 break;
-             }
-         }
-     }
-     if ($pattern === null && $tagName !== null) {
-         $pattern = $this->allowableTags[mb_strtolower($tagName)][$attr] ?? null;
-     }
-     if ($pattern === null) return false;
-     return $this->attributeValueMatchesPattern($attributeValue, $pattern);
- }
-
- private static string $permissiveURLRegex = '^\\S+$';
- private static string $integerRegex = '^[\\-]?\\d+$';
- private static string $classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';
-
- /**
- * Tests a value against a `|`-delimited pattern of literal options and
- * special symbols (`*`, `{classlist}`, `{int}`, `{none}`, `{style}`,
- * `{url}`). Returns `true` as soon as any option accepts the value.
- *
- * @param string|bool $value attribute value; `true` denotes a valueless attribute
- * @param string $pattern value pattern
- */
- private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
-     // Only real strings may be matched textually; a valueless attribute
-     // (`true`) would otherwise be coerced to "1" and pass e.g. `{int}`.
-     $isString = is_string($value);
-     foreach (explode('|', $pattern) as $option) {
-         switch ($option) {
-             case '*':
-                 return true;
-             case '{classlist}':
-                 if ($isString && mb_eregi(self::$classListRegex, $value)) return true;
-                 break;
-             case '{int}':
-                 if ($isString && mb_eregi(self::$integerRegex, $value)) return true;
-                 break;
-             case '{none}':
-                 if ($value === true) return true;
-                 break;
-             case '{style}':
-                 if ($isString && $this->isValidStyleDeclaration($value)) return true;
-                 break;
-             case '{url}':
-                 if ($isString && mb_eregi(self::$permissiveURLRegex, $value)) return true;
-                 break;
-             default:
-                 if ($value === $option) return true;
-                 break;
-         }
-     }
-     return false;
- }
-
- /**
- * Checks a string of `key: value;` style declarations. Empty segments
- * between semicolons are ignored. Every other segment must contain
- * exactly one colon, an allowable key, and an allowable value for that
- * key, otherwise the whole string is rejected.
- */
- public function isValidStyleDeclaration(string $styles): bool {
-     foreach (explode(';', $styles) as $declaration) {
-         if (trim($declaration) === '') continue;
-         $pieces = explode(':', $declaration);
-         if (sizeof($pieces) != 2) return false;
-         $styleName = trim($pieces[0]);
-         $styleValue = trim($pieces[1]);
-         if (!$this->isValidStyleKey($styleName)) return false;
-         if (!$this->isValidStyleValue($styleName, $styleValue)) return false;
-     }
-     return true;
- }
-
- /**
- * Reports whether `allowableStyleKeys` has an entry for the CSS style key.
- */
- public function isValidStyleKey(string $key): bool {
-     return isset($this->allowableStyleKeys[$key]);
- }
-
- /**
- * Tests if a CSS style value is allowable for the given style key.
- * The key's pattern in `allowableStyleKeys` is split on `|`; the value is
- * accepted if it is a valid color for a `{color}` option or is identical
- * to a literal option. Unknown keys are always rejected.
- */
- public function isValidStyleValue(string $key, string $value): bool {
-     $pattern = $this->allowableStyleKeys[$key] ?? null;
-     if ($pattern === null) return false;
-     foreach (explode('|', $pattern) as $option) {
-         if ($option === '{color}') {
-             // Special symbol: never compare it literally, otherwise the
-             // text "{color}" itself would be accepted as a value.
-             if ($this->isValidCSSColor($value)) return true;
-         } elseif ($value === $option) {
-             return true;
-         }
-     }
-     return false;
- }
-
- // 3- or 6-digit hex color, or a purely alphabetic word (named color)
- private static string $styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';
-
- /**
- * Reports whether the value matches `$styleColorRegex` (case-insensitive).
- */
- private function isValidCSSColor(string $value): bool {
-     $matched = mb_eregi(self::$styleColorRegex, $value);
-     return $matched;
- }
- }
-
- /**
- * Represents a single HTML tag. Paired tags are represented separately.
- */
- class MDHTMLTag {
- /**
- * Verbatim string of the original parsed tag. Not modified. Should be
- * considered unsafe for inclusion in the final document. Use `__toString()`
- * instead.
- */
- public string $original;
- public string $tagName; // tag name as written in the source (case is not normalized by the parser)
- public bool $isCloser; // true for a closing tag such as `</b>`
- /**
- * Map of attribute names to value strings. `fromLineStart` stores `true`
- * instead of a string for attributes written without a value.
- */
- public array $attributes;
-
- /**
- * Stores the given values in the corresponding properties unchanged.
- *
- * @param string $original verbatim source text of the tag
- * @param string $tagName tag name
- * @param bool $isCloser whether this is a closing tag
- * @param array $attributes map of attribute names to values
- */
- public function __construct(string $original, string $tagName, bool $isCloser,
- array $attributes) {
- $this->original = $original;
- $this->tagName = $tagName;
- $this->isCloser = $isCloser;
- $this->attributes = $attributes;
- }
-
- /**
- * Renders the tag as HTML. Closing tags render as `</name>`. Opening tags
- * render the name followed by each attribute; attributes whose value is
- * `true` are written bare, all others as `name="escaped value"`.
- */
- public function __toString(): string {
-     if ($this->isCloser) return "</{$this->tagName}>";
-     $pieces = [ $this->tagName ];
-     foreach ($this->attributes as $attrName => $attrValue) {
-         $safeName = MDUtils::scrubAttributeName($attrName);
-         if ($attrValue === true) {
-             $pieces[] = $safeName;
-             continue;
-         }
-         $escapedValue = MDUtils::escapeHTML("{$attrValue}");
-         $pieces[] = "{$safeName}=\"{$escapedValue}\"";
-     }
-     return '<' . implode(' ', $pieces) . '>';
- }
-
- /**
- * Two tags are equal when the other value is an `MDHTMLTag` with the same
- * tag name, closer flag, and attributes. `original` is not compared.
- */
- public function equals($other): bool {
-     $comparable = ($other instanceof MDHTMLTag)
-         && $other->tagName == $this->tagName
-         && $other->isCloser == $this->isCloser;
-     if (!$comparable) return false;
-     return MDUtils::equal($other->attributes, $this->attributes);
- }
-
- private static string $htmlTagNameFirstRegex = '[a-z]';
- private static string $htmlTagNameMedialRegex = '[a-z0-9]';
- private static string $htmlAttributeNameFirstRegex = '[a-z]';
- private static string $htmlAttributeNameMedialRegex = '[a-z0-9-]';
- private static string $whitespaceCharRegex = '\\s';
-
- /**
- * Checks the start of the given string for presence of an HTML tag.
- *
- * The line is scanned one character at a time by a small state machine
- * (the `$expect...` values below). Scanning stops at the first `>` that
- * closes the tag; anything after it is ignored. Returns `null` when the
- * line does not start with `<`, when a character is not valid for the
- * current state, or when the end of the line is reached before the tag
- * is closed.
- *
- * Attributes without a value are recorded as `true`. Quoted values are
- * HTML-entity decoded; unquoted values are stored as written. An unquoted
- * value is only terminated by whitespace, so a `>` directly after it is
- * absorbed into the value (e.g. `<a href=foo>` yields `null`).
- *
- * @param string $line text to inspect
- * @return ?MDHTMLTag parsed tag, with `original` set to the consumed text
- */
- public static function fromLineStart(string $line): ?MDHTMLTag {
- $expectOpenBracket = 0;
- $expectCloserOrName = 1;
- $expectName = 2;
- $expectAttributeNameOrEnd = 3;
- $expectEqualsOrAttributeOrEnd = 4;
- $expectAttributeValue = 5;
- $expectCloseBracket = 6;
-
- $isCloser = false;
- $tagName = '';
- $attributeName = '';
- $attributeValue = '';
- $attributeQuote = null; // null = not started, '' = unquoted value, otherwise the quote character
- $attributes = [];
- $fullTag = null; // set once the closing `>` has been found
- $endAttribute = function(bool $unescape=false) use (&$attributes, &$attributeName, &$attributeValue, &$attributeQuote) {
- if (mb_strlen($attributeName) > 0) {
- if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) {
- $attributes[$attributeName] = $unescape ? html_entity_decode($attributeValue, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') : $attributeValue;
- } else {
- $attributes[$attributeName] = true;
- }
- }
- $attributeName = '';
- $attributeValue = '';
- $attributeQuote = null;
- };
-
- $expect = $expectOpenBracket;
- for ($p = 0; $p < mb_strlen($line) && $fullTag === null; $p++) {
- $ch = mb_substr($line, $p, 1);
- $isWhitespace = mb_eregi(self::$whitespaceCharRegex, $ch);
- switch ($expect) {
- case $expectOpenBracket:
- if ($ch != '<') return null;
- $expect = $expectCloserOrName;
- break;
- case $expectCloserOrName:
- if ($ch == '/') {
- $isCloser = true;
- } else {
- $p--; // re-read this character as the first of the tag name
- }
- $expect = $expectName;
- break;
- case $expectName:
- if (mb_strlen($tagName) == 0) {
- if (!mb_eregi(self::$htmlTagNameFirstRegex, $ch)) return null;
- $tagName .= $ch;
- } else {
- if (mb_eregi(self::$htmlTagNameMedialRegex, $ch)) {
- $tagName .= $ch;
- } else {
- $p--; // re-read this character in the next state
- $expect = ($isCloser) ? $expectCloseBracket : $expectAttributeNameOrEnd;
- }
- }
- break;
- case $expectAttributeNameOrEnd:
- if (mb_strlen($attributeName) == 0) {
- if ($isWhitespace) {
- // skip whitespace
- } elseif ($ch == '/') {
- $expect = $expectCloseBracket;
- } elseif ($ch == '>') {
- $fullTag = mb_substr($line, 0, $p + 1);
- break;
- } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
- $attributeName .= $ch;
- } else {
- return null;
- }
- } elseif ($isWhitespace) {
- $expect = $expectEqualsOrAttributeOrEnd;
- } elseif ($ch == '/') {
- $endAttribute();
- $expect = $expectCloseBracket;
- } elseif ($ch == '>') {
- $endAttribute();
- $fullTag = mb_substr($line, 0, $p + 1);
- break;
- } elseif ($ch == '=') {
- $expect = $expectAttributeValue;
- } elseif (mb_eregi(self::$htmlAttributeNameMedialRegex, $ch)) {
- $attributeName .= $ch;
- } else {
- return null;
- }
- break;
- case $expectEqualsOrAttributeOrEnd:
- if ($ch == '=') {
- $expect = $expectAttributeValue;
- } elseif ($isWhitespace) {
- // skip whitespace
- } elseif ($ch == '/') {
- $expect = $expectCloseBracket;
- } elseif ($ch == '>') {
- $fullTag = mb_substr($line, 0, $p + 1); // pending attribute is flushed by $endAttribute() after the loop
- break;
- } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
- $endAttribute();
- $expect = $expectAttributeNameOrEnd;
- $p--; // re-read this character as the start of the next attribute name
- }
- break;
- case $expectAttributeValue:
- if (mb_strlen($attributeValue) == 0) {
- if ($attributeQuote === null) {
- if ($isWhitespace) {
- // skip whitespace
- } elseif ($ch == '"' || $ch == "'") {
- $attributeQuote = $ch;
- } else {
- $attributeQuote = ''; // explicitly unquoted
- $p--;
- }
- } else {
- if ($ch === $attributeQuote) {
- // Empty string
- $endAttribute($attributeQuote != '');
- $expect = $expectAttributeNameOrEnd;
- } elseif ($attributeQuote === '' && ($ch == '/' || $ch == '>')) {
- return null;
- } else {
- $attributeValue .= $ch;
- }
- }
- } else {
- if ($ch === $attributeQuote) {
- $endAttribute($attributeQuote != '');
- $expect = $expectAttributeNameOrEnd;
- } elseif ($attributeQuote === '' && $isWhitespace) {
- $endAttribute();
- $expect = $expectAttributeNameOrEnd;
- } else {
- $attributeValue .= $ch; // note: also absorbs `>` and `/` in unquoted values
- }
- }
- break;
- case $expectCloseBracket:
- if ($isWhitespace) {
- // ignore whitespace
- } elseif ($ch == '>') {
- $fullTag = mb_substr($line, 0, $p + 1);
- break;
- }
- break;
- }
- }
- if ($fullTag === null) return null;
- $endAttribute();
- return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
- }
- }
-
- /**
- * Represents HTML modifications to a node, such as CSS classes to add or
- * additional attributes. See `MDHTMLFilter.scrubModifier()` to remove disallowed
- * values.
- */
- class MDTagModifier {
- /**
- * Verbatim markdown syntax. Unmodified by changes to other properties.
- * Set by `fromContents` to the modifier contents wrapped in braces.
- */
- public string $original;
- /** @var string[] */
- public array $cssClasses = [];
- public ?string $cssId = null;
- public array $cssStyles = []; // CSS property name -> value, parsed from a `style=` attribute
- public array $attributes = []; // attribute name -> value, for everything other than `style`
-
- private static $baseClassRegex = '\\.([a-z_\\-][a-z0-9_\\-]*?)'; // not referenced by any method of this class
- private static $baseIdRegex = '#([a-z_\\-][a-z0-9_\\-]*?)'; // not referenced by any method of this class
- private static $baseAttributeRegex = '([a-z0-9]+?)=([^\\s\\}]+?)'; // not referenced by any method of this class
- private static $baseRegex = '\\{([^}]+?)}'; // not referenced by any method of this class
- private static $leadingClassRegex = '^\\{([^}]+?)}'; // 1=modifier contents at start of string
- private static $trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$'; // 1=line before modifier, 2=modifier contents
- private static $classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
- private static $idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
- private static $attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value
-
- /**
- * Copies this modifier's CSS classes, id (when set), attributes, and
- * styles onto the given node. Existing attribute and style entries with
- * the same name are overwritten.
- */
- public function applyTo(MDNode $node) {
-     // The parameter type already guarantees `$node` is an MDNode.
-     foreach ($this->cssClasses as $className) {
-         $node->addClass($className);
-     }
-     if ($this->cssId) {
-         $node->cssId = $this->cssId;
-     }
-     foreach ($this->attributes as $attrName => $attrValue) {
-         $node->attributes[$attrName] = $attrValue;
-     }
-     foreach ($this->cssStyles as $styleName => $styleValue) {
-         $node->cssStyles[$styleName] = $styleValue;
-     }
- }
-
- /**
- * Appends a CSS class unless it is already in the list.
- *
- * @return bool `true` if the class was added, `false` if already present
- */
- public function addClass(string $cssClass): bool {
-     $alreadyPresent = in_array($cssClass, $this->cssClasses);
-     if (!$alreadyPresent) {
-         $this->cssClasses[] = $cssClass;
-     }
-     return !$alreadyPresent;
- }
-
- /**
- * Removes a CSS class.
- *
- * @return bool `true` if the class was present and removed, `false` if not
- */
- public function removeClass(string $cssClass): bool {
-     $beforeLength = sizeof($this->cssClasses);
-     // array_diff preserves keys; reindex so the list stays sequential
-     $this->cssClasses = array_values(array_diff($this->cssClasses, [ $cssClass ]));
-     return sizeof($this->cssClasses) != $beforeLength;
- }
-
- /**
- * Two modifiers are equal when the other value is an `MDTagModifier` with
- * the same CSS classes, id, attributes, and styles, i.e. everything that
- * `applyTo` transfers to a node. `original` is not compared.
- */
- public function equals($other): bool {
-     if (!($other instanceof MDTagModifier)) return false;
-     if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
-     if ($other->cssId !== $this->cssId) return false;
-     if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
-     if (!MDUtils::equal($other->cssStyles, $this->cssStyles)) return false;
-     return true;
- }
-
- public function __toString(): string { // string form is the verbatim markdown syntax, not the scrubbed values
- return $this->original;
- }
-
- /**
- * Splits a `key:value;key:value` style string into a key -> value array.
- * Segments that do not contain exactly one colon are skipped. Keys and
- * values are stored as written, without trimming.
- */
- private static function styleToObject(string $styleValue): array {
-     $styles = [];
-     foreach (explode(';', $styleValue) as $declaration) {
-         $pieces = explode(':', $declaration);
-         if (sizeof($pieces) == 2) {
-             [$styleName, $styleSetting] = $pieces;
-             $styles[$styleName] = $styleSetting;
-         }
-     }
-     return $styles;
- }
-
- /**
- * Builds a modifier from the text found between `{` and `}`. The text is
- * split on whitespace and each piece must be a `.class`, an `#id`, or a
- * `name=value` attribute (`style=` values are parsed into `cssStyles`).
- * Returns `null` as soon as any piece matches none of these forms.
- */
- private static function fromContents(string $contents): ?MDTagModifier {
-     $modifier = new MDTagModifier();
-     $modifier->original = "{{$contents}}";
-     foreach (mb_split('\\s+', $contents) as $piece) {
-         if (trim($piece) == '') continue;
-         if (mb_eregi(self::$classRegex, $piece, $match)) {
-             $modifier->addClass($match[1]);
-             continue;
-         }
-         if (mb_eregi(self::$idRegex, $piece, $match)) {
-             $modifier->cssId = $match[1];
-             continue;
-         }
-         if (!mb_eregi(self::$attributeRegex, $piece, $match)) {
-             return null;
-         }
-         $attrName = $match[1];
-         $attrValue = $match[2];
-         if ($attrName == 'style') {
-             $modifier->cssStyles = self::styleToObject($attrValue);
-         } else {
-             $modifier->attributes[$attrName] = $attrValue;
-         }
-     }
-     return $modifier;
- }
-
- /**
- * Extracts block modifier from end of a line. Always returns a 2-element
- * tuple array:
- * - `0`: the line without the modifier
- * - `1`: an `MDTagModifier` if found or `null` if not
- *
- * When a state is given and none of its root's block readers is an
- * `MDModifierReader`, modifiers are considered disabled and the line is
- * returned untouched. The line is also returned untouched when the
- * trailing `{...}` text is not a valid modifier, so ordinary text in
- * braces is never dropped.
- *
- * @param string $line
- * @param ?MDState $state
- * @return array tuple with remaining line and `MDTagModifier` or `null`
- */
- public static function fromLine(string $line, ?MDState $state): array {
-     if ($state) {
-         $found = false;
-         foreach ($state->root()->readersByBlockPriority as $reader) {
-             if ($reader instanceof MDModifierReader) {
-                 $found = true;
-                 break;
-             }
-         }
-         if (!$found) return [ $line, null ];
-     }
-     if (!mb_eregi(self::$trailingClassRegex, $line, $groups)) return [ $line, null ];
-     $mod = self::fromContents($groups[2]);
-     // Braces that do not parse as a modifier are content; keep them.
-     if ($mod === null) return [ $line, null ];
-     return [ $groups[1], $mod ];
- }
-
- /**
- * Parses a `{...}` modifier located at the very beginning of the string.
- * Returns `null` when the string does not start with one or when its
- * contents are not a valid modifier.
- */
- public static function fromStart(string $line): ?MDTagModifier {
-     $matched = mb_eregi(self::$leadingClassRegex, $line, $match);
-     return $matched ? self::fromContents($match[1]) : null;
- }
-
- /**
- * Returns the line with a trailing `{...}` group (and the whitespace
- * around it) removed, or the line unchanged when it has none. The group's
- * contents are not validated.
- */
- public static function strip(string $line): string {
-     $matched = mb_eregi(self::$trailingClassRegex, $line, $match);
-     return $matched ? $match[1] : $line;
- }
- }
-
-
- // -- Readers ---------------------------------------------------------------
-
-
- /**
- * Base class for readers of various markdown syntax. A `Markdown` instance can
- * be created with any combination of subclasses of these to customize the
- * flavor of markdown parsed.
- *
- * @see {@link custom.md} for details on subclassing
- */
- class MDReader {
- /**
- * Called before processing begins. `$state->lines` is populated and the
- * line pointer `$state->p` will be at `0`.
- *
- * Default implementation does nothing.
- */
- public function preProcess(MDState $state) {}
-
- /**
- * Attempts to read an `MDBlockNode` subclass at the current line pointer
- * `$state->p`. Only matches if the block pattern starts at the line pointer,
- * not elsewhere in the `$state->lines` array. If a block is found, `$state->p`
- * should be incremented to the next line _after_ the block structure and
- * a `MDBlockNode` subclass instance is returned. If no block is found,
- * returns `null`.
- *
- * Default implementation always returns `null`.
- */
- public function readBlock(MDState $state): ?MDBlockNode { return null; }
-
- /**
- * Attempts to read an inline token from the beginning of `$line`. Only the
- * start of the given `$line` is considered. If a matching token is found, an
- * `MDToken` is returned. Otherwise `null` is returned.
- *
- * Default implementation always returns `null`.
- */
- public function readToken(MDState $state, string $line): ?MDToken { return null; }
-
- /**
- * Attempts to find a pattern anywhere in `$tokens` and perform a _single_
- * in-place substitution with one or more `MDNode` subclass instances.
- * If a substitution is performed, must return `true`, otherwise `false`.
- *
- * Default implementation always returns `false`.
- *
- * NOTE(review): `$tokens` is declared by value here, so changes made to the
- * array inside an override are not visible to the caller unless the
- * parameter is taken by reference — confirm against the subclasses.
- *
- * @param MDState $state
- * @param int $pass what substitution pass this is, starting with 1
- * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and `MDInlineNode` elements
- * @return bool `true` if a substitution was performed, `false` if not
- */
- public function substituteTokens(MDState $state, int $pass, array $tokens): bool { return false; }
-
- /**
- * Called after all parsing has completed. An array `$blocks` is passed of
- * all the top-level `MDBlockNode` elements in the document which this
- * method can traverse or alter if necessary.
- *
- * `MDNode::visitChildren` is useful for recursively looking for certain
- * `MDNode` instances. `MDNode::replaceNodes` is useful for swapping in
- * replacements.
- *
- * Default implementation does nothing.
- *
- * NOTE(review): `$blocks` is declared by value, so adding or removing
- * top-level elements here does not affect the caller's array; changes to
- * the node objects themselves do — confirm the intended contract.
- *
- * @param MDState $state
- * @param MDBlockNode[] $blocks
- */
- public function postProcess(MDState $state, array $blocks) {}
-
- /**
- * Can be overridden to influence ordering of this reader with respect to
- * another during the block parsing phase. Return `-1` to be ordered before
- * the given reader, `1` to be ordered after it, or `0` for no preference.
- * Only return non-`0` values to resolve specific conflicts.
- *
- * Default implementation always returns `0` (no preference).
- *
- * @param MDReader $other
- * @return int a negative, positive, or 0 value to be ordered before,
- * after, or anywhere relative to `$other`, respectively
- */
- public function compareBlockOrdering(MDReader $other): int {
- return 0;
- }
-
- /**
- * Can be overridden to influence ordering of this reader with respect to
- * another during the tokenizing phase. Return `-1` to be ordered before
- * the given reader, `1` to be ordered after it, or `0` for no preference.
- * Only return non-`0` values to resolve specific conflicts.
- *
- * Default implementation always returns `0` (no preference).
- *
- * @param MDReader $other
- * @return int a negative, positive, or 0 value to be ordered before,
- * after, or anywhere relative to `$other`, respectively
- */
- public function compareTokenizeOrdering(MDReader $other): int {
- return 0;
- }
-
- /**
- * Can be overridden to influence ordering of this reader with respect to
- * another during the substitution phase. Return `-1` to be ordered before
- * the given reader, `1` to be ordered after it, or `0` for no preference.
- * Only return non-`0` values to resolve specific conflicts.
- *
- * Readers are sorted within each substitution pass. All pass 1 readers are
- * processed first, then all pass 2 readers, etc. The number of passes this
- * reader participates in is dictated by `substitutionPassCount`.
- *
- * Default implementation always returns `0` (no preference).
- *
- * @param MDReader $other
- * @param int $pass substitution pass, with numbering starting at `1`
- * @return int a negative, positive, or 0 value to be ordered before,
- * after, or anywhere relative to `$other`, respectively
- */
- public function compareSubstituteOrdering(MDReader $other, int $pass): int {
- return 0;
- }
-
- /**
- * How many substitution passes this reader requires. Substitution allows
- * all pass 1 readers to process first, then all pass 2 readers, etc.
- *
- * Default implementation returns `1`.
- */
- public function substitutionPassCount(): int { return 1; }
-
- /**
- * For sorting readers with ordering preferences. The `compare` methods
- * don't have the properties of normal sorting compares so need to sort
- * differently: every pair is compared, each non-zero result becomes a
- * "must come before" edge, and the elements are emitted in topological
- * order (Kahn's algorithm). Elements caught in an ordering cycle are
- * appended at the end in their original relative order.
- *
- * @param array $arr array to sort; its keys are ignored
- * @param callable $compareFn comparison function, taking two array element
- * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
- * respectively
- * @param callable $idFn function for returning a unique hashable id for
- * the array element
- * @return array sorted array
- */
- private static function kahnTopologicalSort(array $arr, callable $compareFn, callable $idFn): array {
-     // Callers may pass arrays with gaps in their keys (e.g. the result of
-     // array_filter), so normalize to a list before indexing numerically.
-     $arr = array_values($arr);
-     $count = sizeof($arr);
-
-     $graph = [];
-     $inDegrees = [];
-     $valuesById = [];
-     $ids = [];
-
-     // Build the graph nodes
-     foreach ($arr as $i => $elem) {
-         $id = $idFn($elem);
-         $ids[$i] = $id;
-         $graph[$id] = [];
-         $inDegrees[$id] = 0;
-         $valuesById[$id] = $elem;
-     }
-
-     // Add an edge for every ordered pair with a preference and compute
-     // in-degrees. Both (a, b) and (b, a) are consulted because only one
-     // side may express a preference.
-     for ($i = 0; $i < $count; $i++) {
-         for ($j = 0; $j < $count; $j++) {
-             if ($i === $j) continue;
-             $comparisonResult = $compareFn($arr[$i], $arr[$j]);
-             if ($comparisonResult < 0) {
-                 $graph[$ids[$i]][] = $ids[$j];
-                 $inDegrees[$ids[$j]]++;
-             } elseif ($comparisonResult > 0) {
-                 $graph[$ids[$j]][] = $ids[$i];
-                 $inDegrees[$ids[$i]]++;
-             }
-         }
-     }
-
-     // Initialize the queue with zero-inDegree nodes
-     $queue = [];
-     foreach ($inDegrees as $elemId => $degree) {
-         if ($degree === 0) {
-             $queue[] = $elemId;
-         }
-     }
-
-     // Process the queue and build the topological order list
-     $sorted = [];
-     while (sizeof($queue) > 0) {
-         $elemId = array_shift($queue);
-         $sorted[] = $valuesById[$elemId];
-         unset($valuesById[$elemId]);
-
-         foreach ($graph[$elemId] as $neighbor) {
-             $inDegrees[$neighbor]--;
-             if ($inDegrees[$neighbor] === 0) {
-                 $queue[] = $neighbor;
-             }
-         }
-     }
-     // Anything left over can go at the end. No ordering dependencies.
-     foreach ($valuesById as $value) {
-         $sorted[] = $value;
-     }
-
-     return $sorted;
- }
-
- /**
- * Returns a sorted array of readers by their block priority preferences.
- * The given array is not modified.
- *
- * @param MDReader[] $readers
- * @return MDReader[] sorted readers
- */
- public static function sortReaderForBlocks(array $readers): array {
-     // Taken by value: nothing here writes back to the caller's array, and a
-     // by-reference parameter would reject non-variable arguments.
-     return self::kahnTopologicalSort(
-         $readers,
-         fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
-         fn($elem) => get_class($elem)
-     );
- }
-
- /**
- * Orders readers according to their `compareTokenizeOrdering` preferences.
- * Readers are identified by class name during sorting.
- *
- * @param MDReader[] $readers
- * @return MDReader[] sorted readers
- */
- public static function sortReadersForTokenizing(array $readers): array {
-     $compareReaders = fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b);
-     $readerId = fn($elem) => get_class($elem);
-     return self::kahnTopologicalSort($readers, $compareReaders, $readerId);
- }
-
- /**
- * Returns a sorted array of tuples (arrays) containing the substitution
- * pass number and reader instance, sorted by their substitution priority
- * preferences.
- *
- * For readers with `substitutionPassCount` > `1`, the same reader will
- * appear multiple times in the resulting array, one per pass. All pass 1
- * tuples come first, then all pass 2 tuples, etc.
- *
- * @param MDReader[] $readers
- * @return array[] sorted array of `[ int $pass, MDReader $reader ]` tuples
- */
- public static function sortReadersForSubstitution(array $readers): array {
-     $tuples = [];
-     $maxPass = 1;
-     foreach ($readers as $reader) {
-         $passCount = $reader->substitutionPassCount();
-         for ($pass = 1; $pass <= $passCount; $pass++) {
-             $tuples[] = [ $pass, $reader ];
-         }
-         // Use the count itself; the loop variable ends one past it.
-         $maxPass = max($maxPass, $passCount);
-     }
-     $result = [];
-     for ($pass = 1; $pass <= $maxPass; $pass++) {
-         // array_filter preserves keys, so reindex before sorting
-         $tuplesThisPass = array_values(array_filter($tuples, fn($tup) => $tup[0] == $pass));
-         // The sort elements are tuples, not readers; arrow functions
-         // capture `$pass` from the enclosing scope automatically.
-         $passResult = self::kahnTopologicalSort(
-             $tuplesThisPass,
-             fn(array $a, array $b): int => $a[1]->compareSubstituteOrdering($b[1], $pass),
-             fn(array $elem) => get_class($elem[1])
-         );
-         $result = array_merge($result, $passResult);
-     }
-     return $result;
- }
- }
-
- /**
- * Reads markdown blocks for headings denoted with the underline syntax:
- * a content line followed by a line consisting only of `=` (level 1) or
- * `-` (level 2) characters.
- *
- * Supports `MDTagModifier` suffixes.
- */
- class MDUnderlinedHeadingReader extends MDReader {
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         if (!$state->hasLines(2)) return null;
-         $contentLine = trim($state->lines[$p++]);
-         [$contentLine, $modifier] = MDTagModifier::fromLine($contentLine, $state);
-         $underLine = trim($state->lines[$p++]);
-         if ($contentLine == '') return null;
-         if (mb_eregi('^=+$', $underLine)) {
-             $level = 1;
-         } elseif (mb_eregi('^\-+$', $underLine)) {
-             $level = 2;
-         } else {
-             return null;
-         }
-         // Only consume the two lines once the heading is confirmed
-         $state->p = $p;
-         $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($contentLine));
-         if ($modifier) $modifier->applyTo($block);
-         return $block;
-     }
- }
-
- /**
- * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
- * to 6 are supported.
- *
- * Supports `MDTagModifier` suffixes.
- */
- class MDHashHeadingReader extends MDReader {
-     private static $hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content
-
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         $line = $state->lines[$p++];
-         [$line, $modifier] = MDTagModifier::fromLine($line, $state);
-         if (!mb_eregi(self::$hashHeadingRegex, $line, $groups)) return null;
-         // Only consume the line once the heading is confirmed
-         $state->p = $p;
-         $level = mb_strlen($groups[1]);
-         $content = $groups[2];
-         $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($content));
-         if ($modifier) $modifier->applyTo($block);
-         return $block;
-     }
- }
-
- /**
- * Reads subtext blocks. Subtext is smaller, fainter text for things like
- * disclaimers or sources. A subtext line starts with `-#`.
- *
- * Supports `MDTagModifier` suffixes.
- */
- class MDSubtextReader extends MDReader {
-     private static $subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content
-
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         $line = $state->lines[$p++];
-         [$line, $modifier] = MDTagModifier::fromLine($line, $state);
-         if (!mb_eregi(self::$subtextRegex, $line, $groups)) return null;
-         // Only consume the line once the subtext is confirmed
-         $state->p = $p;
-         $content = $groups[1];
-         $block = new MDSubtextNode($state->inlineMarkdownToNodes($content));
-         if ($modifier) $modifier->applyTo($block);
-         return $block;
-     }
-
-     /**
-      * Asks to be ordered before `MDUnorderedListReader`; no preference
-      * relative to any other reader.
-      */
-     public function compareBlockOrdering(MDReader $other): int {
-         if ($other instanceof MDUnorderedListReader) {
-             return -1;
-         }
-         return 0;
-     }
- }
-
- /**
- * Reads markdown blocks for blockquoted text: a run of consecutive lines
- * that each start with `>`. The quoted content is parsed recursively as
- * its own sequence of blocks.
- */
- class MDBlockQuoteReader extends MDReader {
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $blockquoteLines = [];
-         $p = $state->p;
-         $lineCount = sizeof($state->lines);
-         // Advance only past lines that belong to the quote, so the first
-         // non-quote line is left for the next reader instead of being lost.
-         while ($p < $lineCount && str_starts_with($state->lines[$p], ">")) {
-             $blockquoteLines[] = $state->lines[$p];
-             $p++;
-         }
-         if (sizeof($blockquoteLines) == 0) return null;
-         // Drop the leading `>` and up to three spaces and one tab after it
-         $contentLines = array_map(fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)), $blockquoteLines);
-         $substate = $state->copy($contentLines);
-         $quotedBlocks = $substate->readBlocks();
-         $state->p = $p;
-         return new MDBlockquoteNode($quotedBlocks);
-     }
- }
-
- /**
-  * Internal abstract base class for ordered and unordered lists.
-  */
- class _MDListReader extends MDReader {
-     /**
-      * Collects the lines that make up the list item beginning at `$state->p`.
-      * Does not modify `$state->p`.
-      *
-      * @param MDState $state parser state positioned on the item's first line
-      * @param int $firstLineStartPos number of leading characters (the bullet
-      *   or number plus whitespace) to remove from the first line
-      * @return string[] item lines passed through `MDUtils::stripIndent`
-      */
-     private static function readItemLines(MDState $state, int $firstLineStartPos): array {
-         $p = $state->p;
-         $lines = [];
-         $seenBlankLine = false;
-         $stripTrailingBlankLines = true;
-         while ($state->hasLines(1, $p)) {
-             $isFirstLine = ($p == $state->p);
-             $line = $state->lines[$p++];
-             if ($isFirstLine) {
-                 $line = mb_substr($line, $firstLineStartPos);
-             }
-             if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
-                 // Found next list item
-                 $stripTrailingBlankLines = false; // because this signals extra spacing intended
-                 break;
-             }
-             $isBlankLine = trim($line) == '';
-             $isIndented = mb_eregi('^\\s+\\S', $line);
-             if ($isBlankLine) {
-                 $seenBlankLine = true;
-             } elseif (!$isIndented && $seenBlankLine) {
-                 // Post-list content
-                 break;
-             }
-             array_push($lines, $line);
-         }
-         // Honor the flag computed above: blank lines that sit between two
-         // items are kept so the caller's line count reaches the next item.
-         if ($stripTrailingBlankLines) {
-             $lines = MDUtils::withoutTrailingBlankLines($lines);
-         }
-         return MDUtils::stripIndent($lines);
-     }
-
-     /**
-      * Reads the content of one list item and advances `$state->p` by the
-      * number of lines read (at least one).
-      *
-      * The declared return type is `MDNode` because several branches build a
-      * plain `MDNode` container, which would violate a narrower
-      * `MDBlockNode` declaration.
-      *
-      * @param MDState $state parser state positioned on the item's first line
-      * @param int $firstLineStartPos offset of the content within the first line
-      * @return MDNode the item's content
-      */
-     protected function readListItemContent(MDState $state, int $firstLineStartPos): MDNode {
-         $itemLines = self::readItemLines($state, $firstLineStartPos);
-         $state->p += max(sizeof($itemLines), 1);
-
-         if (sizeof($itemLines) == 1) {
-             return $state->inlineMarkdownToNode($itemLines[0]);
-         }
-
-         $hasBlankLines = sizeof(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
-         if ($hasBlankLines) {
-             $substate = $state->copy($itemLines);
-             $blocks = $substate->readBlocks();
-             return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
-         }
-
-         // Multiline content with no blank lines. Search for new block
-         // boundaries without the benefit of a blank line to demarcate it.
-         for ($p = 1; $p < sizeof($itemLines); $p++) {
-             $line = $itemLines[$p];
-             if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
-                 // Nested list found: lines before it are inline text, the
-                 // rest is parsed as blocks, and both go in one container.
-                 $firstBlock = $state->inlineMarkdownToNode(implode("\n", array_slice($itemLines, 0, $p)));
-                 $substate = $state->copy(array_slice($itemLines, $p));
-                 $blocks = $substate->readBlocks();
-                 return new MDNode(array_merge([ $firstBlock ], $blocks));
-             }
-         }
-
-         // Ok, give up and just do a standard block read
-         $substate = $state->copy($itemLines);
-         $blocks = $substate->readBlocks();
-         return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
-     }
-
-     /**
-      * Must be overridden by subclasses; this base implementation always throws.
-      *
-      * @throws Error always
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         // Plain string concatenation: backticks are PHP's shell-execution operator.
-         throw new Error('Abstract readBlock must be overridden in ' . static::class);
-     }
- }
-
- /**
-  * Block reader for unordered (bulleted) lists.
-  */
- class MDUnorderedListReader extends _MDListReader {
-     private static string $unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content
-
-     /**
-      * Reads a single bulleted item at the state's current position.
-      *
-      * @param MDState $state parser state; advanced by `readListItemContent` on a match
-      * @return ?MDListItemNode the item, or `null` if there is no bulleted line here
-      */
-     private function readUnorderedListItem(MDState $state): ?MDListItemNode {
-         // The caller loops until this returns null, so it is always invoked
-         // once past the final item; guard against reading beyond the input.
-         if (!$state->hasLines(1)) return null;
-         $line = $state->lines[$state->p];
-         if (!mb_eregi(self::$unorderedListRegex, $line, $groups)) return null;
-         $firstLineOffset = mb_strlen($groups[1]);
-         return new MDListItemNode($this->readListItemContent($state, $firstLineOffset));
-     }
-
-     /**
-      * Reads consecutive bulleted items into one list.
-      *
-      * @param MDState $state parser state
-      * @return ?MDBlockNode an `MDUnorderedListNode`, or `null` if no items were found
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $items = [];
-         while (($item = $this->readUnorderedListItem($state)) !== null) {
-             array_push($items, $item);
-         }
-         if (sizeof($items) == 0) return null;
-         return new MDUnorderedListNode($items);
-     }
- }
-
- /**
-  * Block reader for ordered (numbered) lists. The number of the first item is
-  * used to begin counting. The subsequent items increase by 1, regardless of
-  * their value.
-  */
- class MDOrderedListReader extends _MDListReader {
-     private static string $orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content
-
-     /**
-      * Reads a single numbered item at the state's current position.
-      *
-      * @param MDState $state parser state; advanced by `readListItemContent` on a match
-      * @return ?MDListItemNode the item (carrying its parsed number), or `null`
-      *   if there is no numbered line here
-      */
-     private function readOrderedListItem(MDState $state): ?MDListItemNode {
-         // The caller loops until this returns null, so it is always invoked
-         // once past the final item; guard against reading beyond the input.
-         if (!$state->hasLines(1)) return null;
-         $line = $state->lines[$state->p];
-         if (!mb_eregi(self::$orderedListRegex, $line, $groups)) return null;
-         $ordinal = intval($groups[1]);
-         $firstLineOffset = mb_strlen($groups[1]) + mb_strlen($groups[2]);
-         return new MDListItemNode($this->readListItemContent($state, $firstLineOffset), $ordinal);
-     }
-
-     /**
-      * Reads consecutive numbered items into one list.
-      *
-      * @param MDState $state parser state
-      * @return ?MDBlockNode an `MDOrderedListNode` starting at the first item's
-      *   number, or `null` if no items were found
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $items = [];
-         while (($item = $this->readOrderedListItem($state)) !== null) {
-             array_push($items, $item);
-         }
-         // Bail out only when nothing was read (the check was previously inverted,
-         // rejecting every non-empty list and indexing into an empty one).
-         if (sizeof($items) == 0) return null;
-         return new MDOrderedListNode($items, $items[0]->ordinal);
-     }
- }
-
- /**
-  * Block reader for code blocks denoted by pairs of triple tickmarks. If
-  * a programming language name, _xyz_, immediately follows the backticks, a
-  * `language-xyz` CSS class will be added to the resulting `<code>`
-  * element.
-  *
-  * Supports `MDTagModifier` suffix.
-  */
- class MDFencedCodeBlockReader extends MDReader {
-     /**
-      * Attempts to read a fenced code block starting at the state's current
-      * position. Requires at least two remaining lines.
-      *
-      * @param MDState $state parser state; `p` is moved past the closing fence only on success
-      * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the opening fence
-      *   does not match or no closing fence is found
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         if (!$state->hasLines(2)) return null;
-         $p = $state->p;
-         $openFenceLine = $state->lines[$p++];
-         // `fromLine` is a static method and must be called with `::`
-         [$openFenceLine, $modifier] = MDTagModifier::fromLine($openFenceLine, $state);
-         if (!mb_eregi('```\s*([a-z0-9]*)\s*$', $openFenceLine, $groups)) return null;
-         $language = mb_strlen($groups[1]) > 0 ? $groups[1] : null;
-         $codeLines = [];
-         while ($state->hasLines(1, $p)) {
-             $line = $state->lines[$p++];
-             if (trim($line) == '```') {
-                 // Closing fence: commit the position and emit the block
-                 $state->p = $p;
-                 $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
-                 if ($modifier) $modifier->applyTo($block);
-                 return $block;
-             }
-             array_push($codeLines, $line);
-         }
-         // Ran out of lines without a closing fence
-         return null;
-     }
- }
-
- /**
-  * Block reader for code blocks denoted by indenting text.
-  */
- class MDIndentedCodeBlockReader extends MDReader {
-     /**
-      * Consumes the run of lines for which `MDUtils::countIndents($line, true)`
-      * is at least 1, starting at the state's current position, and joins
-      * them (after `MDUtils::stripIndent`) into a single code block.
-      *
-      * @param MDState $state parser state; on success `p` is left on the first line not consumed
-      * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if no line qualified
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $collected = [];
-         for ($cursor = $state->p; $state->hasLines(1, $cursor); $cursor++) {
-             $candidate = $state->lines[$cursor];
-             if (MDUtils::countIndents($candidate, true) < 1) {
-                 break;
-             }
-             $collected[] = MDUtils::stripIndent($candidate);
-         }
-         if (count($collected) === 0) {
-             return null;
-         }
-         $state->p = $cursor;
-         return new MDCodeBlockNode(implode("\n", $collected));
-     }
- }
-
- /**
-  * Block reader for horizontal rules. Composed of three or more hyphens or
-  * asterisks on a line by themselves, with or without intermediate whitespace.
-  */
- class MDHorizontalRuleReader extends MDReader {
-     private static string $horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';
-
-     /**
-      * Attempts to read a horizontal rule from the line at the state's
-      * current position. An `MDTagModifier` found on the line is applied to
-      * the resulting node.
-      *
-      * @param MDState $state parser state; `p` is advanced past the line only on a match
-      * @return ?MDBlockNode an `MDHorizontalRuleNode`, or `null` if the line is not a rule
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         $line = $state->lines[$p++];
-         [$line, $modifier] = MDTagModifier::fromLine($line, $state);
-         // Static properties must be referenced with the `$` sigil; without it
-         // PHP looks up a class constant and throws.
-         if (!mb_eregi(self::$horizontalRuleRegex, $line)) return null;
-         $state->p = $p;
-         $block = new MDHorizontalRuleNode();
-         if ($modifier) $modifier->applyTo($block);
-         return $block;
-     }
-
-     /**
-      * Orders this reader ahead of `MDUnorderedListReader` (returns -1 for it),
-      * since rule lines are also made of hyphens or asterisks.
-      */
-     public function compareBlockOrdering(MDReader $other): int {
-         if ($other instanceof MDUnorderedListReader) {
-             return -1;
-         }
-         return 0;
-     }
- }
-
- /**
-  * Block reader for tables.
-  *
-  * Supports `MDTagModifier` suffix.
-  */
- class MDTableReader extends MDReader {
-     private static string $tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';
-
-     /**
-      * Reads one pipe-delimited row at the state's current position.
-      *
-      * @param MDState $state parser state; `p` is advanced past the line only on success
-      * @param bool $isHeader whether cells become `MDTableHeaderCellNode` instead of `MDTableCellNode`
-      * @return ?MDTableRowNode the row, or `null` if no lines remain or the line contains no `|`
-      */
-     private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
-         if (!$state->hasLines(1)) return null;
-         $p = $state->p;
-         $line = MDTagModifier::strip(trim($state->lines[$p++]));
-         if (!mb_eregi('.*\\|.*', $line)) return null;
-         // Outer pipes are optional; remove them so they don't yield empty cells
-         if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
-         if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
-         $cellTokens = explode('|', $line);
-         // PHP closures do not see enclosing locals unless captured with `use`
-         $cells = array_map(function($token) use ($state, $isHeader) {
-             $content = $state->inlineMarkdownToNode(trim($token));
-             return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
-         }, $cellTokens);
-         $state->p = $p;
-         return new MDTableRowNode($cells);
-     }
-
-     /**
-      * Derives per-column alignment from a divider line based on where colons
-      * appear in each column token.
-      *
-      * @param string $line the divider line
-      * @return (string|null)[] one of `'left'`, `'center'`, `'right'`, or
-      *   `null` (no colon) for each column
-      */
-     private function parseColumnAlignments(string $line): array {
-         $line = trim($line);
-         if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
-         if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
-         return array_map(function($token) {
-             // Removing the outer pipes can leave padding on the first and
-             // last tokens, which would otherwise hide a leading/trailing colon.
-             $token = trim($token);
-             $startsWithColon = str_starts_with($token, ':');
-             $endsWithColon = str_ends_with($token, ':');
-             if ($startsWithColon && $endsWithColon) return 'center';
-             if ($startsWithColon) return 'left';
-             if ($endsWithColon) return 'right';
-             return null;
-         }, mb_split('\\s*\\|\\s*', $line));
-     }
-
-     /**
-      * Attempts to read a table: a header row, a divider line, then zero or
-      * more body rows. Requires at least two remaining lines.
-      *
-      * @param MDState $state parser state; restored to its starting position on failure
-      * @return ?MDBlockNode an `MDTableNode`, or `null` if the header or divider does not match
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         if (!$state->hasLines(2)) return null;
-         $startP = $state->p;
-         $firstLine = $state->lines[$startP];
-         // Only the modifier is needed here; readTableRow strips it from the text itself
-         $modifier = MDTagModifier::fromLine($firstLine, $state)[1];
-         $headerRow = $this->readTableRow($state, true);
-         if ($headerRow === null) {
-             $state->p = $startP;
-             return null;
-         }
-         $dividerLine = $state->lines[$state->p++];
-         if (!mb_eregi(self::$tableDividerRegex, $dividerLine)) {
-             $state->p = $startP;
-             return null;
-         }
-         $columnAlignments = $this->parseColumnAlignments($dividerLine);
-         $bodyRows = [];
-         while ($state->hasLines(1)) {
-             $row = $this->readTableRow($state, false);
-             if ($row === null) break;
-             array_push($bodyRows, $row);
-         }
-         $table = new MDTableNode($headerRow, $bodyRows);
-         $table->columnAlignments = $columnAlignments;
-         if ($modifier) $modifier->applyTo($table);
-         return $table;
-     }
- }
-
- /**
-  * Block reader for definition lists. Definitions go directly under terms starting
-  * with a colon.
-  */
- class MDDefinitionListReader extends MDReader {
-     /**
-      * Attempts to read a definition list starting at the state's current
-      * position. Reading stops at the first blank line. Lines beginning with
-      * whitespace are appended to the preceding entry; lines beginning with a
-      * colon and whitespace are definitions; all other lines are terms.
-      *
-      * @param MDState $state parser state; `p` is advanced only on success
-      * @return ?MDBlockNode an `MDDefinitionListNode`, or `null` unless at least
-      *   one term and one definition were found
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         $termCount = 0;
-         $definitionCount = 0;
-         $defLines = [];
-         while ($state->hasLines(1, $p)) {
-             $line = $state->lines[$p++];
-             if (trim($line) === '') {
-                 break;
-             }
-             if (mb_eregi('^\\s+', $line)) {
-                 // Continuation line with nothing to continue: not a definition list
-                 if (sizeof($defLines) == 0) return null;
-                 $defLines[sizeof($defLines) - 1] .= "\n" . $line;
-             } elseif (mb_eregi('^:\\s+', $line)) {
-                 array_push($defLines, $line);
-                 $definitionCount++;
-             } else {
-                 array_push($defLines, $line);
-                 $termCount++;
-             }
-         }
-         if ($termCount == 0 || $definitionCount == 0) return null;
-         // PHP closures do not see enclosing locals unless captured with `use`,
-         // and the match groups must be requested explicitly from mb_eregi.
-         $blocks = array_map(function($line) use ($state) {
-             if (mb_eregi('^:\\s+(.*?)$', $line, $groups)) {
-                 return new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($groups[1]));
-             }
-             return new MDDefinitionListTermNode($state->inlineMarkdownToNodes($line));
-         }, $defLines);
-         $state->p = $p;
-         return new MDDefinitionListNode($blocks);
-     }
- }
-
- /**
-  * Block reader for defining footnote contents. Footnotes can be defined anywhere
-  * in the document but will always be rendered at the end of a page or end of
-  * the document.
-  */
- class MDFootnoteReader extends MDReader {
-     private static string $footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
-     private static string $footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol
-
-     /**
-      * Stores a footnote's parsed content under its symbol in the root state.
-      *
-      * @param MDState $state
-      * @param string $symbol
-      * @param MDNode[] $footnote
-      */
-     private function defineFootnote(MDState $state, string $symbol, array $footnote) {
-         $footnotes = $state->root()['footnotes'] ?? [];
-         $footnotes[$symbol] = $footnote;
-         $state->root()['footnotes'] = $footnotes;
-     }
-
-     /**
-      * Records an occurrence id for a footnote symbol in the root state.
-      *
-      * @param MDState $state
-      * @param string $symbol
-      * @param int $unique occurrence id to record
-      */
-     private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
-         $footnoteInstances = $state->root()['footnoteInstances'] ?? [];
-         $instances = $footnoteInstances[$symbol] ?? [];
-         array_push($instances, $unique);
-         $footnoteInstances[$symbol] = $instances;
-         // PHP arrays are copied on assignment, so the change must be stored back
-         $state->root()['footnoteInstances'] = $footnoteInstances;
-     }
-
-     /**
-      * Returns the numeric id for a footnote symbol, allocating the next id
-      * the first time a symbol is seen.
-      *
-      * @param MDState $state
-      * @param string $symbol
-      * @return int
-      */
-     private function idForFootnoteSymbol(MDState $state, string $symbol): int {
-         $footnoteIds = $state->root()['footnoteIds'] ?? [];
-         if (isset($footnoteIds[$symbol])) return $footnoteIds[$symbol];
-         $id = $state->root()['nextFootnoteId'] ?? 1;
-         $footnoteIds[$symbol] = $id;
-         // PHP arrays are copied on assignment, so the change must be stored back
-         $state->root()['footnoteIds'] = $footnoteIds;
-         $state->root()['nextFootnoteId'] = $id + 1;
-         return $id;
-     }
-
-     /**
-      * Resets the footnote bookkeeping stored on the root state.
-      */
-     public function preProcess(MDState $state) {
-         $state->root()['footnoteInstances'] = [];
-         $state->root()['footnotes'] = [];
-         $state->root()['footnoteIds'] = [];
-         $state->root()['nextFootnoteId'] = 1;
-     }
-
-     /**
-      * Attempts to read a footnote definition (`[^symbol]: text`) at the
-      * state's current position. Following lines that begin with whitespace
-      * are appended to the definition.
-      *
-      * @param MDState $state parser state; `p` is advanced only on a match
-      * @return ?MDBlockNode an empty node on success, or `null` if the line is
-      *   not a footnote definition
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
-         $symbol = $groups[1];
-         $def = $groups[2];
-         while ($state->hasLines(1, $p)) {
-             $line = $state->lines[$p++];
-             if (mb_eregi('^\\s+', $line)) {
-                 // `.=` concatenates; `+=` is arithmetic in PHP
-                 $def .= "\n" . $line;
-             } else {
-                 $p--;
-                 break;
-             }
-         }
-         $content = $state->inlineMarkdownToNodes($def);
-         $this->defineFootnote($state, $symbol, $content);
-         $state->p = $p;
-         // NOTE(review): MDNode is not a subclass of MDBlockNode per the class
-         // declarations in this file; confirm this satisfies the return type.
-         return new MDNode(); // empty
-     }
-
-     /**
-      * Attempts to read a footnote reference token (`[^symbol]` or
-      * `[^symbol "title"]`) from the start of `$line`.
-      *
-      * @param MDState $state
-      * @param string $line remaining text to tokenize
-      * @return ?MDToken a `Footnote` token, or `null` if none is present
-      */
-     public function readToken(MDState $state, string $line): ?MDToken {
-         if (mb_eregi(self::$footnoteWithTitleRegex, $line, $groups)) {
-             return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
-         }
-         if (mb_eregi(self::$footnoteRegex, $line, $groups)) {
-             return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
-         }
-         return null;
-     }
-
-     /**
-      * Replaces the first `Footnote` token in `$tokens` with an `MDFootnoteNode`.
-      *
-      * NOTE(review): `$tokens` is received by value, so the splice is only
-      * visible to the caller if the base class declares this parameter by
-      * reference; confirm against `MDReader::substituteTokens`.
-      *
-      * @param MDState $state
-      * @param int $pass
-      * @param array $tokens
-      * @return bool whether a substitution was made
-      */
-     public function substituteTokens(MDState $state, int $pass, array $tokens): bool {
-         if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
-             $symbol = $match->tokens[0]->content;
-             // The replacement must be wrapped in an array; a bare object is
-             // cast to an array of its properties by array_splice.
-             array_splice($tokens, $match->index, 1, [ new MDFootnoteNode($symbol) ]);
-             return true;
-         }
-         return false;
-     }
-
-     /**
-      * Assigns footnote ids, occurrence ids and display symbols to every
-      * `MDFootnoteNode` under `$blocks`, then appends an `MDFootnoteListNode`
-      * if any footnotes were defined.
-      *
-      * NOTE(review): `$blocks` is received by value, so the appended node is
-      * only visible to the caller if the base class declares this parameter
-      * by reference; confirm against `MDReader::postProcess`.
-      *
-      * @param MDState $state
-      * @param MDBlockNode[] $blocks
-      */
-     public function postProcess(MDState $state, array $blocks) {
-         $nextOccurrenceId = 1;
-         foreach ($blocks as $block) {
-             // `$state` must be captured explicitly; `$this` is bound automatically
-             $block->visitChildren(function($node) use ($state, &$nextOccurrenceId) {
-                 if (!($node instanceof MDFootnoteNode)) return;
-                 $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
-                 $node->occurrenceId = $nextOccurrenceId++;
-                 $node->displaySymbol = strval($node->footnoteId);
-                 $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
-             });
-         }
-         // Read definitions from the same place defineFootnote() stores them
-         $footnotes = $state->root()['footnotes'] ?? [];
-         if (sizeof($footnotes) == 0) return;
-         array_push($blocks, new MDFootnoteListNode());
-     }
-
-     /** Orders this reader ahead of link and image readers for block reading. */
-     public function compareBlockOrdering(MDReader $other): int {
-         if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
-             return -1;
-         }
-         return 0;
-     }
-
-     /** Orders this reader ahead of link and image readers for tokenizing. */
-     public function compareTokenizeOrdering(MDReader $other): int {
-         if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
-             return -1;
-         }
-         return 0;
-     }
-
-     /** Orders this reader ahead of link and image readers for token substitution. */
-     public function compareSubstituteOrdering(MDReader $other, int $pass): int {
-         if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
-             return -1;
-         }
-         return 0;
-     }
- }
-
- /**
-  * Block reader for abbreviation definitions. Anywhere the abbreviation appears
-  * in plain text will have its definition available when hovering over it.
-  * Definitions can appear anywhere in the document. Their content should only
-  * contain simple text, not markdown.
-  */
- class MDAbbreviationReader extends MDReader {
-     /**
-      * Stores an abbreviation's definition, and a word-boundary regex for it,
-      * on the root state. Uses the same read-modify-write array access as
-      * `preProcess()` and `postProcess()`.
-      *
-      * @param MDState $state
-      * @param string $abbreviation
-      * @param string $definition
-      */
-     private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
-         $abbreviations = $state->root()['abbreviations'] ?? [];
-         $abbreviations[$abbreviation] = $definition;
-         $state->root()['abbreviations'] = $abbreviations;
-
-         $regexes = $state->root()['abbreviationRegexes'] ?? [];
-         $regexes[$abbreviation] = "\\b(" . preg_quote($abbreviation) . ")\\b";
-         $state->root()['abbreviationRegexes'] = $regexes;
-     }
-
-     /**
-      * Resets the abbreviation bookkeeping stored on the root state.
-      */
-     public function preProcess(MDState $state) {
-         $state->root()['abbreviations'] = [];
-         $state->root()['abbreviationRegexes'] = [];
-     }
-
-     /**
-      * Attempts to read an abbreviation definition (`*[ABBR]: text`) at the
-      * state's current position.
-      *
-      * @param MDState $state parser state; `p` is advanced past the line only on a match
-      * @return ?MDBlockNode an empty node on success, or `null` if the line is
-      *   not an abbreviation definition
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $p = $state->p;
-         $line = $state->lines[$p++];
-         if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
-         $abbrev = $groups[1];
-         $def = $groups[2];
-         $this->defineAbbreviation($state, $abbrev, $def);
-         $state->p = $p;
-         // NOTE(review): MDNode is not a subclass of MDBlockNode per the class
-         // declarations in this file; confirm this satisfies the return type.
-         return new MDNode(); // empty
-     }
-
-     /**
-      * Replaces occurrences of defined abbreviations inside text nodes with
-      * `MDAbbreviationNode`s. Matching is a plain case-sensitive substring
-      * search.
-      *
-      * @param MDState $state
-      * @param MDNode[] $blocks
-      */
-     public function postProcess(MDState $state, array $blocks) {
-         $abbreviations = $state->root()['abbreviations'];
-         // PHP closures do not see enclosing locals unless captured with `use`
-         MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
-             if (!($original instanceof MDTextNode)) return null;
-             $changed = false;
-             $elems = [ $original->text ]; // mix of strings and MDNodes
-             for ($i = 0; $i < sizeof($elems); $i++) {
-                 $text = $elems[$i];
-                 if (!is_string($text)) continue;
-                 // Keys are the abbreviations, values their definitions
-                 foreach ($abbreviations as $abbreviation => $definition) {
-                     $abbreviation = (string)$abbreviation; // numeric keys come back as ints
-                     $index = strpos($text, $abbreviation);
-                     // A miss on one abbreviation must not stop the search for the others
-                     if ($index === false) continue;
-                     $prefix = substr($text, 0, $index);
-                     $suffix = substr($text, $index + strlen($abbreviation));
-                     array_splice($elems, $i, 1, [ $prefix, new MDAbbreviationNode($abbreviation, $definition), $suffix ]);
-                     $i = -1; // start over
-                     $changed = true;
-                     break;
-                 }
-             }
-             if (!$changed) return null;
-             $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
-             return new MDNode($nodes);
-         });
-     }
- }
-
- /**
-  * Block reader for simple paragraphs. Paragraphs are separated by a blank (or
-  * whitespace-only) line. This reader is prioritized after every other reader
-  * since there is no distinguishing syntax.
-  */
- class MDParagraphReader extends MDReader {
-     /**
-      * Gathers lines from the state's current position up to (and consuming)
-      * the next blank line and wraps them in a paragraph.
-      *
-      * @param MDState $state parser state; `p` is advanced only when a paragraph is returned
-      * @return ?MDBlockNode an `MDParagraphNode`, or `null` when nothing was
-      *   gathered or when reading began at line 0 and ran to the end of input
-      */
-     public function readBlock(MDState $state): ?MDBlockNode {
-         $cursor = $state->p;
-         $collected = [];
-         while ($state->hasLines(1, $cursor)) {
-             $text = $state->lines[$cursor];
-             $cursor++;
-             if (trim($text) === '') {
-                 break;
-             }
-             $collected[] = $text;
-         }
-
-         // If it's the entire document don't wrap it in a paragraph
-         $spansWholeDocument = ($state->p == 0) && ($cursor >= count($state->lines));
-         if ($spansWholeDocument || count($collected) === 0) {
-             return null;
-         }
-
-         $state->p = $cursor;
-         $joined = implode("\n", $collected);
-         return new MDParagraphNode($state->inlineMarkdownToNodes($joined));
-     }
-
-     /**
-      * Always sorts after any other reader.
-      */
-     public function compareBlockOrdering(MDReader $other): int {
-         return 1; // always dead last
-     }
- }
-
- // Inline reader declarations. Every class in this run has an empty body here:
- // only the class names and their inheritance relationships are established.
- class MDSimplePairInlineReader extends MDReader {}
-
- class MDEmphasisReader extends MDSimplePairInlineReader {}
-
- class MDStrongReader extends MDSimplePairInlineReader {}
-
- class MDStrikethroughReader extends MDSimplePairInlineReader {}
-
- class MDUnderlineReader extends MDSimplePairInlineReader {}
-
- class MDHighlightReader extends MDSimplePairInlineReader {}
-
- class MDCodeSpanReader extends MDSimplePairInlineReader {}
-
- class MDSubscriptReader extends MDSimplePairInlineReader {}
-
- class MDSuperscriptReader extends MDSimplePairInlineReader {}
-
- class MDLinkReader extends MDReader {}
-
- class MDReferencedLinkReader extends MDLinkReader {}
-
- class MDImageReader extends MDLinkReader {}
-
- class MDReferencedImageReader extends MDReferencedLinkReader {}
-
- class MDLineBreakReader extends MDReader {}
-
- class MDHTMLTagReader extends MDReader {}
-
- class MDModifierReader extends MDReader {}
-
-
- // -- Nodes -----------------------------------------------------------------
-
-
- // Node class declarations. Every class in this run has an empty body here:
- // only the class names and their inheritance relationships are established.
- // Block-level nodes derive from MDBlockNode, inline nodes from MDInlineNode,
- // and both of those derive from MDNode.
- class MDNode {}
-
- class MDBlockNode extends MDNode {}
-
- class MDParagraphNode extends MDBlockNode {}
-
- class MDHeadingNode extends MDBlockNode {}
-
- class MDSubtextNode extends MDBlockNode {}
-
- class MDHorizontalRuleNode extends MDBlockNode {}
-
- class MDBlockquoteNode extends MDBlockNode {}
-
- class MDUnorderedListNode extends MDBlockNode {}
-
- class MDOrderedListNode extends MDBlockNode {}
-
- class MDListItemNode extends MDBlockNode {}
-
- class MDCodeBlockNode extends MDBlockNode {}
-
- class MDTableNode extends MDBlockNode {}
-
- class MDTableRowNode extends MDBlockNode {}
-
- class MDTableCellNode extends MDBlockNode {}
-
- class MDTableHeaderCellNode extends MDBlockNode {}
-
- class MDDefinitionListNode extends MDBlockNode {}
-
- class MDDefinitionListTermNode extends MDBlockNode {}
-
- class MDDefinitionListDefinitionNode extends MDBlockNode {}
-
- class MDFootnoteListNode extends MDBlockNode {}
-
- class MDInlineNode extends MDNode {}
-
- class MDTextNode extends MDInlineNode {}
-
- class MDObfuscatedTextNode extends MDTextNode {}
-
- class MDEmphasisNode extends MDInlineNode {}
-
- class MDStrongNode extends MDInlineNode {}
-
- class MDStrikethroughNode extends MDInlineNode {}
-
- class MDUnderlineNode extends MDInlineNode {}
-
- class MDHighlightNode extends MDInlineNode {}
-
- class MDSuperscriptNode extends MDInlineNode {}
-
- class MDSubscriptNode extends MDInlineNode {}
-
- class MDCodeNode extends MDInlineNode {}
-
- class MDFootnoteNode extends MDInlineNode {}
-
- class MDLinkNode extends MDInlineNode {}
-
- class MDReferencedLinkNode extends MDLinkNode {}
-
- class MDImageNode extends MDInlineNode {}
-
- class MDReferencedImageNode extends MDImageNode {}
-
- class MDAbbreviationNode extends MDInlineNode {}
-
- class MDLineBreakNode extends MDInlineNode {}
-
- class MDHTMLTagNode extends MDInlineNode {}
-
-
- // -- Main class ------------------------------------------------------------
-
-
- // Main entry-point class; its body is empty here.
- class Markdown {}
- ?>
|