PHP and Javascript implementations of a simple markdown parser
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

markdown.php 121KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932
  1. <?php
  2. declare(strict_types=1);
  /**
   * Static utilities shared by the markdown parser.
   */
  class MDUtils {
      // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
      public static $baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';

      // Modified from https://emailregex.com/ to remove capture groups.
      public static $baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

      /**
       * Encodes every character of `text` as an HTML numeric character
       * reference (e.g. `A` becomes `&#65;`) to make it marginally more
       * difficult for web scrapers to grab sensitive info. If `text` starts
       * with `mailto:`, that prefix is kept verbatim and only the remainder is
       * obfuscated.
       *
       * @param string $text text to obfuscate
       * @return string obfuscated HTML
       */
      public static function escapeObfuscated(string $text): string {
          if (str_starts_with($text, 'mailto:')) {
              return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
          }
          $html = '';
          // Split once instead of calling mb_substr() for every position.
          foreach (mb_str_split($text) as $ch) {
              // A numeric character reference is "&#" + code point + ";".
              $html .= '&#' . mb_ord($ch) . ';';
          }
          return $html;
      }

      /**
       * Removes illegal characters from an HTML attribute name: tab, newline,
       * form feed, space, `/`, `>`, `"`, `'`, and `=`.
       *
       * @param string $name raw attribute name
       * @return string attribute name with the illegal characters removed
       */
      public static function scrubAttributeName(string $name): string {
          return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
      }

      /**
       * Strips one or more leading indents from a line or lines of markdown. An
       * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
       * spaces) are treated like one indent level. A line is only altered if
       * it begins with at least `levels` indents.
       *
       * @param string|string[] $line one line, or an array of lines
       * @param int $levels number of indent levels to remove; values below 1
       * leave the input unchanged
       * @return string|string[] the stripped line or lines (same shape as input)
       */
      public static function stripIndent(string|array &$line, int $levels=1): string|array {
          if ($levels < 1) {
              // Nothing to strip, and a negative repeat count is not a valid quantifier.
              return $line;
          }
          $regex = "^(?: {1,4}|\\t){{$levels}}";
          $strip = fn(string $l): string => mb_ereg_replace($regex, '', $l);
          return is_array($line) ? array_map($strip, $line) : $strip($line);
      }

      /**
       * Counts the number of indent levels in a line of text. Partial indents
       * (1 to 3 spaces) are counted as one indent level unless `fullIndentsOnly`
       * is `true`.
       *
       * @param string $line line of text to inspect (not modified)
       * @param bool $fullIndentsOnly when `true`, only 4 spaces or a tab count
       * as an indent
       * @return int number of leading indent levels
       */
      public static function countIndents(string &$line, bool $fullIndentsOnly=false): int {
          // normalize indents to tabs
          $indentPattern = $fullIndentsOnly ? "(?: {4}|\\t)" : "(?: {1,4}|\\t)";
          $normalized = mb_ereg_replace($indentPattern, "\t", $line);
          // remove content after indent, leaving only the leading tabs
          $leadingTabs = mb_ereg_replace("^(\\t*)(.*?)$", "\\1", $normalized);
          // count tabs
          return mb_strlen($leadingTabs);
      }

      /**
       * Returns a copy of an array without any whitespace-only lines at the end.
       *
       * @param string[] $lines
       * @return string[]
       */
      public static function withoutTrailingBlankLines(array $lines): array {
          $stripped = $lines;
          // array_key_last() finds the final element without assuming the
          // array has sequential integer keys.
          while (($last = array_key_last($stripped)) !== null
              && mb_strlen(trim($stripped[$last])) == 0) {
              array_pop($stripped);
          }
          return $stripped;
      }

      /**
       * Tests if an array of lines contains at least one blank. A blank line
       * can contain whitespace.
       *
       * @param string[] $lines
       * @return bool `true` if any line is empty or whitespace-only
       */
      public static function containsBlankLine(array $lines): bool {
          foreach ($lines as $line) {
              if (mb_strlen(trim($line)) == 0) return true;
          }
          return false;
      }

      /**
       * Returns the class name for objects, or the `gettype()` name for any
       * other value.
       *
       * @param mixed $value
       * @return string type name
       */
      public static function typename($value): string {
          return is_object($value) ? get_class($value) : gettype($value);
      }

      /**
       * Tests whether two associative arrays hold the same key/value pairs.
       * Values are compared the way `array_diff_assoc()` compares them (by
       * string representation).
       *
       * @param array $a
       * @param array $b
       * @return bool `true` if both arrays have the same keys with equal values
       */
      public static function equalAssocArrays(array &$a, array &$b) {
          // array_diff_assoc() is one-directional: it only reports entries of
          // $a missing from $b. The count check makes the comparison symmetric
          // so extra keys in $b are detected too.
          return count($a) === count($b) && empty(array_diff_assoc($a, $b));
      }
  }
  /**
   * Kinds of token an `MDToken` can represent.
   */
  enum MDTokenType {
      case Text;

      /**
       * Marks only the leading and trailing whitespace surrounding a run of
       * text; whitespace inside a run is not tokenized individually.
       */
      case Whitespace;

      case Underscore;
      case Asterisk;
      case Slash;
      case Tilde;
      case Bang;
      case Backtick;
      case Equal;
      case Caret;

      /** Token `content` holds the label. */
      case Label;

      /** Token `content` holds the URL, `extra` holds the title. */
      case URL;

      /** Token `content` holds the email address, `extra` holds the title. */
      case Email;

      /** Token `content` holds the URL. */
      case SimpleLink;

      /** Token `content` holds the email address. */
      case SimpleEmail;

      /** Token `content` holds the footnote symbol. */
      case Footnote;

      /** Token `modifier` holds an `MDTagModifier`. */
      case Modifier;

      /** Token `tag` holds an `MDHTMLTag`. */
      case HTMLTag;

      /** Wildcard for `MDToken::findFirstTokens` */
      case META_AnyNonWhitespace;

      /** Wildcard for `MDToken::findFirstTokens` */
      case META_OptionalWhitespace;
  }
  /**
   * Result object produced by `MDToken::findFirstTokens`.
   */
  class MDTokenMatch {
      /**
       * @param array $tokens the elements that matched the pattern, in order
       * (presumably `MDToken` instances; the search also accepts `MDNode`
       * elements for the non-whitespace wildcard)
       * @param int $index position in the searched array where the match begins
       */
      public function __construct(
          public array $tokens,
          public int $index
      ) {}
  }
  /**
   * Result object produced by `MDToken::findPairedTokens`.
   */
  class MDPairedTokenMatch {
      /**
       * @param MDToken[] $startTokens tokens that matched the opening pattern
       * @param MDToken[] $contentTokens tokens between the opening and closing matches
       * @param MDToken[] $endTokens tokens that matched the closing pattern
       * @param int $startIndex index of the first opening token
       * @param int $contentIndex index of the first content token
       * @param int $endIndex index of the first closing token
       * @param int $totalLength number of tokens spanned from the first opening
       * token through the last closing token
       */
      public function __construct(
          public array $startTokens,
          public array $contentTokens,
          public array $endTokens,
          public int $startIndex,
          public int $contentIndex,
          public int $endIndex,
          public int $totalLength
      ) {}
  }
  /**
   * One lexical unit in inline markdown syntax parsing.
   */
  class MDToken {
      /**
       * The original verbatim token string. Required as a plaintext fallback if
       * the token remains unresolved.
       */
      public string $original;

      public MDTokenType $type;

      /** Primary string content, when the token was built with a string. */
      public ?string $content = null;

      /** Additional content, e.g. a link title. */
      public ?string $extra = null;

      /** Set when the token was built with an `MDHTMLTag` as content. */
      public ?MDHTMLTag $tag = null;

      /** Set when the token was built with an `MDTagModifier` as content. */
      public ?MDTagModifier $modifier = null;

      private static $urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)'; // 1=URL, 2=title
      private static $urlRegex = '^\\((\\S+?)\\)'; // 1=URL

      /**
       * Creates a token. The `content` argument is routed to the `modifier`,
       * `tag`, or `content` property depending on its type.
       *
       * @param string $original verbatim token string
       * @param MDTokenType $type token type
       * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
       * the token
       * @param string|null $extra additional content
       */
      public function __construct(string $original, MDTokenType $type,
          string|MDTagModifier|MDHTMLTag|null $content=null,
          ?string $extra=null) {
          $this->original = $original;
          $this->type = $type;
          if ($content instanceof MDTagModifier) {
              $this->modifier = $content;
          } elseif ($content instanceof MDHTMLTag) {
              $this->tag = $content;
          } else {
              $this->content = $content;
          }
          $this->extra = $extra;
      }

      /**
       * Debug representation showing the class name, token type, and content.
       */
      public function __toString(): string {
          $classname = MDUtils::typename($this);
          return "<{$classname} type={$this->type->name} content=\"{$this->content}\">";
      }

      /**
       * Attempts to parse a label token from the beginning of `line`. A label is
       * of the form `[content]`. Nested square brackets are allowed if
       * balanced, a backslash escapes the following character, and an
       * unbalanced `)` aborts the parse. If found, returns an array:
       * - `0`: the entire label including brackets
       * - `1`: the content of the label
       *
       * @param string $line
       * @return ?string[] match groups or null if not found
       */
      public static function tokenizeLabel(string $line): ?array {
          if (!str_starts_with($line, '[')) return null;
          // Split once; calling mb_substr() per position rescans the string each time.
          $chars = mb_str_split($line);
          $length = count($chars);
          $parenDepth = 0;
          $bracketDepth = 0;
          for ($p = 1; $p < $length; $p++) {
              $ch = $chars[$p];
              if ($ch === '\\') {
                  // Escaped character: skip whatever follows the backslash.
                  $p++;
              } elseif ($ch === '(') {
                  $parenDepth++;
              } elseif ($ch === ')') {
                  $parenDepth--;
                  if ($parenDepth < 0) return null;
              } elseif ($ch === '[') {
                  $bracketDepth++;
              } elseif ($ch === ']') {
                  if ($bracketDepth > 0) {
                      $bracketDepth--;
                  } else {
                      // Closing bracket of the outermost label.
                      return [ mb_substr($line, 0, $p + 1), mb_substr($line, 1, $p - 1) ];
                  }
              }
          }
          return null;
      }

      /**
       * Attempts to parse a URL token from the beginning of `line`. A URL token
       * is of the form `(url)` or `(url "title")`. If found, returns an array:
       * - `0`: the entire URL token including parentheses
       * - `1`: the URL
       * - `2`: the optional title, or `null`
       *
       * Returns `null` if `line` is also recognized by `tokenizeEmail`.
       *
       * @param string $line
       * @return ?array token tuple
       */
      public static function tokenizeURL(string $line): ?array {
          $groups = [];
          if (mb_eregi(self::$urlWithTitleRegex, $line, $groups)) {
              $result = $groups;
          } elseif (mb_eregi(self::$urlRegex, $line, $groups)) {
              $result = [ $groups[0], $groups[1], null ];
          } else {
              return null;
          }
          // make sure it's not better described as an email address
          return (self::tokenizeEmail($line) !== null) ? null : $result;
      }

      /**
       * Attempts to parse an email address from the beginning of `line`. An
       * email address is of the form `(user@example.com)` or
       * `(user@example.com "link title")`. If found, returns an array:
       * - `0`: the entire token including parentheses
       * - `1`: the email address
       * - `2`: the optional link title, or `null`
       *
       * @param string $line
       * @return ?string[] token tuple
       */
      public static function tokenizeEmail(string $line): ?array {
          $groups = [];
          if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s+\"(.*?)\"\\s*\\)",
              $line, $groups)) {
              return $groups;
          }
          if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s*\\)", $line, $groups)) {
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
       * If found, returns a `MDTokenMatch`, otherwise `null`.
       *
       * `META_AnyNonWhitespace` matches any element that is not a whitespace
       * token. `META_OptionalWhitespace` matches zero or one whitespace token,
       * so a result may contain fewer tokens than the pattern has elements.
       * An optional whitespace element at the end of the pattern also matches
       * when the token array ends there.
       *
       * @param (MDToken|MDNode)[] $tokensToSearch - mixed array of `MDToken` and
       * `MDNode` elements
       * @param MDTokenType[] $pattern - contiguous run of token types to find
       * @param int $startIndex - token index to begin searching (defaults to 0)
       * @return ?MDTokenMatch match object, or `null` if not found
       * @throws Error if `pattern` is empty
       */
      public static function findFirstTokens(array $tokensToSearch, array $pattern, int $startIndex=0): ?MDTokenMatch {
          $patternLength = sizeof($pattern);
          if ($patternLength == 0) {
              throw new Error("pattern empty");
          }
          $tokenCount = sizeof($tokensToSearch);
          for ($t = $startIndex; $t < $tokenCount; $t++) {
              $matchedAll = true;
              $matched = [];
              // Shifts left each time an optional element consumes no token.
              $patternOffset = 0;
              for ($p = 0; $p < $patternLength; $p++) {
                  $elem = $pattern[$p];
                  $t0 = $t + $p + $patternOffset;
                  if ($t0 >= $tokenCount) {
                      // Ran out of tokens. Optional whitespace may match
                      // nothing; any other element cannot match.
                      if ($elem === MDTokenType::META_OptionalWhitespace) continue;
                      return null;
                  }
                  $token = $tokensToSearch[$t0];
                  $isWhitespace = $token instanceof MDToken && $token->type === MDTokenType::Whitespace;
                  if ($elem === MDTokenType::META_OptionalWhitespace) {
                      if ($isWhitespace) {
                          array_push($matched, $token);
                      } else {
                          // Not consumed; the next element re-examines this token.
                          $patternOffset--;
                      }
                  } elseif ($elem === MDTokenType::META_AnyNonWhitespace) {
                      if ($isWhitespace) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  } else {
                      if (!($token instanceof MDToken) || $token->type != $elem) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  }
              }
              if ($matchedAll) {
                  return new MDTokenMatch($matched, $t);
              }
          }
          return null;
      }

      /**
       * Searches an array of MDToken for a given starting pattern and ending
       * pattern and returns match info about both and the tokens in between.
       *
       * A candidate pair is only considered when at least one token lies
       * between the start and end matches. If `contentValidator` is specified,
       * it will be called with the content tokens of a potential match. If the
       * validator returns `true`, the result will be accepted and returned by
       * this method. If the validator returns `false`, this method will keep
       * looking for another matching pair. If no validator is given the first
       * pair with non-empty content is returned.
       *
       * If a match is found, a `MDPairedTokenMatch` is returned with details
       * of the opening tokens, closing tokens, and content tokens between. Otherwise
       * `null` is returned.
       *
       * @param MDToken[] $tokensToSearch - array of `MDToken` to search in
       * @param MDTokenType[] $startPattern - array of `MDTokenType` to find first
       * @param MDTokenType[] $endPattern - array of `MDTokenType` to find positioned after `startPattern`
       * @param ?callable $contentValidator - optional validator function. If provided, will be passed an array of inner `MDToken`, and the function can return `true` to accept the contents or `false` to keep searching
       * @param int $startIndex - token index where searching should begin
       * @return ?MDPairedTokenMatch match, or `null`
       */
      public static function findPairedTokens(array $tokensToSearch,
          array $startPattern, array $endPattern, ?callable $contentValidator=null,
          int $startIndex=0): ?MDPairedTokenMatch {
          $tokenCount = sizeof($tokensToSearch);
          for ($s = $startIndex; $s < $tokenCount; $s++) {
              $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
              if ($startMatch === null) return null;
              // Content begins right after the opening tokens.
              $contentStart = $startMatch->index + sizeof($startMatch->tokens);
              $endStart = $contentStart;
              while ($endStart < $tokenCount) {
                  $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                  if ($endMatch === null) break;
                  $contents = array_slice($tokensToSearch, $contentStart, $endMatch->index - $contentStart);
                  if (sizeof($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                      return new MDPairedTokenMatch($startMatch->tokens,
                          $contents,
                          $endMatch->tokens,
                          $startMatch->index,
                          $contentStart,
                          $endMatch->index,
                          $endMatch->index + sizeof($endMatch->tokens) - $startMatch->index);
                  }
                  // Contents rejected. Try next end match.
                  $endStart = $endMatch->index + 1;
              }
              // No end matches. Resume after this start match (the loop
              // increment moves one past it).
              $s = $startMatch->index;
          }
          return null;
      }

      /**
       * Tests whether `other` is an `MDToken` with the same field values.
       *
       * NOTE(review): `tag` is compared by identity (`!==`) while `modifier`
       * is compared loosely (`!=`); confirm this asymmetry is intended.
       *
       * @param mixed $other value to compare against
       * @return bool `true` if `other` is an equivalent token
       */
      public function equals($other) {
          if (!($other instanceof MDToken)) return false;
          if ($other->original !== $this->original) return false;
          if ($other->type != $this->type) return false;
          if ($other->content !== $this->content) return false;
          if ($other->extra !== $this->extra) return false;
          if ($other->tag !== $this->tag) return false;
          if ($other->modifier != $this->modifier) return false;
          return true;
      }
  }
  405. /**
  406. * Parsing and rendering state. Passed around throughout the parsing process.
  407. *
  408. * States are hierarchical. A sub-state can be created by calling `.copy()` with
  409. * a new array of lines. The sub-state points back to its parent state. This
  410. * is done to parse inner content of a syntax as its own standalone document.
  411. *
  412. * If a custom `MDReader` implementation wants to store data in this object,
  413. * always do so on `state.root` to ensure it's stored on the original state,
  414. * not a child state. Otherwise data may be lost when the sub-state is discarded.
  415. */
  class MDState {
      /**
       * Lines of the markdown document. The current line index is pointed to
       * by `$p`.
       *
       * @var string[]
       */
      public array $lines;

      /**
       * Current line pointer into array `$lines`.
       */
      public int $p = 0;

      /**
       * General storage for anything readers need to track during the parsing
       * process.
       */
      public array $userInfo = [];

      /**
       * State this one was copied from, or `null` when this is the root state.
       */
      private ?MDState $parent = null;

      /**
       * Array of `MDReader`s sorted by block reading priority.
       * @var MDReader[]
       */
      public array $readersByBlockPriority = [];

      /**
       * Array of `MDReader`s sorted by tokenization priority.
       * @var MDReader[]
       */
      public array $readersByTokenPriority = [];

      /**
       * Array of `[pass, MDReader]` tuples sorted by substitution priority.
       * @var array[]
       */
      public array $readersBySubstitutePriority = [];

      /**
       * Prefix to include in any generated `id` attributes on HTML elements.
       * Useful for keeping elements unique in multiple parsed documents in the
       * same HTML page.
       */
      public string $elementIdPrefix = '';

      /**
       * Filter for removing unapproved HTML tags, attributes, and values.
       */
      public MDHTMLFilter $tagFilter;

      /**
       * `microtime(true)` timestamp captured when this state was constructed.
       * `checkExecutionTime()` reads the value stored on the root state.
       *
       * @var float
       */
      public $startTime;

      /**
       * Mapping of lowercased reference symbols to URLs. Only the root state's
       * copy is read or written.
       * @var array symbol -> URL
       */
      private array $referenceToURL = [];

      /**
       * Mapping of lowercased reference symbols to titles. Only the root
       * state's copy is read or written.
       * @var array symbol -> title string
       */
      private array $referenceToTitle = [];

      /**
       * @param string[] $lines - lines of markdown text
       */
      public function __construct(array $lines) {
          $this->lines = $lines;
          $this->startTime = microtime(true);
      }

      /**
       * Ascends the parent chain to the root `MDState` instance. This should be
       * used when referencing most stored fields except `lines` and `p`.
       */
      public function root(): MDState {
          return $this->parent ? $this->parent->root() : $this;
      }

      /**
       * The current line in `lines`, or `null` when `p` is past the last line.
       */
      public function currentLine(): ?string {
          return $this->lines[$this->p] ?? null;
      }

      /**
       * Creates a copy of this state with new lines. Useful for parsing nested
       * content. Only the parent link is set on the copy; shared data is
       * reached through `root()`.
       *
       * @param string[] $lines
       * @return MDState copied sub-state
       */
      public function copy(array $lines) {
          $cp = new MDState($lines);
          $cp->parent = $this;
          return $cp;
      }

      /**
       * Tests if there are at least `$minCount` lines available to read. If
       * `$p` is not provided the count is relative to `$this->p`.
       */
      public function hasLines(int $minCount, ?int $p=null): bool {
          $relativeTo = $p ?? $this->p;
          return $relativeTo + $minCount <= sizeof($this->lines);
      }

      /**
       * Reads and returns an array of blocks from the current line pointer.
       * Stops at the end of the lines or as soon as no block can be read.
       *
       * @return MDBlockNode[] parsed blocks
       */
      public function readBlocks(): array {
          $blocks = [];
          while ($this->hasLines(1)) {
              $block = $this->readNextBlock();
              if (!$block) break;
              $blocks[] = $block;
          }
          return $blocks;
      }

      /**
       * Creates a simple `MDBlockNode` from all remaining lines if no other
       * registered blocks match. Moves `p` to the end of the lines.
       */
      private function readFallbackBlock(): ?MDBlockNode {
          if ($this->p >= sizeof($this->lines)) return null;
          $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
          if (sizeof($lines) == 0) return null;
          $this->p = sizeof($this->lines);
          return new MDBlockNode($this->inlineMarkdownToNode(implode("\n", $lines)));
      }

      /**
       * Attempts to read one block from the current line pointer. The pointer
       * will be positioned just after the end of the block.
       *
       * @throws Error if a reader returns a block without advancing `p`
       */
      private function readNextBlock(): ?MDBlockNode {
          // Skip blank lines preceding the block.
          while ($this->hasLines(1) && trim($this->lines[$this->p]) === '') {
              $this->p++;
          }
          if (!$this->hasLines(1)) return null;
          foreach ($this->root()->readersByBlockPriority as $reader) {
              $startP = $this->p;
              $block = $reader->readBlock($this);
              if (!$block) continue;
              if ($this->p == $startP) {
                  $readerClassName = MDUtils::typename($reader);
                  $blockClassName = MDUtils::typename($block);
                  throw new Error("{$readerClassName} returned an " .
                      "{$blockClassName} without incrementing MDState.p. " .
                      "This could lead to an infinite loop.");
              }
              return $block;
          }
          return $this->readFallbackBlock();
      }

      /**
       * Splits inline markdown into tokens. Sub-states delegate to their
       * parent, so tokenization always runs on the root state.
       *
       * A backslash makes the following character literal text. Characters no
       * reader claims are accumulated and emitted as text/whitespace tokens.
       *
       * @param string $line
       * @return MDToken[]
       * @throws Error if a reader returns a token with an empty `original`
       */
      private function inlineMarkdownToTokens(string $line): array {
          if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);
          $tokens = [];
          $text = '';
          $expectLiteral = false;
          /**
           * Flushes accumulated content in `$text` to `$tokens`, splitting
           * leading and trailing whitespace into their own tokens.
           */
          $endText = function() use (&$tokens, &$text) {
              if (mb_strlen($text) == 0) return;
              $textGroups = [];
              if (mb_eregi('^(\\s+)(.*?)$', $text, $textGroups)) {
                  $tokens[] = new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]);
                  $text = is_string($textGroups[2]) ? $textGroups[2] : '';
              }
              if (mb_eregi('^(.*?)(\\s+)$', $text, $textGroups)) {
                  $tokens[] = new MDToken($textGroups[1], MDTokenType::Text, $textGroups[1]);
                  $tokens[] = new MDToken($textGroups[2], MDTokenType::Whitespace, $textGroups[2]);
              } else {
                  $tokens[] = new MDToken($text, MDTokenType::Text, $text);
              }
              $text = '';
          };
          $readers = $this->root()->readersByTokenPriority;
          // Measured once; the line does not change while tokenizing.
          $length = mb_strlen($line);
          for ($p = 0; $p < $length; $p++) {
              $ch = mb_substr($line, $p, 1);
              if ($expectLiteral) {
                  $text .= $ch;
                  $expectLiteral = false;
                  continue;
              }
              if ($ch === '\\') {
                  $expectLiteral = true;
                  continue;
              }
              $remainder = mb_substr($line, $p);
              $found = false;
              foreach ($readers as $reader) {
                  $token = $reader->readToken($this, $remainder);
                  if ($token === null) continue;
                  if ($token->original == null || mb_strlen($token->original) == 0) {
                      $readerClassName = MDUtils::typename($reader);
                      // Must be a double-quoted string: backticks are PHP's
                      // shell execution operator.
                      throw new Error("{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.");
                  }
                  $endText();
                  $tokens[] = $token;
                  // Skip the rest of the consumed text; the loop's own
                  // increment accounts for the final character.
                  $p += mb_strlen($token->original) - 1;
                  $found = true;
                  break;
              }
              if (!$found) {
                  $text .= $ch;
              }
          }
          $endText();
          return $tokens;
      }

      /**
       * Converts a line of markdown to an `MDInlineNode`. If parsing yields
       * exactly one node it is returned directly; otherwise the nodes are
       * wrapped in a new `MDInlineNode`.
       *
       * @param string|string[] $line
       * @return MDInlineNode
       */
      public function inlineMarkdownToNode(string|array $line): MDInlineNode {
          $nodes = $this->inlineMarkdownToNodes($line);
          return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
      }

      /**
       * Converts a line of markdown to an array of `MDInlineNode`s. An array
       * of lines is joined with newlines first.
       *
       * @param string|string[] $line
       * @return MDInlineNode[]
       */
      public function inlineMarkdownToNodes(string|array $line): array {
          $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
          return $this->tokensToNodes($tokens);
      }

      /**
       * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an
       * array of only `MDInlineNode` via repeated `MDReader` substitution.
       *
       * @param (MDToken|MDInlineNode)[] $tokens
       * @return MDInlineNode[]
       * @throws Error if an element is neither an `MDToken` nor an `MDNode`
       */
      public function tokensToNodes(array $tokens): array {
          $nodes = $tokens;
          $root = $this->root();
          // Perform repeated substitutions, converting sequences of tokens into
          // nodes, until no more substitutions can be made. After any change
          // the scan restarts from the highest-priority reader.
          do {
              $anyChanges = false;
              foreach ($root->readersBySubstitutePriority as $readerTuple) {
                  /** @var int */
                  $pass = $readerTuple[0];
                  /** @var MDReader */
                  $reader = $readerTuple[1];
                  if ($reader->substituteTokens($this, $pass, $nodes)) {
                      $anyChanges = true;
                      break;
                  }
              }
          } while ($anyChanges);
          // Convert any remaining tokens to text nodes. Also apply any inline
          // CSS modifiers to the non-text node immediately preceding them.
          $lastNode = null;
          return array_map(function($node) use (&$lastNode, $root) {
              if ($node instanceof MDToken) {
                  if ($node->type == MDTokenType::Modifier && $lastNode) {
                      // The tag name of the preceding node is not known here,
                      // so only global attributes are permitted.
                      $root->tagFilter->scrubModifier($node->modifier, null);
                      $node->modifier->applyTo($lastNode);
                      $lastNode = null;
                      return new MDTextNode('');
                  }
                  $lastNode = null;
                  return new MDTextNode($node->original);
              }
              if ($node instanceof MDNode) {
                  $lastNode = ($node instanceof MDTextNode) ? null : $node;
                  return $node;
              }
              $nodeClassName = MDUtils::typename($node);
              throw new Error("Unexpected node type {$nodeClassName}");
          }, $nodes);
      }

      /**
       * Checks if parsing has taken an excessive length of time, measured from
       * the construction of the root state.
       *
       * @param float $maxSeconds maximum allowed elapsed seconds
       * @throws Error if more than `$maxSeconds` have elapsed
       */
      public function checkExecutionTime(float $maxSeconds=1.0) {
          $elapsed = microtime(true) - $this->root()->startTime;
          if ($elapsed > $maxSeconds) {
              throw new Error("Markdown parsing taking too long. Infinite loop?");
          }
      }

      /**
       * Defines a URL by reference symbol. Symbols are case-insensitive. The
       * title is only stored when one is given.
       */
      public function defineURL(string $reference, string $url, ?string $title=null) {
          $root = $this->root();
          $key = mb_strtolower($reference);
          $root->referenceToURL[$key] = $url;
          if ($title !== null) $root->referenceToTitle[$key] = $title;
      }

      /**
       * Returns the URL associated with a reference symbol, or `null` if the
       * symbol has not been defined.
       */
      public function urlForReference(string $reference): ?string {
          return $this->root()->referenceToURL[mb_strtolower($reference)] ?? null;
      }

      /**
       * Returns the link title associated with a reference symbol, or `null`
       * if none has been defined.
       */
      public function urlTitleForReference(string $reference): ?string {
          return $this->root()->referenceToTitle[mb_strtolower($reference)] ?? null;
      }
  }
  726. /**
  727. * Defines a set of allowable HTML tags, attributes, and CSS.
  728. */
  class MDHTMLFilter {
      /**
       * Mapping of permitted lowercase tag names to arrays containing allowable
       * attributes for those tags. Does not need to include those attributes
       * defined in `allowableGlobalAttributes`.
       *
       * Values are arrays with allowable lowercase attribute names mapped to
       * allowable value patterns. A `*` means any value is acceptable. Multiple
       * allowable values can be joined together with `|`. These special symbols
       * represent certain kinds of values and can be used in combination or in
       * place of literal values.
       *
       * - `{classlist}`: A list of legal CSS classnames, separated by spaces
       * - `{int}`: An integer
       * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
       * - `{style}`: One or more CSS declarations, separated by semicolons (simple
       *   `key: value;` syntax only)
       * - `{url}`: A URL
       *
       * @var array<string, array<string, string>>
       */
      public array $allowableTags = [
          'address' => [
              'cite' => '{url}',
          ],
          'h1' => [],
          'h2' => [],
          'h3' => [],
          'h4' => [],
          'h5' => [],
          'h6' => [],
          'blockquote' => [],
          'dl' => [],
          'dt' => [],
          'dd' => [],
          'div' => [],
          'hr' => [],
          'ul' => [],
          'ol' => [
              'start' => '{int}',
              'type' => 'a|A|i|I|1',
          ],
          'li' => [
              'value' => '{int}',
          ],
          'p' => [],
          'pre' => [],
          'table' => [],
          'thead' => [],
          'tbody' => [],
          'tfoot' => [],
          'tr' => [],
          'td' => [],
          'th' => [],
          'a' => [
              'href' => '{url}',
              'target' => '*',
          ],
          'abbr' => [],
          'b' => [],
          'br' => [],
          'cite' => [],
          'code' => [],
          'data' => [
              'value' => '*',
          ],
          'dfn' => [],
          'em' => [],
          'i' => [],
          'kbd' => [],
          'mark' => [],
          'q' => [
              'cite' => '{url}',
          ],
          's' => [],
          'samp' => [],
          'small' => [],
          'span' => [],
          'strong' => [],
          'sub' => [],
          'sup' => [],
          'time' => [
              'datetime' => '*',
          ],
          'u' => [],
          'var' => [],
          'wbr' => [],
          'img' => [
              'alt' => '*',
              // `href` is not a real <img> attribute; it is kept only so
              // existing behavior does not change.
              'href' => '{url}',
              // The image location attribute in HTML is `src`.
              'src' => '{url}',
          ],
          'figure' => [],
          'figcaption' => [],
          'del' => [],
          'ins' => [],
          'details' => [],
          'summary' => [],
      ];

      /**
       * Mapping of allowable lowercase global attributes to their permitted
       * values. Uses same value pattern syntax as described in `allowableTags`.
       * A name ending in `*` matches any attribute name with that prefix.
       *
       * @var array<string, string>
       */
      public array $allowableGlobalAttributes = [
          'class' => '{classlist}',
          'data-*' => '*',
          'dir' => 'ltr|rtl|auto',
          'id' => '*',
          'lang' => '*',
          'style' => '{style}',
          'title' => '*',
          'translate' => 'yes|no|{none}',
      ];

      /**
       * Mapping of allowable CSS style names to their allowable value patterns.
       * Multiple values can be delimited with `|` characters. Limited support
       * so far.
       *
       * Recognized special values:
       * - `{color}`: A hex or named color
       *
       * @var array<string, string>
       */
      public array $allowableStyleKeys = [
          'background-color' => '{color}',
          'color' => '{color}',
      ];

      private static string $permissiveURLRegex = '^\\S+$';
      private static string $integerRegex = '^[\\-]?\\d+$';
      private static string $classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';
      private static string $styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';

      /**
       * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
       * itself has already been whitelisted. An attribute is removed when
       * either its name or its value is not permitted.
       *
       * @param MDHTMLTag $tag - HTML tag, modified in place
       */
      public function scrubTag(MDHTMLTag $tag) {
          foreach ($tag->attributes as $name => $value) {
              $attributeName = (string) $name;
              if (!$this->isValidAttributeName($tag->tagName, $attributeName)
                  || !$this->isValidAttributeValue($tag->tagName, $attributeName, $value)) {
                  unset($tag->attributes[$name]);
              }
          }
      }

      /**
       * Scrubs all forbidden attributes from an HTML modifier. The modifier is
       * changed in place: disallowed class lists, ids, styles, and attributes
       * are cleared or removed.
       *
       * @param MDTagModifier $modifier
       * @param ?string $tagName HTML tag name, if known, otherwise only
       *   global attributes will be permitted
       */
      public function scrubModifier(MDTagModifier $modifier, ?string $tagName = null) {
          if (sizeof($modifier->cssClasses) > 0) {
              // The class list is validated as a whole; one bad class name
              // clears every class.
              $classList = implode(' ', $modifier->cssClasses);
              if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
                  $modifier->cssClasses = [];
              }
          }
          if ($modifier->cssId !== null) {
              if (!$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
                  $modifier->cssId = null;
              }
          }
          if (!$this->isValidAttributeName($tagName, 'style')) {
              $modifier->cssStyles = [];
          } else {
              foreach ($modifier->cssStyles as $key => $val) {
                  if (!$this->isValidStyleValue((string) $key, (string) $val)) {
                      unset($modifier->cssStyles[$key]);
                  }
              }
          }
          foreach ($modifier->attributes as $key => $val) {
              if (!$this->isValidAttributeValue($tagName, (string) $key, $val)) {
                  unset($modifier->attributes[$key]);
              }
          }
      }

      /**
       * Tests if an HTML tag name is permitted. Case-insensitive.
       */
      public function isValidTagName(string $tagName): bool {
          return isset($this->allowableTags[mb_strtolower($tagName)]);
      }

      /**
       * Tests if an HTML attribute name is permitted, either globally or for
       * the given tag. With a `null` tag name only global attributes pass.
       */
      public function isValidAttributeName(?string $tagName, string $attributeName): bool {
          $lcAttributeName = mb_strtolower($attributeName);
          if ($this->globalAttributePattern($lcAttributeName) !== null) {
              return true;
          }
          return $this->tagAttributePattern($tagName, $lcAttributeName) !== null;
      }

      /**
       * Tests if an attribute value is allowable. Global attribute patterns
       * take precedence over tag-specific ones.
       *
       * @param ?string $tagName tag name, or `null` to check global attributes only
       * @param string $attributeName attribute name (case-insensitive)
       * @param string|bool $attributeValue value, or `true` for an attribute
       *   given without a value
       */
      public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
          $lcAttributeName = mb_strtolower($attributeName);
          $valuePattern = $this->globalAttributePattern($lcAttributeName)
              ?? $this->tagAttributePattern($tagName, $lcAttributeName);
          if ($valuePattern === null) return false;
          return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
      }

      /**
       * Finds the value pattern for a global attribute: an exact name match
       * first, then the first wildcard (`prefix*`) entry whose prefix matches.
       *
       * @param string $lcAttributeName lowercased attribute name
       * @return ?string value pattern, or `null` if not a global attribute
       */
      private function globalAttributePattern(string $lcAttributeName): ?string {
          $exact = $this->allowableGlobalAttributes[$lcAttributeName] ?? null;
          if ($exact !== null) return $exact;
          foreach ($this->allowableGlobalAttributes as $namePattern => $valuePattern) {
              $namePattern = (string) $namePattern;
              if (!str_ends_with($namePattern, '*')) continue;
              $prefix = mb_substr($namePattern, 0, -1);
              if (str_starts_with($lcAttributeName, $prefix)) {
                  return $valuePattern;
              }
          }
          return null;
      }

      /**
       * Finds the value pattern for a tag-specific attribute.
       *
       * @param ?string $tagName tag name in any case, or `null`
       * @param string $lcAttributeName lowercased attribute name
       * @return ?string value pattern, or `null` if the tag or attribute is
       *   not listed in `allowableTags`
       */
      private function tagAttributePattern(?string $tagName, string $lcAttributeName): ?string {
          if ($tagName === null) return null;
          // `?? null` avoids an undefined-key warning for unknown tag names.
          $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
          if ($tagAttributes === null) return null;
          return $tagAttributes[$lcAttributeName] ?? null;
      }

      /**
       * Tests a value against a `|`-delimited value pattern.
       *
       * A boolean value (attribute with no value) only matches `*` and
       * `{none}`; it is never coerced to a string for the other checks.
       */
      private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
          $text = is_string($value) ? $value : null;
          foreach (explode('|', $pattern) as $option) {
              $matched = match ($option) {
                  '*' => true,
                  '{classlist}' => $text !== null && mb_eregi(self::$classListRegex, $text),
                  '{int}' => $text !== null && mb_eregi(self::$integerRegex, $text),
                  '{none}' => $value === true,
                  '{style}' => $text !== null && $this->isValidStyleDeclaration($text),
                  '{url}' => $text !== null && mb_eregi(self::$permissiveURLRegex, $text),
                  default => $value === $option,
              };
              if ($matched) return true;
          }
          return false;
      }

      /**
       * Tests if a string of one or more style `key: value;` declarations is
       * fully allowable. Empty declarations are ignored; any declaration that
       * does not split into exactly one key and one value fails the whole
       * string.
       */
      public function isValidStyleDeclaration(string $styles): bool {
          foreach (explode(';', $styles) as $setting) {
              if (trim($setting) === '') continue;
              $parts = explode(':', $setting);
              if (sizeof($parts) != 2) return false;
              $name = trim($parts[0]);
              if (!$this->isValidStyleKey($name)) return false;
              if (!$this->isValidStyleValue($name, trim($parts[1]))) return false;
          }
          return true;
      }

      /**
       * Tests if a CSS style key is allowable.
       */
      public function isValidStyleKey(string $key): bool {
          return isset($this->allowableStyleKeys[$key]);
      }

      /**
       * Tests if a CSS style value is allowable for the given style key.
       */
      public function isValidStyleValue(string $key, string $value): bool {
          $pattern = $this->allowableStyleKeys[$key] ?? null;
          if ($pattern === null) return false;
          foreach (explode('|', $pattern) as $option) {
              // `{color}` is a placeholder, never a literal value to compare
              // against.
              $matched = ($option === '{color}')
                  ? $this->isValidCSSColor($value)
                  : ($value === $option);
              if ($matched) return true;
          }
          return false;
      }

      /**
       * Tests if a value is a 3- or 6-digit hex color or a purely alphabetic
       * color name.
       */
      private function isValidCSSColor(string $value): bool {
          return (bool) mb_eregi(self::$styleColorRegex, $value);
      }
  }
  1032. /**
  1033. * Represents a single HTML tag. Paired tags are represented separately.
  1034. */
  class MDHTMLTag {
      // Parser states used by `fromLineStart()`.
      private const EXPECT_OPEN_BRACKET = 0;
      private const EXPECT_CLOSER_OR_NAME = 1;
      private const EXPECT_NAME = 2;
      private const EXPECT_ATTRIBUTE_NAME_OR_END = 3;
      private const EXPECT_EQUALS_OR_ATTRIBUTE_OR_END = 4;
      private const EXPECT_ATTRIBUTE_VALUE = 5;
      private const EXPECT_CLOSE_BRACKET = 6;

      private static string $htmlTagNameFirstRegex = '[a-z]';
      private static string $htmlTagNameMedialRegex = '[a-z0-9]';
      private static string $htmlAttributeNameFirstRegex = '[a-z]';
      private static string $htmlAttributeNameMedialRegex = '[a-z0-9-]';
      private static string $whitespaceCharRegex = '\\s';

      /**
       * Verbatim string of the original parsed tag. Not modified. Should be
       * considered unsafe for inclusion in the final document. Cast the tag
       * to a string (`__toString()`) instead.
       */
      public string $original;

      public string $tagName;

      public bool $isCloser;

      /**
       * Map of attribute names to value strings, or to `true` for attributes
       * given without a value.
       */
      public array $attributes;

      /**
       * @param string $original verbatim source text of the tag
       * @param string $tagName
       * @param bool $isCloser whether this is a closing tag like `</p>`
       * @param array $attributes map of attribute names to values
       */
      public function __construct(string $original, string $tagName, bool $isCloser,
              array $attributes) {
          $this->original = $original;
          $this->tagName = $tagName;
          $this->isCloser = $isCloser;
          $this->attributes = $attributes;
      }

      /**
       * Renders the tag as HTML. Closing tags render without attributes.
       * Attribute names are passed through `MDUtils::scrubAttributeName()`
       * and values through `htmlentities()`.
       */
      public function __toString(): string {
          if ($this->isCloser) {
              return "</{$this->tagName}>";
          }
          $html = '<' . $this->tagName;
          foreach ($this->attributes as $key => $value) {
              $safeName = MDUtils::scrubAttributeName($key);
              if ($value === true) {
                  $html .= " {$safeName}";
              } else {
                  $escapedValue = htmlentities("{$value}");
                  $html .= " {$safeName}=\"{$escapedValue}\"";
              }
          }
          return $html . '>';
      }

      /**
       * Tests whether another value is a tag with the same name, closer flag,
       * and attributes. `original` is not compared.
       */
      public function equals($other): bool {
          if (!($other instanceof MDHTMLTag)) return false;
          if ($other->tagName !== $this->tagName) return false;
          if ($other->isCloser !== $this->isCloser) return false;
          return MDUtils::equal($other->attributes, $this->attributes);
      }

      /**
       * Checks the start of the given string for presence of an HTML tag.
       *
       * Quoted attribute values have HTML entities decoded; unquoted values
       * are kept verbatim and end at whitespace or at the closing `>`.
       *
       * @param string $line text that may begin with an HTML tag
       * @return ?MDHTMLTag the parsed tag, or `null` if the string does not
       *   start with a complete, well-formed tag
       */
      public static function fromLineStart(string $line): ?MDHTMLTag {
          // Cheap rejection before measuring the whole string.
          if (!str_starts_with($line, '<')) return null;
          $isCloser = false;
          $tagName = '';
          $attributeName = '';
          $attributeValue = '';
          // null = no value started, '' = unquoted value, otherwise the
          // opening quote character.
          $attributeQuote = null;
          $attributes = [];
          $fullTag = null;
          // Stores the pending attribute (if any) and resets the accumulators.
          $endAttribute = function(bool $unescape=false) use (&$attributes, &$attributeName, &$attributeValue, &$attributeQuote) {
              if (mb_strlen($attributeName) > 0) {
                  if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) {
                      $attributes[$attributeName] = $unescape ? html_entity_decode($attributeValue, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') : $attributeValue;
                  } else {
                      $attributes[$attributeName] = true;
                  }
              }
              $attributeName = '';
              $attributeValue = '';
              $attributeQuote = null;
          };
          $expect = self::EXPECT_OPEN_BRACKET;
          $length = mb_strlen($line);
          for ($p = 0; $p < $length && $fullTag === null; $p++) {
              $ch = mb_substr($line, $p, 1);
              $isWhitespace = mb_eregi(self::$whitespaceCharRegex, $ch);
              switch ($expect) {
                  case self::EXPECT_OPEN_BRACKET:
                      if ($ch !== '<') return null;
                      $expect = self::EXPECT_CLOSER_OR_NAME;
                      break;
                  case self::EXPECT_CLOSER_OR_NAME:
                      if ($ch === '/') {
                          $isCloser = true;
                      } else {
                          // Not a closer; re-read this character as a name.
                          $p--;
                      }
                      $expect = self::EXPECT_NAME;
                      break;
                  case self::EXPECT_NAME:
                      if ($tagName === '') {
                          if (!mb_eregi(self::$htmlTagNameFirstRegex, $ch)) return null;
                          $tagName .= $ch;
                      } elseif (mb_eregi(self::$htmlTagNameMedialRegex, $ch)) {
                          $tagName .= $ch;
                      } else {
                          // End of the name; re-read this character in the
                          // next state.
                          $p--;
                          $expect = $isCloser
                              ? self::EXPECT_CLOSE_BRACKET
                              : self::EXPECT_ATTRIBUTE_NAME_OR_END;
                      }
                      break;
                  case self::EXPECT_ATTRIBUTE_NAME_OR_END:
                      if ($attributeName === '') {
                          if ($isWhitespace) {
                              // skip whitespace
                          } elseif ($ch === '/') {
                              $expect = self::EXPECT_CLOSE_BRACKET;
                          } elseif ($ch === '>') {
                              $fullTag = mb_substr($line, 0, $p + 1);
                          } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                              $attributeName .= $ch;
                          } else {
                              return null;
                          }
                      } elseif ($isWhitespace) {
                          $expect = self::EXPECT_EQUALS_OR_ATTRIBUTE_OR_END;
                      } elseif ($ch === '/') {
                          $endAttribute();
                          $expect = self::EXPECT_CLOSE_BRACKET;
                      } elseif ($ch === '>') {
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif ($ch === '=') {
                          $expect = self::EXPECT_ATTRIBUTE_VALUE;
                      } elseif (mb_eregi(self::$htmlAttributeNameMedialRegex, $ch)) {
                          $attributeName .= $ch;
                      } else {
                          return null;
                      }
                      break;
                  case self::EXPECT_EQUALS_OR_ATTRIBUTE_OR_END:
                      if ($ch === '=') {
                          $expect = self::EXPECT_ATTRIBUTE_VALUE;
                      } elseif ($isWhitespace) {
                          // skip whitespace
                      } elseif ($ch === '/') {
                          $expect = self::EXPECT_CLOSE_BRACKET;
                      } elseif ($ch === '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                          // The previous attribute had no value; start the
                          // next one by re-reading this character.
                          $endAttribute();
                          $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                          $p--;
                      }
                      break;
                  case self::EXPECT_ATTRIBUTE_VALUE:
                      if ($attributeValue === '') {
                          if ($attributeQuote === null) {
                              if ($isWhitespace) {
                                  // skip whitespace
                              } elseif ($ch === '"' || $ch === "'") {
                                  $attributeQuote = $ch;
                              } else {
                                  $attributeQuote = ''; // explicitly unquoted
                                  $p--;
                              }
                          } elseif ($ch === $attributeQuote) {
                              // Empty string
                              $endAttribute($attributeQuote != '');
                              $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                          } elseif ($attributeQuote === '' && ($ch === '/' || $ch === '>')) {
                              return null;
                          } else {
                              $attributeValue .= $ch;
                          }
                      } elseif ($ch === $attributeQuote) {
                          $endAttribute($attributeQuote != '');
                          $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                      } elseif ($attributeQuote === '' && $isWhitespace) {
                          $endAttribute();
                          $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                      } elseif ($attributeQuote === '' && $ch === '>') {
                          // An unquoted value also ends at the closing
                          // bracket, which terminates the tag as well.
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } else {
                          $attributeValue .= $ch;
                      }
                      break;
                  case self::EXPECT_CLOSE_BRACKET:
                      // Anything other than `>` is skipped here.
                      if ($ch === '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      }
                      break;
              }
          }
          if ($fullTag === null) return null;
          // Flush an attribute still pending when the tag closed.
          $endAttribute();
          return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
      }
  }
  /**
   * Represents HTML modifications to a node, such as CSS classes to add or
   * additional attributes. See `MDHTMLFilter::scrubModifier()` to remove
   * disallowed values.
   */
  1251. class MDTagModifier {
  1252. /**
  1253. * Verbatim markdown syntax. Unmodified by changes to other properties.
  1254. */
  1255. public string $original;
  1256. /** @var string[] */
  1257. public array $cssClasses = [];
  1258. public ?string $cssId = null;
  1259. public array $cssStyles = [];
  1260. public array $attributes = [];
  1261. private static $baseClassRegex = '\\.([a-z_\\-][a-z0-9_\\-]*?)';
  1262. private static $baseIdRegex = '#([a-z_\\-][a-z0-9_\\-]*?)';
  1263. private static $baseAttributeRegex = '([a-z0-9]+?)=([^\\s\\}]+?)';
  1264. private static $baseRegex = '\\{([^}]+?)}';
  1265. private static $leadingClassRegex = '^\\{([^}]+?)}';
  1266. private static $trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$';
  1267. private static $classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
  1268. private static $idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
  1269. private static $attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value
  1270. public function applyTo(MDNode $node) {
  1271. if ($node instanceof MDNode) {
  1272. foreach ($this->cssClasses as $cssClass) {
  1273. $node->addClass($cssClass);
  1274. }
  1275. if ($this->cssId) $node->cssId = $this->cssId;
  1276. foreach ($this->attributes as $name => $value) {
  1277. $node->attributes[$name] = $value;
  1278. }
  1279. foreach ($this->cssStyles as $name => $value) {
  1280. $node->cssStyles[$name] = $value;
  1281. }
  1282. }
  1283. }
  1284. /**
  1285. * Adds a CSS class. If already present it will not be duplicated.
  1286. */
  1287. public function addClass(string $cssClass): bool {
  1288. if (array_search($cssClass, $this->cssClasses) !== false) return false;
  1289. array_push($this->cssClasses, $cssClass);
  1290. return true;
  1291. }
  1292. /**
  1293. * Removes a CSS class.
  1294. */
  1295. public function removeClass(string $cssClass): bool {
  1296. $beforeLength = sizeof($this->cssClasses);
  1297. $this->cssClasses = array_diff($this->cssClasses, [ $cssClass ]);
  1298. return sizeof($this->cssClasses) != beforeLength;
  1299. }
  1300. public function equals($other): bool {
  1301. if (!($other instanceof MDTagModifier)) return false;
  1302. if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
  1303. if ($other->cssId !== $this->cssId) return false;
  1304. if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
  1305. return true;
  1306. }
  1307. public function __toString(): string {
  1308. return $this->original;
  1309. }
  1310. private static function styleToObject(string $styleValue): array {
  1311. $pairs = explode(';', $styleValue);
  1312. $styles = [];
  1313. foreach ($pairs as $pair) {
  1314. $keyAndValue = explode(':', $pair);
  1315. if (sizeof($keyAndValue) != 2) continue;
  1316. $styles[$keyAndValue[0]] = $keyAndValue[1];
  1317. }
  1318. return $styles;
  1319. }
  /**
   * Builds a modifier from the text found between a pair of curly braces.
   *
   * The text is split on whitespace and each non-blank token is matched, in
   * order, against the class, id, and attribute patterns. A `style`
   * attribute is parsed into `cssStyles`; other attributes are stored in
   * `attributes`.
   *
   * @param string $contents text between the braces
   * @return ?MDTagModifier the modifier, or `null` if any token matches none
   * of the patterns
   */
  private static function fromContents(string $contents): ?MDTagModifier {
      $words = mb_split('\\s+', $contents);
      $result = new MDTagModifier();
      $result->original = "{{$contents}}";
      foreach ($words as $word) {
          if (trim($word) == '') continue;
          if (mb_eregi(self::$classRegex, $word, $match)) {
              $result->addClass($match[1]);
              continue;
          }
          if (mb_eregi(self::$idRegex, $word, $match)) {
              $result->cssId = $match[1];
              continue;
          }
          if (!mb_eregi(self::$attributeRegex, $word, $match)) {
              // Unrecognized token invalidates the whole modifier
              return null;
          }
          if ($match[1] == 'style') {
              $result->cssStyles = self::styleToObject($match[2]);
          } else {
              $result->attributes[$match[1]] = $match[2];
          }
      }
      return $result;
  }
  /**
   * Extracts block modifier from end of a line. Always returns a 2-element
   * tuple array:
   * - `0`: the line without the modifier
   * - `1`: an `MDTagModifier` if found or `null` if not
   *
   * If a `$state` is given and none of its root's block readers is an
   * `MDModifierReader`, the line is returned untouched. The line is also
   * returned untouched when trailing braces are present but their contents
   * do not parse as a modifier, so that ordinary brace text is not lost.
   *
   * @param string $line
   * @param ?MDState $state
   * @return array tuple with remaining line and `MDTagModifier` or `null`
   */
  public static function fromLine(string $line, ?MDState $state): array {
      if ($state) {
          $modifiersEnabled = false;
          foreach ($state->root()->readersByBlockPriority as $reader) {
              if ($reader instanceof MDModifierReader) {
                  $modifiersEnabled = true;
                  break;
              }
          }
          if (!$modifiersEnabled) return [ $line, null ];
      }
      if (!mb_eregi(self::$trailingClassRegex, $line, $groups)) return [ $line, null ];
      $mod = self::fromContents($groups[2]);
      // Not a valid modifier: keep the full line instead of dropping the braces.
      if ($mod === null) return [ $line, null ];
      return [ $groups[1], $mod ];
  }
  /**
   * Tries to parse a modifier located at the beginning of a string.
   *
   * @param string $line text to inspect
   * @return ?MDTagModifier the parsed modifier, or `null` if the leading
   * pattern does not match or its contents are not a valid modifier
   */
  public static function fromStart(string $line): ?MDTagModifier {
      $matched = mb_eregi(self::$leadingClassRegex, $line, $groups);
      return $matched ? self::fromContents($groups[1]) : null;
  }
  /**
   * Returns the line with any trailing modifier removed. Lines that do not
   * match the trailing modifier pattern are returned unchanged.
   *
   * @param string $line text to strip
   * @return string the line without its trailing modifier
   */
  public static function strip(string $line): string {
      return mb_eregi(self::$trailingClassRegex, $line, $groups) ? $groups[1] : $line;
  }
  1382. }
  1383. // -- Readers ---------------------------------------------------------------
  /**
   * Base class for readers of various markdown syntax. A `Markdown` instance can
   * be created with any combination of subclasses of these to customize the
   * flavor of markdown parsed.
   *
   * @see {@link custom.md} for details on subclassing
   */
  class MDReader {
      /**
       * Called before processing begins. `state.lines` is populated and the
       * line pointer `state.p` will be at `0`.
       *
       * Default implementation does nothing.
       */
      public function preProcess(MDState $state) {}

      /**
       * Attempts to read an `MDBlockNode` subclass at the current line pointer
       * `state.p`. Only matches if the block pattern starts at the line pointer,
       * not elsewhere in the `state.lines` array. If a block is found, `state.p`
       * should be incremented to the next line _after_ the block structure and
       * a `MDBlockNode` subclass instance is returned. If no block is found,
       * returns `null`.
       *
       * Default implementation always returns `null`.
       */
      public function readBlock(MDState $state): ?MDBlockNode { return null; }

      /**
       * Attempts to read an inline token from the beginning of `line`. Only the
       * start of the given `line` is considered. If a matching token is found, an
       * `MDToken` is returned. Otherwise `null` is returned.
       *
       * Default implementation always returns `null`.
       */
      public function readToken(MDState $state, string $line): ?MDToken { return null; }

      /**
       * Attempts to find a pattern anywhere in `tokens` and perform a _single_
       * in-place substitution with one or more `MDNode` subclass instances.
       * If a substitution is performed, must return `true`, otherwise `false`.
       *
       * Default implementation always returns `false`.
       *
       * @param MDState $state
       * @param int $pass what substitution pass this is, starting with 1
       * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and `MDInlineNode` elements
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool { return false; }

      /**
       * Called after all parsing has completed. An array `blocks` is passed of
       * all the top-level `MDBlockNode` elements in the document which this
       * method can traverse or alter in-place (e.g. via `array_splice`) if
       * necessary.
       *
       * `MDNode.visitChildren` is useful for recursively looking for certain
       * `MDNode` instances. `MDNode.replaceNodes` is useful for swapping in
       * replacements.
       *
       * Default implementation does nothing.
       *
       * @param MDState $state
       * @param MDBlockNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {}

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the block parsing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the tokenizing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareTokenizeOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the substitution phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Readers are sorted within each substitution pass. All pass 1 readers are
       * processed first, then all pass 2 readers, etc. The number of passes this
       * reader participates in is dictated by `substitutionPassCount`.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @param int $pass substitution pass, with numbering starting at `1`
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return 0;
      }

      /**
       * How many substitution passes this reader requires. Substitution allows
       * all pass 1 readers to process first, then all pass 2 readers, etc.
       */
      public function substitutionPassCount(): int { return 1; }

      /**
       * For sorting readers with ordering preferences. The `compare` methods
       * don't have the properties of normal sorting compares so need to sort
       * differently.
       *
       * Every ordered pair of distinct elements is compared and each non-zero
       * result adds a "must come before" edge to a graph. Elements are then
       * emitted in Kahn topological order. Elements that can never reach an
       * in-degree of zero (i.e. caught in a preference cycle) are appended at
       * the end in their original relative order.
       *
       * Elements are keyed by the id returned from `$idFn`, so two elements
       * with the same id collapse into a single entry.
       *
       * @param array $arr array to sort
       * @param callable $compareFn comparison function, taking two array element
       * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
       * respectively
       * @param callable $idFn function for returning a unique hashable id for
       * the array element
       * @return array sorted array
       */
      private static function kahnTopologicalSort(array $arr, callable $compareFn, callable $idFn): array {
          // Index-based loops below require sequential integer keys.
          $arr = array_values($arr);
          $count = count($arr);
          $graph = [];
          $inDegrees = [];
          $valuesById = [];
          // Build the graph and compute in-degrees
          foreach ($arr as $elem) {
              $id = $idFn($elem);
              $graph[$id] = [];
              $inDegrees[$id] = 0;
              $valuesById[$id] = $elem;
          }
          for ($i = 0; $i < $count; $i++) {
              $elemA = $arr[$i];
              $idA = $idFn($elemA);
              for ($j = 0; $j < $count; $j++) {
                  if ($i === $j) continue;
                  $elemB = $arr[$j];
                  $idB = $idFn($elemB);
                  $comparisonResult = $compareFn($elemA, $elemB);
                  if ($comparisonResult < 0) {
                      // A wants to precede B
                      $graph[$idA][] = $idB;
                      $inDegrees[$idB]++;
                  } elseif ($comparisonResult > 0) {
                      // A wants to follow B
                      $graph[$idB][] = $idA;
                      $inDegrees[$idA]++;
                  }
              }
          }
          // Initialize the queue with zero-inDegree nodes
          $queue = [];
          foreach ($inDegrees as $elemId => $degree) {
              if ($degree === 0) {
                  $queue[] = $elemId;
              }
          }
          // Process the queue and build the topological order list
          $sorted = [];
          while (count($queue) > 0) {
              $elemId = array_shift($queue);
              $sorted[] = $valuesById[$elemId];
              unset($valuesById[$elemId]);
              foreach ($graph[$elemId] as $neighbor) {
                  $inDegrees[$neighbor]--;
                  if ($inDegrees[$neighbor] === 0) {
                      $queue[] = $neighbor;
                  }
              }
          }
          // Anything left over can go at the end. No ordering dependencies.
          foreach ($valuesById as $value) {
              $sorted[] = $value;
          }
          return $sorted;
      }

      /**
       * Returns a sorted array of readers by their block priority preferences.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReaderForBlocks(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
              fn($elem) => MDUtils::typename($elem)
          );
      }

      /**
       * Returns a sorted array of readers by their tokenization priority preferences.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReadersForTokenizing(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b),
              fn($elem) => MDUtils::typename($elem)
          );
      }

      /**
       * Returns a sorted array of tuples (arrays) containing the substitution
       * pass number and reader instance, sorted by their substitution priority
       * preferences.
       *
       * For readers with `substitutionPassCount` > `1`, the same reader will
       * appear multiple times in the resulting array, one per pass.
       *
       * @param MDReader[] $readers
       * @return array[] sorted array of `[ int $pass, MDReader $reader ]`
       * tuples
       */
      public static function sortReadersForSubstitution(array &$readers): array {
          $tuples = [];
          $maxPass = 1;
          foreach ($readers as $reader) {
              $passCount = $reader->substitutionPassCount();
              // Track the largest pass count over ALL readers, not just the
              // most recent one, so no reader's later passes are dropped.
              $maxPass = max($maxPass, $passCount);
              for ($pass = 1; $pass <= $passCount; $pass++) {
                  $tuples[] = [ $pass, $reader ];
              }
          }
          $result = [];
          for ($pass = 1; $pass <= $maxPass; $pass++) {
              $readersThisPass = array_values(array_filter($tuples, fn($tup) => $tup[0] === $pass));
              $passResult = self::kahnTopologicalSort(
                  $readersThisPass,
                  fn(array $a, array $b): int => $a[1]->compareSubstituteOrdering($b[1], $pass),
                  fn($elem) => MDUtils::typename($elem[1])
              );
              $result = array_merge($result, $passResult);
          }
          return $result;
      }
  }
  /**
   * Block reader for "underlined" headings: a line of text followed by a line
   * made up solely of `=` characters (level 1) or `-` characters (level 2).
   *
   * Supports `MDTagModifier` suffixes on the text line.
   */
  class MDUnderlinedHeadingReader extends MDReader {
      public function readBlock(MDState $state): ?MDBlockNode {
          $start = $state->p;
          if (!$state->hasLines(2)) return null;
          $textLine = trim($state->lines[$start]);
          [$textLine, $modifier] = MDTagModifier::fromLine($textLine, $state);
          $ruleLine = trim($state->lines[$start + 1]);
          if ($textLine == '') return null;
          // The underline character decides the heading level
          if (mb_eregi('^=+$', $ruleLine)) {
              $level = 1;
          } elseif (mb_eregi('^\-+$', $ruleLine)) {
              $level = 2;
          } else {
              return null;
          }
          // Consume both the text line and the underline
          $state->p = $start + 2;
          $heading = new MDHeadingNode($level, $state->inlineMarkdownToNodes($textLine));
          if ($modifier) $modifier->applyTo($heading);
          return $heading;
      }
  }
  /**
   * Block reader for headings written with leading hash marks. One to six
   * hashes select heading levels 1 to 6.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDHashHeadingReader extends MDReader {
      private static $hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content

      public function readBlock(MDState $state): ?MDBlockNode {
          $next = $state->p + 1;
          [$headingLine, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
          if (!mb_eregi(self::$hashHeadingRegex, $headingLine, $parts)) return null;
          // Heading matched; consume its single line
          $state->p = $next;
          $level = mb_strlen($parts[1]);
          $heading = new MDHeadingNode($level, $state->inlineMarkdownToNodes($parts[2]));
          if ($modifier) $modifier->applyTo($heading);
          return $heading;
      }
  }
  /**
   * Block reader for subtext lines, which begin with `-#`. Subtext is smaller,
   * fainter text for things like disclaimers or sources.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDSubtextReader extends MDReader {
      private static $subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content

      public function readBlock(MDState $state): ?MDBlockNode {
          $next = $state->p + 1;
          [$subtextLine, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
          if (!mb_eregi(self::$subtextRegex, $subtextLine, $parts)) return null;
          // Subtext matched; consume its single line
          $state->p = $next;
          $node = new MDSubtextNode($state->inlineMarkdownToNodes($parts[1]));
          if ($modifier) $modifier->applyTo($node);
          return $node;
      }

      /**
       * Asks to be tried before the unordered list reader; no preference
       * relative to any other reader.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return ($other instanceof MDUnorderedListReader) ? -1 : 0;
      }
  }
  /**
   * Reads markdown blocks for blockquoted text. A blockquote is a run of
   * consecutive lines that each begin with `>`.
   */
  class MDBlockQuoteReader extends MDReader {
      /**
       * Reads a blockquote starting at the line pointer. The `>` marker and
       * up to three spaces plus an optional tab after it are removed from
       * each line, and the remaining text is parsed as nested blocks.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDBlockquoteNode`, or `null` if the current
       * line does not start with `>`
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $blockquoteLines = [];
          $p = $state->p;
          $lineCount = count($state->lines);
          // Only advance past lines that belong to the quote so the first
          // non-quote line is left for the next reader instead of being lost.
          while ($p < $lineCount && str_starts_with($state->lines[$p], ">")) {
              $blockquoteLines[] = $state->lines[$p];
              $p++;
          }
          if (count($blockquoteLines) === 0) return null;
          $contentLines = array_map(
              fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)),
              $blockquoteLines
          );
          $substate = $state->copy($contentLines);
          $quotedBlocks = $substate->readBlocks();
          $state->p = $p;
          return new MDBlockquoteNode($quotedBlocks);
      }
  }
  /**
   * Internal abstract base class for ordered and unordered lists.
   */
  class _MDListReader extends MDReader {
      /**
       * Collects the lines belonging to one list item, starting at the line
       * pointer. Does not move the line pointer.
       *
       * Collection stops at the next line that looks like a list item, or at
       * the first unindented non-blank line that follows a blank line.
       * Trailing blank lines are removed and the result is passed through
       * `MDUtils::stripIndent`.
       *
       * @param MDState $state
       * @param int $firstLineStartPos character offset in the first line
       * where the item content begins (i.e. after the bullet or number)
       * @return string[] the item's content lines
       */
      private static function readItemLines(MDState $state, int $firstLineStartPos): array {
          $p = $state->p;
          $lines = [];
          $seenBlankLine = false;
          while ($state->hasLines(1, $p)) {
              $isFirstLine = ($p == $state->p);
              $line = $state->lines[$p++];
              if ($isFirstLine) {
                  // Drop the bullet/number prefix
                  $line = mb_substr($line, $firstLineStartPos);
              }
              if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
                  // Found next list item
                  break;
              }
              $isBlankLine = trim($line) == '';
              $isIndented = mb_eregi('^\\s+\\S', $line);
              if ($isBlankLine) {
                  $seenBlankLine = true;
              } elseif (!$isIndented && $seenBlankLine) {
                  // Post-list content
                  break;
              }
              $lines[] = $line;
          }
          $lines = MDUtils::withoutTrailingBlankLines($lines);
          return MDUtils::stripIndent($lines);
      }

      /**
       * Reads the content of the list item at the line pointer and advances
       * the pointer past the lines consumed (always at least one).
       *
       * @param MDState $state
       * @param int $firstLineStartPos character offset in the first line
       * where the item content begins
       * @return MDBlockNode the item's content
       */
      protected function readListItemContent(MDState $state, int $firstLineStartPos): MDBlockNode {
          $itemLines = self::readItemLines($state, $firstLineStartPos);
          $state->p += max(count($itemLines), 1);
          if (count($itemLines) == 1) {
              return new MDBlockNode($state->inlineMarkdownToNodes($itemLines[0]));
          }
          $hasBlankLines = count(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
          if ($hasBlankLines) {
              $substate = $state->copy($itemLines);
              $blocks = $substate->readBlocks();
              return (count($blocks) == 1) ? $blocks[0] : new MDBlockNode($blocks);
          }
          // Multiline content with no blank lines. Search for new block
          // boundaries without the benefit of a blank line to demarcate it.
          for ($p = 1; $p < count($itemLines); $p++) {
              $line = $itemLines[$p];
              if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
                  // Nested list found
                  $firstBlock = new MDBlockNode($state->inlineMarkdownToNodes(implode("\n", array_slice($itemLines, 0, $p))));
                  $substate = $state->copy(array_slice($itemLines, $p));
                  $blocks = $substate->readBlocks();
                  return new MDBlockNode(array_merge([ $firstBlock ], $blocks));
              }
          }
          // Ok, give up and just do a standard block read
          $substate = $state->copy($itemLines);
          $blocks = $substate->readBlocks();
          return (count($blocks) == 1) ? $blocks[0] : new MDBlockNode($blocks);
      }

      /**
       * Must be overridden by subclasses.
       *
       * @throws Error always, naming the concrete class that failed to
       * override it
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $className = MDUtils::typename($this);
          throw new Error("Abstract readBlock must be overridden in {$className}");
      }
  }
  /**
   * Block reader for unordered (bulleted) lists. Items begin with `*`, `+`,
   * or `-` followed by whitespace.
   */
  class MDUnorderedListReader extends _MDListReader {
      private static string $unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content

      /**
       * Reads a single bulleted item at the line pointer.
       *
       * @return ?MDListItemNode the item, or `null` if there are no lines
       * left or the current line is not a bulleted item
       */
      private function readUnorderedListItem(MDState $state): ?MDListItemNode {
          if (!$state->hasLines(1)) return null;
          $current = $state->lines[$state->p];
          if (!mb_eregi(self::$unorderedListRegex, $current, $match)) return null;
          // Item content starts right after the bullet and its whitespace
          $contentStart = mb_strlen($match[1]);
          return new MDListItemNode($this->readListItemContent($state, $contentStart));
      }

      /**
       * Reads consecutive bulleted items into one list node.
       *
       * @return ?MDBlockNode an `MDUnorderedListNode`, or `null` if no item
       * was found at the line pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $items = [];
          while ($item = $this->readUnorderedListItem($state)) {
              $items[] = $item;
          }
          if (count($items) == 0) return null;
          return new MDUnorderedListNode($items);
      }
  }
  /**
   * Block reader for ordered (numbered) lists. Items begin with digits, a
   * period, and whitespace. The resulting list node is given the ordinal of
   * its first item.
   */
  class MDOrderedListReader extends _MDListReader {
      private static string $orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content

      /**
       * Reads a single numbered item at the line pointer.
       *
       * @return ?MDListItemNode the item (carrying its parsed ordinal), or
       * `null` if there are no lines left or the current line is not a
       * numbered item
       */
      private function readOrderedListItem(MDState $state): ?MDListItemNode {
          if (!$state->hasLines(1)) return null;
          $current = $state->lines[$state->p];
          if (!mb_eregi(self::$orderedListRegex, $current, $match)) return null;
          $ordinal = intval($match[1]);
          // Item content starts after the number, the dot, and its whitespace
          $contentStart = mb_strlen($match[1]) + mb_strlen($match[2]);
          $content = $this->readListItemContent($state, $contentStart);
          return new MDListItemNode($content, $ordinal);
      }

      /**
       * Reads consecutive numbered items into one list node.
       *
       * @return ?MDBlockNode an `MDOrderedListNode`, or `null` if no item
       * was found at the line pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $items = [];
          while ($item = $this->readOrderedListItem($state)) {
              $items[] = $item;
          }
          if (count($items) == 0) return null;
          return new MDOrderedListNode($items, $items[0]->ordinal);
      }
  }
  /**
   * Block reader for code blocks denoted by pairs of triple tickmarks. If
   * a programming language name, _xyz_, immediately follows the backticks, it
   * is passed to the resulting `MDCodeBlockNode` as its language.
   *
   * Supports `MDTagModifier` suffix.
   */
  class MDFencedCodeBlockReader extends MDReader {
      /**
       * Reads a fenced code block whose opening fence is at the line pointer.
       *
       * The opening fence must be the only thing on its line apart from
       * leading whitespace, an optional language name, and an optional
       * modifier. The block ends at the first line that is exactly three
       * backticks once trimmed.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the current
       * line is not an opening fence or no closing fence is found
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $p = $state->p;
          $openFenceLine = $state->lines[$p++];
          [$openFenceLine, $modifier] = MDTagModifier::fromLine($openFenceLine, $state);
          // Anchored at the start so a line of ordinary text that merely ends
          // in backticks is not mistaken for an opening fence.
          if (!mb_eregi('^\\s*`{3,}\\s*([a-z0-9]*)\\s*$', $openFenceLine, $groups)) return null;
          $language = mb_strlen($groups[1]) > 0 ? $groups[1] : null;
          $codeLines = [];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (trim($line) == '```') {
                  // Closing fence; consume it and finish
                  $state->p = $p;
                  $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
                  if ($modifier) $modifier->applyTo($block);
                  return $block;
              }
              $codeLines[] = $line;
          }
          // Ran out of lines without finding a closing fence
          return null;
      }
  }
  /**
   * Block reader for code blocks written as a run of indented lines.
   */
  class MDIndentedCodeBlockReader extends MDReader {
      /**
       * Gathers consecutive lines for which `MDUtils::countIndents` reports
       * at least one indent, passing each through `MDUtils::stripIndent`.
       *
       * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the current
       * line is not indented
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $cursor = $state->p;
          $codeLines = [];
          while ($state->hasLines(1, $cursor)) {
              $candidate = $state->lines[$cursor];
              // First non-indented line ends the block and is left unconsumed
              if (MDUtils::countIndents($candidate, true) < 1) break;
              $codeLines[] = MDUtils::stripIndent($candidate);
              $cursor++;
          }
          if (count($codeLines) == 0) return null;
          $state->p = $cursor;
          return new MDCodeBlockNode(implode("\n", $codeLines));
      }
  }
  /**
   * Block reader for horizontal rules: three or more hyphens, or three or more
   * asterisks, alone on a line with optional whitespace between them.
   *
   * The line may carry an `MDTagModifier` suffix, which is applied to the
   * resulting node.
   */
  class MDHorizontalRuleReader extends MDReader {
      private static string $horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

      public function readBlock(MDState $state): ?MDBlockNode {
          $next = $state->p + 1;
          [$ruleLine, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
          if (!mb_eregi(self::$horizontalRuleRegex, $ruleLine)) {
              return null;
          }
          // Rule matched; consume its single line
          $state->p = $next;
          $rule = new MDHorizontalRuleNode();
          if ($modifier) $modifier->applyTo($rule);
          return $rule;
      }

      /**
       * Asks to be tried before the unordered list reader; no preference
       * relative to any other reader.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return ($other instanceof MDUnorderedListReader) ? -1 : 0;
      }
  }
  /**
   * Block reader for tables.
   *
   * Supports `MDTagModifier` suffix.
   */
  class MDTableReader extends MDReader {
      private static string $tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

      /**
       * Reads one table row at the line pointer, advancing the pointer only
       * on success. Any trailing modifier on the line is discarded, one
       * leading and one trailing pipe are removed, and the rest is split on
       * pipes into cells.
       *
       * @param MDState $state
       * @param bool $isHeader whether to create header cells instead of
       * regular cells
       * @return ?MDTableRowNode the row, or `null` if no lines remain or the
       * line contains no pipe
       */
      private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
          if (!$state->hasLines(1)) return null;
          $p = $state->p;
          $line = MDTagModifier::strip(trim($state->lines[$p++]));
          if (!mb_eregi('.*\\|.*', $line)) return null;
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          $cellTokens = explode('|', $line);
          $cells = array_map(function($token) use ($isHeader, $state) {
              $content = $state->inlineMarkdownToNode(trim($token));
              return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
          }, $cellTokens);
          $state->p = $p;
          return new MDTableRowNode($cells);
      }

      /**
       * Derives per-column alignment from a divider line such as
       * `| :-- | :-: | --: |`.
       *
       * @param string $line the divider line
       * @return (string|null)[] one entry per column: `'left'`, `'center'`,
       * `'right'`, or `null` when the column has no colon marker
       */
      private function parseColumnAlignments(string $line): array {
          $line = trim($line);
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          return array_map(function($token) {
              // Trim so padding next to the outer pipes does not hide the
              // colon markers of the first and last columns.
              $token = trim($token);
              $startsWithColon = str_starts_with($token, ':');
              $endsWithColon = str_ends_with($token, ':');
              if ($startsWithColon && $endsWithColon) return 'center';
              if ($startsWithColon) return 'left';
              if ($endsWithColon) return 'right';
              return null;
          }, explode('|', $line));
      }

      /**
       * Reads a table: a header row, a divider line, then zero or more body
       * rows. A modifier on the header line is applied to the table. The
       * line pointer is restored if no table is found.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDTableNode`, or `null` if the lines at the
       * pointer do not form a table
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $startP = $state->p;
          $firstLine = $state->lines[$startP];
          $modifier = MDTagModifier::fromLine($firstLine, $state)[1];
          $headerRow = $this->readTableRow($state, true);
          if ($headerRow === null) {
              $state->p = $startP;
              return null;
          }
          $dividerLine = $state->lines[$state->p++];
          if (!mb_eregi(self::$tableDividerRegex, $dividerLine)) {
              $state->p = $startP;
              return null;
          }
          $columnAlignments = $this->parseColumnAlignments($dividerLine);
          $bodyRows = [];
          while ($state->hasLines(1)) {
              $row = $this->readTableRow($state, false);
              if ($row === null) break;
              $bodyRows[] = $row;
          }
          $table = new MDTableNode($headerRow, $bodyRows);
          $table->columnAlignments = $columnAlignments;
          if ($modifier) $modifier->applyTo($table);
          return $table;
      }
  }
  /**
   * Block reader for definition lists. A term is an unindented line; each of
   * its definitions is a following line that starts with a colon and
   * whitespace. Indented lines continue the previous term or definition.
   */
  class MDDefinitionListReader extends MDReader {
      /**
       * Reads lines up to the first blank line (or end of input) as a
       * definition list.
       *
       * @return ?MDBlockNode an `MDDefinitionListNode`, or `null` if the
       * first line is indented or the lines do not contain at least one term
       * and one definition
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $cursor = $state->p;
          $terms = 0;
          $definitions = 0;
          $entries = [];
          while ($state->hasLines(1, $cursor)) {
              $current = $state->lines[$cursor++];
              if (trim($current) === '') {
                  break;
              }
              if (mb_eregi('^\\s+', $current)) {
                  // Continuation line; needs something to attach to
                  if (count($entries) == 0) return null;
                  $entries[count($entries) - 1] .= "\n" . $current;
                  continue;
              }
              if (mb_eregi('^:\\s+', $current)) {
                  $definitions++;
              } else {
                  $terms++;
              }
              $entries[] = $current;
          }
          if ($terms == 0 || $definitions == 0) return null;
          $blocks = [];
          foreach ($entries as $entry) {
              if (mb_eregi('^:\\s+(.*?)$', $entry, $match)) {
                  $blocks[] = new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($match[1]));
              } else {
                  $blocks[] = new MDDefinitionListTermNode($state->inlineMarkdownToNodes($entry));
              }
          }
          $state->p = $cursor;
          return new MDDefinitionListNode($blocks);
      }
  }
  2033. /**
  2034. * Block reader for defining footnote contents. Footnotes can be defined anywhere
  2035. * in the document but will always be rendered at the end of a page or end of
  2036. * the document.
  2037. */
  2038. class MDFootnoteReader extends MDReader {
  2039. private static string $footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
  2040. private static string $footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol
  2041. /**
  2042. * @param MDState $state
  2043. * @param string $symbol
  2044. * @param MDNode[] $footnote
  2045. */
  2046. private function defineFootnote(MDState $state, string $symbol, array $footnote) {
  2047. $footnotes = $state->root()->userInfo['footnotes'] ?? [];
  2048. $footnotes[$symbol] = $footnote;
  2049. $state->root()->userInfo['footnotes'] = $footnotes;
  2050. }
  2051. private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
  2052. $footnoteInstances = $state->root()->userInfo['footnoteInstances'];
  2053. $instances = $footnoteInstances[$symbol] ?? [];
  2054. array_push($instances, $unique);
  2055. $footnoteInstances[$symbol] = $instances;
  2056. $state->root()->userInfo['footnoteInstances'] = $footnoteInstances;
  2057. }
  2058. private function idForFootnoteSymbol(MDState $state, string $symbol): int {
  2059. $footnoteIds = $state->root()->userInfo['footnoteIds'] ?? [];
  2060. $existing = $footnoteIds[$symbol] ?? null;
  2061. if ($existing !== null) return $existing;
  2062. $nextFootnoteId = $state->root()->userInfo['nextFootnoteId'] ?? 1;
  2063. $id = $nextFootnoteId++;
  2064. $footnoteIds[$symbol] = $id;
  2065. $state->root()->userInfo['nextFootnoteId'] = $nextFootnoteId;
  2066. $state->root()->userInfo['footnoteIds'] = $footnoteIds;
  2067. return $id;
  2068. }
  2069. public function preProcess(MDState $state) {
  2070. $state->root()->userInfo['footnoteInstances'] = [];
  2071. $state->root()->userInfo['footnotes'] = [];
  2072. $state->root()->userInfo['footnoteIds'] = [];
  2073. $state->root()->userInfo['nextFootnoteId'] = 1;
  2074. }
  2075. public function readBlock(MDState $state): ?MDBlockNode {
  2076. $p = $state->p;
  2077. if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
  2078. $symbol = $groups[1];
  2079. $def = $groups[2];
  2080. while ($state->hasLines(1, $p)) {
  2081. $line = $state->lines[$p++];
  2082. if (mb_eregi('^\\s+', $line)) {
  2083. $def .= "\n" . $line;
  2084. } else {
  2085. $p--;
  2086. break;
  2087. }
  2088. }
  2089. $content = $state->inlineMarkdownToNodes($def);
  2090. $this->defineFootnote($state, $symbol, $content);
  2091. $state->p = $p;
  2092. return new MDBlockNode(); // empty
  2093. }
  /**
   * Attempts to read a footnote token from the start of `$line`. The pattern
   * in `self::$footnoteWithTitleRegex` is tried first and yields a token built
   * from capture groups 1 and 2; otherwise `self::$footnoteRegex` is tried and
   * yields a token built from capture group 1 only.
   *
   * @param MDState $state
   * @param string $line remaining text to tokenize
   * @return ?MDToken a `Footnote` token, or `null` if neither pattern matches
   */
  public function readToken(MDState $state, string $line): ?MDToken {
      // Initialized explicitly; a bare `$groups;` statement reads an undefined
      // variable and raises a warning in PHP 8.
      $groups = [];
      if (mb_eregi(self::$footnoteWithTitleRegex, $line, $groups)) {
          return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
      }
      if (mb_eregi(self::$footnoteRegex, $line, $groups)) {
          return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
      }
      return null;
  }
  /**
   * Replaces the first `Footnote` token found in `$tokens` with an
   * `MDFootnoteNode` constructed from the token's content.
   *
   * @param MDState $state
   * @param int $pass substitution pass number (not used by this reader)
   * @param (MDToken|MDNode)[] $tokens tokens/nodes, modified in place
   * @return bool `true` if a substitution was made, `false` otherwise
   */
  public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
      $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ]);
      if (!$found) return false;
      $footnote = new MDFootnoteNode($found->tokens[0]->content);
      array_splice($tokens, $found->index, 1, [$footnote]);
      return true;
  }
  /**
   * Numbers every `MDFootnoteNode` found beneath the given blocks and, if any
   * footnotes are registered, appends an `MDFootnoteListNode` to `$blocks`.
   *
   * Each footnote node receives the id for its symbol, a document-wide
   * sequential occurrence id starting at 1, and a display symbol equal to the
   * id rendered as a string. Each occurrence is also recorded through
   * `registerUniqueInstance`.
   *
   * @param MDState $state
   * @param MDBlockNode[] $blocks top-level blocks, modified in place
   */
  public function postProcess(MDState $state, array &$blocks) {
      $nextOccurrenceId = 1;
      foreach ($blocks as $block) {
          $block->visitChildren(function($node) use (&$nextOccurrenceId, $state) {
              if (!($node instanceof MDFootnoteNode)) return;
              $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
              $node->occurrenceId = $nextOccurrenceId++;
              $node->displaySymbol = strval($node->footnoteId);
              $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
          });
      }
      // Footnote data is kept on the root state everywhere else in this reader
      // (see preProcess), so it must be read from the root here as well.
      $footnotes = $state->root()->userInfo['footnotes'] ?? [];
      if (sizeof($footnotes) == 0) return;
      array_push($blocks, new MDFootnoteListNode());
  }
  /**
   * Block-reading priority relative to another reader.
   *
   * @param MDReader $other
   * @return int `-1` when `$other` is a link or image reader (presumably
   *   meaning this reader runs first), `0` for no preference
   */
  public function compareBlockOrdering(MDReader $other): int {
      $isLinkOrImageReader = $other instanceof MDLinkReader || $other instanceof MDImageReader;
      return $isLinkOrImageReader ? -1 : 0;
  }
  /**
   * Tokenizing priority relative to another reader.
   *
   * @param MDReader $other
   * @return int `-1` when `$other` is a link or image reader (presumably
   *   meaning this reader runs first), `0` for no preference
   */
  public function compareTokenizeOrdering(MDReader $other): int {
      $isLinkOrImageReader = $other instanceof MDLinkReader || $other instanceof MDImageReader;
      return $isLinkOrImageReader ? -1 : 0;
  }
  /**
   * Token-substitution priority relative to another reader.
   *
   * @param MDReader $other
   * @param int $pass substitution pass number (not used by this reader)
   * @return int `-1` when `$other` is a link or image reader (presumably
   *   meaning this reader runs first), `0` for no preference
   */
  public function compareSubstituteOrdering(MDReader $other, int $pass): int {
      $isLinkOrImageReader = $other instanceof MDLinkReader || $other instanceof MDImageReader;
      return $isLinkOrImageReader ? -1 : 0;
  }
  2148. }
  /**
   * Block reader for abbreviation definitions. Anywhere the abbreviation appears
   * in plain text will have its definition available when hovering over it.
   * Definitions can appear anywhere in the document. Their content should only
   * contain simple text, not markdown.
   *
   * Definitions are collected in the root state's `userInfo['abbreviations']`
   * as a map of abbreviation to definition.
   */
  class MDAbbreviationReader extends MDReader {
      /**
       * Records (or overwrites) the definition for an abbreviation.
       */
      private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
          $abbrevs = $state->root()->userInfo['abbreviations'] ?? [];
          $abbrevs[$abbreviation] = $definition;
          $state->root()->userInfo['abbreviations'] = $abbrevs;
      }

      /**
       * Clears any previously collected abbreviations.
       */
      public function preProcess(MDState $state) {
          $state->root()->userInfo['abbreviations'] = [];
      }

      /**
       * Reads a single-line definition of the form `*[ABBR]: definition`.
       *
       * @return ?MDBlockNode an empty block node when a definition was
       *   consumed, or `null` if the current line is not a definition
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
          $abbrev = $groups[1];
          $def = $groups[2];
          $this->defineAbbreviation($state, $abbrev, $def);
          $state->p = $p;
          return new MDBlockNode(); // empty
      }

      /**
       * Splits every `MDTextNode` that contains a defined abbreviation into
       * text nodes and `MDAbbreviationNode`s, wrapped in a plain `MDNode`.
       *
       * @param MDState $state
       * @param MDNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {
          $abbreviations = $state->root()->userInfo['abbreviations'] ?? [];
          MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
              if (!($original instanceof MDTextNode)) return null;
              $changed = false;
              $elems = [ $original->text ]; // mix of strings and MDNodes
              for ($i = 0; $i < sizeof($elems); $i++) {
                  $text = $elems[$i];
                  if (!is_string($text)) continue;
                  foreach ($abbreviations as $key => $definition) {
                      // PHP casts numeric-string array keys (e.g. "404") to
                      // int; restore the string before using it as text.
                      $abbreviation = (string)$key;
                      $index = strpos($text, $abbreviation);
                      if ($index === false) continue;
                      $prefix = substr($text, 0, $index);
                      $suffix = substr($text, $index + strlen($abbreviation));
                      array_splice($elems, $i, 1, [$prefix, new MDAbbreviationNode($abbreviation, $definition), $suffix]);
                      $i = -1; // start over
                      $changed = true;
                      break;
                  }
              }
              if (!$changed) return null;
              $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
              return new MDNode($nodes);
          });
      }
  }
  /**
   * Block reader for simple paragraphs. Paragraphs are separated by a blank (or
   * whitespace-only) line. This reader is prioritized after every other reader
   * since there is no distinguishing syntax.
   */
  class MDParagraphReader extends MDReader {
      /**
       * Collects lines up to (and consuming) the next blank line and wraps
       * them in an `MDParagraphNode`.
       *
       * @return ?MDBlockNode the paragraph, or `null` when no lines were
       *   collected or when the read started at line 0 and reached the end
       *   of the document
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $collected = [];
          $cursor = $state->p;
          while ($state->hasLines(1, $cursor)) {
              $current = $state->lines[$cursor];
              $cursor++; // the terminating blank line is consumed too
              if (trim($current) === '') break;
              $collected[] = $current;
          }
          // If it's the entire document don't wrap it in a paragraph
          $spansWholeDocument = $state->p == 0 && $cursor >= sizeof($state->lines);
          if ($spansWholeDocument || sizeof($collected) == 0) {
              return null;
          }
          $state->p = $cursor;
          return new MDParagraphNode($state->inlineMarkdownToNodes(implode("\n", $collected)));
      }

      /**
       * @return int always `1` so that this reader sorts dead last
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 1;
      }
  }
  /**
   * Abstract base class for readers that look for one or two delimiting tokens
   * on either side of some content. E.g. `**strong**`.
   */
  class MDSimplePairInlineReader extends MDReader {
      // Passes, as implemented by `attemptPair`:
      // 1. Syntaxes with two delimiting tokens, interior tokens of the same
      //    kind must be even in number
      // 2. Syntaxes with one delimiting token
      // 3. Syntaxes with two delimiting tokens, any tokens inside
      // 4. Syntaxes with one delimiting token, any tokens inside
      // NOTE(review): the even-interior-count check is only applied on pass 1,
      // so passes 2 and 4 currently accept the same content — confirm whether
      // pass 2 was meant to apply the check as well.
      public function substitutionPassCount(): int { return 4; }

      /**
       * Attempts a substitution of a matched pair of delimiting token types.
       * If successful, the substitution is performed on `tokens` and `true` is
       * returned, otherwise `false` is returned and the array is untouched.
       *
       * Multi-token delimiters (`$count > 1`) are only attempted on passes 1
       * and 3; single-token delimiters only on passes 2 and 4. On pass 1,
       * matches whose content holds an odd number of the delimiting token are
       * rejected. On every pass, content that begins or ends with a whitespace
       * token, or that already contains a node of class `$nodeClass`, is
       * rejected.
       *
       * @param MDState $state
       * @param int $pass pass number, starting with `1`
       * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
       * @param string $nodeClass class of the node to return if matched
       * @param MDTokenType $delimiter delimiting token
       * @param int $count how many times the token is repeated to form the delimiter
       * @param bool $plaintext whether to invoke `nodeClass` with a verbatim
       * content string instead of parsed `MDNode`s
       * @return bool `true` if substitution was performed, `false` if not
       */
      public function attemptPair(MDState $state, int $pass, array &$tokens, string $nodeClass, MDTokenType $delimiter, int $count=1, bool $plaintext=false): bool {
          if ($count == 1 && $pass != 2 && $pass != 4) return false;
          if ($count > 1 && $pass != 1 && $pass != 3) return false;
          $delimiters = array_fill(0, $count, $delimiter);
          $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
          $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters, function($content) use ($nodeClass, $isFirstOfMultiplePasses, $delimiter) {
              // `?? null` keeps an empty content list from raising
              // undefined-key warnings; the outcome is unchanged.
              $first = $content[0] ?? null;
              $last = $content[sizeof($content) - 1] ?? null;
              $firstType = $first instanceof MDToken ? $first->type : null;
              $lastType = $last instanceof MDToken ? $last->type : null;
              if ($firstType == MDTokenType::Whitespace) return false;
              if ($lastType == MDTokenType::Whitespace) return false;
              foreach ($content as $token) {
                  // Don't allow nesting
                  if (MDUtils::typename($token) == $nodeClass) return false;
              }
              if ($isFirstOfMultiplePasses) {
                  $innerCount = 0;
                  foreach ($content as $token) {
                      if ($token instanceof MDToken && $token->type == $delimiter) $innerCount++;
                  }
                  if (($innerCount % 2) != 0) return false;
              }
              return true;
          });
          if ($match === null) return false;
          $state->checkExecutionTime();
          if ($plaintext) {
              // Verbatim content: tokens contribute their original text, nodes
              // their plaintext rendering
              $content = implode('', array_map(fn($token) => $token instanceof MDToken ? $token->original : $token->toPlaintext($state), $match->contentTokens));
          } else {
              $content = $state->tokensToNodes($match->contentTokens);
          }
          $ref = new ReflectionClass($nodeClass);
          $node = $ref->newInstanceArgs([ $content ]);
          array_splice($tokens, $match->startIndex, $match->totalLength, [$node]);
          return true;
      }

      // Not referenced anywhere in this class.
      private static $firstTime = null;
  }
  /**
   * Reader for emphasis syntax. Denoted with a single underscore on either side of
   * some text (preferred) or a single asterisk on either side.
   */
  class MDEmphasisReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Underscore` or `Asterisk` token when `$line` begins with
       * the corresponding character.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return match (substr($line, 0, 1)) {
              '_' => new MDToken('_', MDTokenType::Underscore),
              '*' => new MDToken('*', MDTokenType::Asterisk),
              default => null,
          };
      }

      /**
       * Tries single-underscore pairs first, then single-asterisk pairs.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ MDTokenType::Underscore, MDTokenType::Asterisk ] as $delimiter) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', $delimiter)) return true;
          }
          return false;
      }

      /**
       * @return int `1` relative to `MDStrongReader`, `0` otherwise
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return $other instanceof MDStrongReader ? 1 : 0;
      }
  }
  /**
   * Reader for strong syntax. Denoted with two asterisks on either side of some
   * text (preferred) or two underscores on either side. Note that if
   * `MDUnderlineReader` is in use, it will replace the double-underscore syntax.
   */
  class MDStrongReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Asterisk` or `Underscore` token when `$line` begins with
       * the corresponding character.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return match (substr($line, 0, 1)) {
              '*' => new MDToken('*', MDTokenType::Asterisk),
              '_' => new MDToken('_', MDTokenType::Underscore),
              default => null,
          };
      }

      /**
       * Tries double-asterisk pairs first, then double-underscore pairs.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ MDTokenType::Asterisk, MDTokenType::Underscore ] as $delimiter) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', $delimiter, 2)) return true;
          }
          return false;
      }

      /**
       * @return int `-1` relative to `MDEmphasisReader`, `0` otherwise
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return $other instanceof MDEmphasisReader ? -1 : 0;
      }
  }
  /**
   * Reader for strikethrough syntax. Consists of two tildes on either side of
   * some text (preferred) or single tildes on either side. Note that if
   * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
   *
   * The number of recognized tildes can be configured.
   */
  class MDStrikethroughReader extends MDSimplePairInlineReader {
      /**
       * Whether `~text~` is recognized as strikethrough.
       * @var bool
       */
      public bool $singleTildeEnabled = true;
      /**
       * Whether `~~text~~` is recognized as strikethrough.
       * @var bool
       */
      public bool $doubleTildeEnabled = true;

      /**
       * Emits a `Tilde` token when `$line` begins with `~`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
          return null;
      }

      /**
       * Tries the double-tilde form first (if enabled), then the single-tilde
       * form (if enabled). Each flag gates the delimiter length it is named
       * for; previously the two flags were swapped.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($this->doubleTildeEnabled) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
          }
          if ($this->singleTildeEnabled) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde)) return true;
          }
          return false;
      }
  }
  /**
   * Reader for underline syntax. Consists of two underscores on either side of
   * some text. If used with `MDStrongReader` which also looks for double
   * underscores, this reader will take priority.
   */
  class MDUnderlineReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Underscore` token when `$line` begins with `_`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '_')
              ? new MDToken('_', MDTokenType::Underscore)
              : null;
      }

      /**
       * Substitutes a double-underscore pair with an `MDUnderlineNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, 2);
          return $substituted;
      }

      /**
       * @return int `-1` relative to `MDStrongReader`, `0` otherwise
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return $other instanceof MDStrongReader ? -1 : 0;
      }
  }
  /**
   * Reader for highlight syntax. Consists of pairs of equal signs on either side
   * of some text.
   */
  class MDHighlightReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Equal` token when `$line` begins with `=`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '=')
              ? new MDToken('=', MDTokenType::Equal)
              : null;
      }

      /**
       * Substitutes a double-equals pair with an `MDHighlightNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, 2);
          return $substituted;
      }
  }
  /**
   * Reader for inline code syntax. Consists of one or two delimiting backticks
   * around text. The contents between the backticks will be rendered verbatim,
   * ignoring any inner markdown syntax. To include a backtick inside, escape it
   * with a backslash.
   */
  class MDCodeSpanReader extends MDSimplePairInlineReader {
      /**
       * Emits a `Backtick` token when `$line` begins with a backtick.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '`')
              ? new MDToken('`', MDTokenType::Backtick)
              : null;
      }

      /**
       * Tries double-backtick pairs first, then single-backtick pairs. The
       * content is passed to `MDCodeNode` as verbatim text.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ 2, 1 ] as $backtickCount) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, $backtickCount, true)) return true;
          }
          return false;
      }
  }
  /**
   * Reader for subscript syntax. Consists of single tildes on either side of
   * some text. If used with `MDStrikethroughReader`, this reader will take
   * precedence, and strikethrough can only be done with double tildes.
   */
  class MDSubscriptReader extends MDSimplePairInlineReader {
      /**
       * Emits a `Tilde` token when `$line` begins with `~`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '~')
              ? new MDToken('~', MDTokenType::Tilde)
              : null;
      }

      /**
       * Substitutes a single-tilde pair with an `MDSubscriptNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
          return $substituted;
      }

      /**
       * @return int `-1` relative to `MDStrikethroughReader`, `0` otherwise
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return $other instanceof MDStrikethroughReader ? -1 : 0;
      }
  }
  /**
   * Reader for superscript syntax. Consists of single caret characters on either
   * side of some text.
   */
  class MDSuperscriptReader extends MDSimplePairInlineReader {
      /**
       * Emits a `Caret` token when `$line` begins with `^`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '^')
              ? new MDToken('^', MDTokenType::Caret)
              : null;
      }

      /**
       * Substitutes a single-caret pair with an `MDSuperscriptNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
          return $substituted;
      }
  }
  /**
   * Reads a hypertext link. Consists of link text between square brackets
   * followed immediately by a URL in parentheses.
   */
  class MDLinkReader extends MDReader {
      /**
       * Tries, in order: a label, an email, a URL (all via the `MDToken`
       * tokenize helpers), then an angle-bracketed email and an
       * angle-bracketed URL at the start of `$line`.
       *
       * @return ?MDToken the first token recognized, or `null`
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $label = MDToken::tokenizeLabel($line);
          if ($label) {
              return new MDToken($label[0], MDTokenType::Label, $label[1]);
          }
          $email = MDToken::tokenizeEmail($line);
          if ($email) {
              return new MDToken($email[0], MDTokenType::Email, $email[1], $email[2]);
          }
          $url = MDToken::tokenizeURL($line);
          if ($url) {
              return new MDToken($url[0], MDTokenType::URL, $url[1], $url[2]);
          }
          if (mb_eregi("^<(" . MDUtils::$baseEmailRegex . ")>", $line, $bracketed)) {
              return new MDToken($bracketed[0], MDTokenType::SimpleEmail, $bracketed[1]);
          }
          if (mb_eregi("^<(" . MDUtils::$baseURLRegex . ")>", $line, $bracketed)) {
              return new MDToken($bracketed[0], MDTokenType::SimpleLink, $bracketed[1]);
          }
          return null;
      }

      /**
       * Performs at most one substitution per call, checking in order:
       * label + URL, label + email (as a `mailto:` link), a bare
       * angle-bracketed email, and a bare angle-bracketed URL.
       *
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ]);
          if ($found) {
              $matched = $found->tokens;
              $target = $matched[sizeof($matched) - 1];
              // NOTE(review): this branch calls inlineMarkdownToNode (singular)
              // while the email branch below calls inlineMarkdownToNodes —
              // confirm both exist on MDState and that this is intended.
              $linkNode = new MDLinkNode($target->content, $state->inlineMarkdownToNode($matched[0]->content), $target->extra);
              array_splice($tokens, $found->index, sizeof($matched), [$linkNode]);
              return true;
          }

          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Email ]);
          if ($found) {
              $matched = $found->tokens;
              $target = $matched[sizeof($matched) - 1];
              $email = $target->content;
              $linkNode = new MDLinkNode("mailto:{$email}", $state->inlineMarkdownToNodes($matched[0]->content), $target->extra);
              array_splice($tokens, $found->index, sizeof($matched), [$linkNode]);
              return true;
          }

          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ]);
          if ($found) {
              $token = $found->tokens[0];
              $linkNode = new MDLinkNode("mailto:{$token->content}", new MDObfuscatedTextNode($token->content));
              array_splice($tokens, $found->index, 1, [$linkNode]);
              return true;
          }

          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ]);
          if ($found) {
              $address = $found->tokens[0]->content;
              $linkNode = new MDLinkNode($address, new MDTextNode($address));
              array_splice($tokens, $found->index, 1, [$linkNode]);
              return true;
          }

          return false;
      }
  }
  /**
   * Reader for referential URL definitions. Consists of link text between square
   * brackets followed immediately by a reference symbol also in square brackets.
   * The URL can be defined elsewhere on a line by itself with the symbol in square
   * brackets, colon, and the URL (and optional title in quotes).
   */
  class MDReferencedLinkReader extends MDLinkReader {
      /**
       * Reads a definition line of the form `[symbol]: url "title"` or
       * `[symbol]: url` and registers it with `$state->defineURL`.
       *
       * @return ?MDBlockNode an empty block node when a definition was
       *   consumed, or `null` if the current line is not a definition
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $line = $state->lines[$state->p];
          $nextLine = $state->p + 1;
          if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $line, $groups)) {
              $title = $groups[3];
          } elseif (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $line, $groups)) {
              $title = null;
          } else {
              return null;
          }
          // Groups 1 and 2 are the symbol and URL in both patterns
          $state->defineURL($groups[1], $groups[2], $title);
          $state->p = $nextLine;
          return new MDBlockNode([]); // empty
      }

      /**
       * Replaces the first label + label sequence (optionally separated by
       * whitespace) with an `MDReferencedLinkNode`.
       *
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ]);
          if (!$found) return false;
          $matched = $found->tokens;
          $linkText = $matched[0]->content;
          $reference = $matched[sizeof($matched) - 1]->content;
          $node = new MDReferencedLinkNode($reference, $state->inlineMarkdownToNodes($linkText));
          array_splice($tokens, $found->index, sizeof($matched), [$node]);
          return true;
      }
  }
  /**
   * Reader for images. Consists of an exclamation, alt text in square brackets,
   * and image URL in parentheses.
   */
  class MDImageReader extends MDLinkReader {
      /**
       * Returns whatever token the parent link reader recognizes; failing
       * that, emits a `Bang` token when `$line` begins with `!`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $inherited = parent::readToken($state, $line);
          if ($inherited !== null) return $inherited;
          return str_starts_with($line, '!')
              ? new MDToken('!', MDTokenType::Bang)
              : null;
      }

      /**
       * Replaces the first bang + label + URL sequence with an `MDImageNode`.
       * A non-null URL title becomes the node's `title` attribute.
       *
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ]);
          if (!$found) return false;
          $matched = $found->tokens;
          $source = $matched[sizeof($matched) - 1];
          $image = new MDImageNode($source->content, $matched[1]->content);
          if ($source->extra !== null) {
              $image->attributes['title'] = $source->extra;
          }
          array_splice($tokens, $found->index, sizeof($matched), [$image]);
          return true;
      }

      /**
       * Compares by exact class name rather than `instanceof`, since the
       * image readers themselves extend the link readers.
       *
       * @return int `-1` relative to `MDLinkReader` and
       *   `MDReferencedLinkReader`, `0` otherwise
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          $plainLinkReaders = [ 'MDLinkReader', 'MDReferencedLinkReader' ];
          return in_array(get_class($other), $plainLinkReaders, true) ? -1 : 0;
      }
  }
  /**
   * Reader for images with referential URL definitions. Consists of an
   * exclamation, alt text in square brackets, and link symbol in square brackets.
   * URL is defined the same as for `MDReferencedLinkReader`.
   */
  class MDReferencedImageReader extends MDReferencedLinkReader {
      /**
       * Returns whatever token the parent reader recognizes; failing that,
       * emits a `Bang` token when `$line` begins with `!`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $inherited = parent::readToken($state, $line);
          if ($inherited !== null) return $inherited;
          return str_starts_with($line, '!')
              ? new MDToken('!', MDTokenType::Bang)
              : null;
      }

      /**
       * Replaces the first bang + label + label sequence with an
       * `MDReferencedImageNode`.
       *
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ]);
          if (!$found) return false;
          $matched = $found->tokens;
          $altText = $matched[1]->content;
          $reference = $matched[sizeof($matched) - 1]->content;
          array_splice($tokens, $found->index, sizeof($matched), [new MDReferencedImageNode($reference, $altText)]);
          return true;
      }

      /**
       * Compares by exact class name rather than `instanceof`, since the
       * image readers themselves extend the link readers.
       *
       * @return int `-1` relative to `MDLinkReader` and
       *   `MDReferencedLinkReader`, `0` otherwise
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          $plainLinkReaders = [ 'MDLinkReader', 'MDReferencedLinkReader' ];
          return in_array(get_class($other), $plainLinkReaders, true) ? -1 : 0;
      }
  }
  /**
   * Converts line breaks within blocks into line breaks in the HTML. Not
   * included in any of the default reader sets since most flavors ignore
   * line breaks within blocks.
   */
  class MDLineBreakReader extends MDReader {
      /**
       * Replaces every `MDTextNode` whose text contains a newline with an
       * `MDNode` of text nodes (one per line) separated by
       * `MDLineBreakNode`s. Single-line text nodes are left alone.
       */
      public function postProcess(MDState $state, array &$blocks) {
          MDNode::replaceNodes($state, $blocks, function(MDNode $original) {
              if (!($original instanceof MDTextNode)) return null;
              $segments = explode("\n", $original->text);
              if (sizeof($segments) == 1) return null;
              $pieces = [ new MDTextNode(array_shift($segments)) ];
              foreach ($segments as $segment) {
                  $pieces[] = new MDLineBreakNode();
                  $pieces[] = new MDTextNode($segment);
              }
              return new MDNode($pieces);
          });
      }
  }
  /**
   * Reads a verbatim HTML tag, and if it passes validation by `MDState.tagFilter`,
   * will be rendered in the final HTML document. Disallowed tags will be rendered
   * as plain text in the resulting document.
   */
  class MDHTMLTagReader extends MDReader {
      /**
       * Parses an HTML tag at the start of `$line`. Tags whose name the root
       * state's tag filter rejects yield no token; accepted tags are passed
       * through the filter's `scrubTag` before being wrapped in a token.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $tag = MDHTMLTag::fromLineStart($line, $state);
          if ($tag === null) return null;
          $filter = $state->root()->tagFilter;
          if (!$filter->isValidTagName($tag->tagName)) return null;
          $filter->scrubTag($tag);
          return new MDToken($tag->original, MDTokenType::HTMLTag, $tag);
      }

      /**
       * Replaces the first `HTMLTag` token with an `MDHTMLTagNode`.
       *
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $found = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ]);
          if (!$found) return false;
          $tagNode = new MDHTMLTagNode($found->tokens[0]->tag);
          array_splice($tokens, $found->index, 1, [$tagNode]);
          return true;
      }
  }
  /**
   * Reads tag modifiers. Consists of curly braces with one or more CSS classes,
   * IDs, or custom attributes separated by spaces to apply to the preceding
   * node. Validation is performed on modifiers and only acceptable values are
   * applied.
   */
  class MDModifierReader extends MDReader {
      /**
       * Emits a `Modifier` token when `MDTagModifier::fromStart` recognizes
       * a modifier at the start of `$line`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $modifier = MDTagModifier::fromStart($line);
          if (!$modifier) return null;
          return new MDToken($modifier->original, MDTokenType::Modifier, $modifier);
      }

      /**
       * Never substitutes. Modifiers are applied elsewhere, and if they're
       * not it's fine if they're rendered as the original syntax.
       *
       * @return bool always `false`
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          return false;
      }
  }
  2679. // -- Nodes -----------------------------------------------------------------
  2680. /**
  2681. * Base class for nodes in the assembled document tree.
  2682. */
  2683. class MDNode {
  2684. /**
  2685. * Array of CSS classes to add to the node when rendered as HTML.
  2686. * @var string[]
  2687. */
  2688. public array $cssClasses = [];
  2689. public ?string $cssId = null;
  2690. /**
  2691. * Mapping of CSS attributes to values.
  2692. * @var array
  2693. */
  2694. public array $cssStyles = [];
  2695. /**
  2696. * Mapping of arbitrary attributes and values to add to this node's top-level
  2697. * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
  2698. * `cssClasses`, `cssId`, and `cssStyles` instead.
  2699. * @var array
  2700. */
  2701. public array $attributes = [];
  2702. /**
  2703. * All child nodes in this node.
  2704. * @var MDNode[]
  2705. */
  2706. public array $children = [];
  /**
   * Accepts either a single child node or an array of child nodes. A single
   * node is wrapped in a one-element array.
   *
   * @param MDNode[]|MDNode $children
   * @throws Error if an array element is not an `MDNode`
   */
  public function __construct(array|MDNode $children=[]) {
      if ($children instanceof MDNode) {
          $this->children = [ $children ];
          return;
      }
      if (!is_array($children)) {
          $thisClassName = MDUtils::typename($this);
          $elemClassName = MDUtils::typename($children);
          throw new Error("{$thisClassName} expects children of type MDNode[] or MDNode, got {$elemClassName}");
      }
      foreach ($children as $elem) {
          if ($elem instanceof MDNode) continue;
          $thisClassName = MDUtils::typename($this);
          $elemClassName = MDUtils::typename($elem);
          throw new Error("{$thisClassName} expects children of type MDNode[] or MDNode, got array with {$elemClassName} element");
      }
      $this->children = $children;
  }
  /**
   * Debug representation: `<ClassName child1 child2 ...>`, with each child
   * rendered through its own string conversion.
   */
  public function __toString(): string {
      $renderedChildren = array_map(fn($child) => " {$child}", $this->children);
      return "<" . get_class($this) . implode('', $renderedChildren) . ">";
  }
  /**
   * Adds a CSS class. If already present it will not be duplicated.
   *
   * @param string $cssClass
   * @return bool `true` if the class was added, `false` if it was already
   *   present
   */
  public function addClass(string $cssClass): bool {
      // Strict comparison: loose comparison treats numeric-looking strings
      // such as "1e3" and "1000" as equal.
      if (in_array($cssClass, $this->cssClasses, true)) return false;
      array_push($this->cssClasses, $cssClass);
      return true;
  }
  /**
   * Removes a CSS class.
   *
   * @param string $cssClass
   * @return bool whether the class was present and removed
   */
  public function removeClass(string $cssClass): bool {
      $beforeLength = sizeof($this->cssClasses);
      // array_diff preserves keys; reindex so cssClasses stays a plain list
      $this->cssClasses = array_values(array_diff($this->cssClasses, [ $cssClass ]));
      return sizeof($this->cssClasses) != $beforeLength;
  }
  /**
   * Renders this node and any children as an HTML string. If the node has no
   * content an empty string should be returned. This base implementation
   * renders only the children.
   */
  public function toHTML(MDState $state): string {
      $html = self::arrayToHTML($this->children, $state);
      return $html;
  }
  /**
   * Renders this node and any children as a plain text string. The conversion
   * should only render ordinary text, not attempt markdown-like formatting
   * (e.g. list items should not be prefixed with asterisks, only have their
   * content text returned). If the node has no renderable content an empty
   * string should be returned. This base implementation renders only the
   * children.
   */
  public function toPlaintext(MDState $state): string {
      $text = self::arrayToPlaintext($this->children, $state);
      return $text;
  }
  /**
   * Protected helper method that renders an HTML fragment of the attributes
   * to apply to the root HTML tag representation of this node.
   *
   * Example result with a couple `cssClasses`, a `cssId`, and a custom
   * `attributes` key-value pair:
   *
   * ```
   * class="foo bar" id="baz" lang="en"
   * ```
   *
   * The value includes a leading space if it's non-empty so that it can be
   * concatenated directly after the tag name and before the closing `>`.
   * Output order is class, id, style, then custom attributes; `class`, `id`,
   * and `style` keys in `attributes` are ignored.
   */
  protected function htmlAttributes(): string {
      $fragments = [];
      if (sizeof($this->cssClasses) > 0) {
          $fragments[] = ' class="' . htmlentities(implode(' ', $this->cssClasses)) . '"';
      }
      if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
          $fragments[] = ' id="' . htmlentities($this->cssId) . '"';
      }
      $declarations = [];
      foreach ($this->cssStyles as $property => $setting) {
          $declarations[] = "{$property}: {$setting};";
      }
      if (sizeof($declarations) > 0) {
          $fragments[] = ' style="' . htmlentities(implode(' ', $declarations)) . '"';
      }
      foreach ($this->attributes as $name => $value) {
          if (in_array($name, [ 'class', 'id', 'style' ], true)) continue;
          $cleanName = MDUtils::scrubAttributeName($name);
          // Names that scrub down to nothing are dropped entirely
          if (mb_strlen($cleanName) == 0) continue;
          $fragments[] = ' ' . $cleanName . '="' . htmlentities($value) . '"';
      }
      return implode('', $fragments);
  }
  2812. /**
  2813. * Protected helper that renders and concatenates the HTML of all children
  2814. * of this node. Mostly for use by subclasses in their `toHTML`
  2815. * implementations.
  2816. */
  2817. protected function childHTML(MDState $state): string {
  2818. return MDNode::arrayToHTML($this->children, $state);
  2819. }
  2820. /**
  2821. * Protected helper that renders and concatenates the plaintext of all
  2822. * children of this node.
  2823. */
  2824. protected function childPlaintext(MDState $state): string {
  2825. return MDNode::arrayToPlaintext($this->children, $state);
  2826. }
  2827. /**
  2828. * Protected helper for rendering nodes represented by simple paired HTML
  2829. * tags. Custom CSS classes and attributes will be included in the result,
  2830. * and child content will be rendered between the tags.
  2831. */
  2832. protected function simplePairedTagHTML(MDState $state, string $tagName): string {
  2833. $openTagSuffix = $this->children[0] instanceof MDBlockNode ? "\n" : "";
  2834. $closeTagPrefix = $this->children[sizeof($this->children) - 1] instanceof MDBlockNode ? "\n" : '';
  2835. $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
  2836. $attr = $this->htmlAttributes();
  2837. $childHTML = $this->childHTML($state);
  2838. return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
  2839. }
  2840. /**
  2841. * Calls the given callback function with every child node, recursively.
  2842. * Nodes are visited depth-first.
  2843. */
  2844. public function visitChildren(callable $fn) {
  2845. foreach ($this->children as $child) {
  2846. $fn($child);
  2847. $child->visitChildren($fn);
  2848. }
  2849. }
  2850. /**
  2851. * Helper for rendering and concatenating HTML from an array of `MDNode`s.
  2852. *
  2853. * @param MDNode[] $nodes
  2854. * @param MDState $state
  2855. * @return string HTML string
  2856. */
  2857. public static function arrayToHTML(array $nodes, MDState $state): string {
  2858. return implode('', array_map(function($node) use ($state) {
  2859. return $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : '');
  2860. }, $nodes));
  2861. }
  2862. /**
  2863. * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
  2864. *
  2865. * @param MDNode[] $nodes
  2866. * @param MDState $state
  2867. * @return string plaintext
  2868. */
  2869. public static function arrayToPlaintext(array $nodes, MDState $state): string {
  2870. return implode('', array_map(fn($node) => $node->toPlaintext($state), $nodes));
  2871. }
  2872. /**
  2873. * Recursively searches and replaces nodes in a tree. The given `replacer`
  2874. * is passed every node in the tree. If `replacer` returns a new `MDNode`
  2875. * the original will be replaced with it. If the function returns `null` no
  2876. * change will be made to that node. Traversal is depth-first.
  2877. *
  2878. * @param {MDState} state
  2879. * @param {MDNode[]} nodes
  2880. * @param {function} replacer - takes a node as an argument, returns either
  2881. * a new node or `null` to leave it unchanged
  2882. */
  2883. public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
  2884. for ($i = 0; $i < sizeof($nodes); $i++) {
  2885. $originalNode = $nodes[$i];
  2886. $replacement = $replacer($originalNode);
  2887. if ($replacement instanceof MDNode) {
  2888. array_splice($nodes, $i, 1, [$replacement]);
  2889. } else {
  2890. self::replaceNodes($state, $originalNode->children, $replacer);
  2891. }
  2892. }
  2893. }
  2894. }
  2895. /**
  2896. * Marker subclass that indicates a node represents block syntax.
  2897. */
  2898. class MDBlockNode extends MDNode {}
  2899. /**
  2900. * Paragraph block.
  2901. */
  2902. class MDParagraphNode extends MDBlockNode {
  2903. public function toHTML(MDState $state): string {
  2904. return $this->simplePairedTagHTML($state, 'p');
  2905. }
  2906. }
  2907. /**
  2908. * A heading block with a level from 1 to 6.
  2909. */
  2910. class MDHeadingNode extends MDBlockNode {
  2911. public int $level;
  2912. public function __construct(int $level, array $children) {
  2913. parent::__construct($children);
  2914. if (!is_int($level) || ($level < 1 || $level > 6)) {
  2915. $thisClassName = MDUtils::typename($this);
  2916. throw new Error("{$thisClassName} requires heading level 1 to 6");
  2917. }
  2918. $this->level = $level;
  2919. }
  2920. public function toHTML(MDState $state): string {
  2921. return $this->simplePairedTagHTML($state, "h{$this->level}");
  2922. }
  2923. }
  2924. /**
  2925. * A sub-text block with smaller, less prominent text.
  2926. */
  2927. class MDSubtextNode extends MDBlockNode {
  2928. public function toHTML(MDState $state): string {
  2929. $this->addClass('subtext');
  2930. return $this->simplePairedTagHTML($state, 'div');
  2931. }
  2932. }
  2933. /**
  2934. * Node for a horizontal dividing line.
  2935. */
  2936. class MDHorizontalRuleNode extends MDBlockNode {
  2937. public function toHTML(MDState $state): string {
  2938. return "<hr" . $this->htmlAttributes() . ">";
  2939. }
  2940. }
  2941. /**
  2942. * A block quote, usually rendered indented from other text.
  2943. */
  2944. class MDBlockquoteNode extends MDBlockNode {
  2945. public function toHTML(MDState $state): string {
  2946. return $this->simplePairedTagHTML($state, 'blockquote');
  2947. }
  2948. }
  2949. /**
  2950. * A bulleted list. Contains `MDListItemNode` children.
  2951. */
  2952. class MDUnorderedListNode extends MDBlockNode {
  2953. /** @var MDListItemNode[] $children */
  2954. public function toHTML(MDState $state): string {
  2955. return $this->simplePairedTagHTML($state, 'ul');
  2956. }
  2957. }
  2958. /**
  2959. * A numbered list. Contains `MDListItemNode` children.
  2960. */
  2961. class MDOrderedListNode extends MDBlockNode {
  2962. /** @var MDListItemNode[] $children */
  2963. public ?int $startOrdinal;
  2964. /**
  2965. * @param MDListItemNode[] $children
  2966. * @param ?int $startOrdinal
  2967. */
  2968. public function __construct(array $children, ?int $startOrdinal=null) {
  2969. parent::__construct($children);
  2970. $this->startOrdinal = $startOrdinal;
  2971. }
  2972. public function toHTML(MDState $state): string {
  2973. if ($this->startOrdinal !== null && $this->startOrdinal != 1) {
  2974. $this->attributes['start'] = strval($this->startOrdinal);
  2975. }
  2976. return $this->simplePairedTagHTML($state, 'ol');
  2977. }
  2978. }
  2979. /**
  2980. * An item in a bulleted or numbered list.
  2981. */
  2982. class MDListItemNode extends MDBlockNode {
  2983. public ?int $ordinal;
  2984. /**
  2985. * @param MDNode|MDNode[] $children
  2986. * @param ?int $ordinal
  2987. */
  2988. public function __construct(array|MDNode $children, ?int $ordinal=null) {
  2989. parent::__construct($children);
  2990. $this->ordinal = $ordinal;
  2991. }
  2992. public function toHTML(MDState $state): string {
  2993. return $this->simplePairedTagHTML($state, 'li');
  2994. }
  2995. }
  2996. /**
  2997. * A block of preformatted computer code. Inner markdown is ignored.
  2998. */
  2999. class MDCodeBlockNode extends MDBlockNode {
  3000. public string $text;
  3001. /**
  3002. * The programming language of the content.
  3003. */
  3004. public ?string $language;
  3005. public function __construct(string $text, ?string $language=null) {
  3006. parent::__construct([]);
  3007. $this->text = $text;
  3008. $this->language = $language;
  3009. }
  3010. public function toHTML(MDState $state): string {
  3011. $languageModifier = ($this->language !== null) ? " class=\"language-{$this->language}\"" : '';
  3012. return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
  3013. htmlentities($this->text) . "</code></pre>\n";
  3014. }
  3015. }
  3016. /**
  3017. * A table node with a single header row and any number of body rows.
  3018. *
  3019. * If modifying the rows, use the `headerRow` and `bodyRows` accessors,
  3020. * otherwise `children` may get out of sync.
  3021. */
  3022. class MDTableNode extends MDBlockNode {
  3023. /** @var MDTableRowNode[] $children */
  3024. public function headerRow(): ?MDTableRowNode { return $this->children[0] ?? null; }
  3025. public function bodyRows(): array { return array_slice($this->children, 1); }
  3026. /**
  3027. * How to align each column. Columns beyond the length of the array or with
  3028. * corresponding `null` elements will have no alignment set. Values should
  3029. * be valid CSS `text-align` values.
  3030. *
  3031. * @var string[]
  3032. */
  3033. public array $columnAlignments = [];
  3034. /**
  3035. * @param MDTableRowNode $headerRow
  3036. * @param MDTableRowNode[] $bodyRows
  3037. */
  3038. public function __construct(MDTableRowNode $headerRow, array $bodyRows) {
  3039. parent::__construct(array_merge([ $headerRow ], $bodyRows));
  3040. }
  3041. public function applyAlignments() {
  3042. foreach ($this->children as $child) {
  3043. $this->applyAlignmentsToRow($child);
  3044. }
  3045. }
  3046. private function applyAlignmentsToRow(MDTableRowNode $row) {
  3047. foreach ($row->children as $columnIndex => $cell) {
  3048. $alignment = $this->columnAlignments[$columnIndex] ?? null;
  3049. $this->applyAlignmentToCell($cell, $alignment);
  3050. }
  3051. }
  3052. public function applyAlignmentToCell(MDTableCellNode $cell, ?string $alignment) {
  3053. if ($alignment) {
  3054. $cell->cssStyles['text-align'] = $alignment;
  3055. } else {
  3056. unset($cell->cssStyles['text-align']);
  3057. }
  3058. }
  3059. public function toHTML(MDState $state): string {
  3060. $this->applyAlignments();
  3061. $html = '';
  3062. $html .= "<table" . $this->htmlAttributes() . ">\n";
  3063. $html .= "<thead>\n";
  3064. $html .= $this->headerRow()->toHTML($state) . "\n";
  3065. $html .= "</thead>\n";
  3066. $html .= "<tbody>\n";
  3067. $html .= MDNode::arrayToHTML($this->bodyRows(), $state) . "\n";
  3068. $html .= "</tbody>\n";
  3069. $html .= "</table>\n";
  3070. return $html;
  3071. }
  3072. }
  3073. /**
  3074. * Node for one row (header or body) in a table.
  3075. */
  3076. class MDTableRowNode extends MDBlockNode {
  3077. /** @var MDTableCellNode[] $children */
  3078. public function toHTML(MDState $state): string {
  3079. return $this->simplePairedTagHTML($state, 'tr');
  3080. }
  3081. }
  3082. /**
  3083. * Node for one cell in a table row.
  3084. */
  3085. class MDTableCellNode extends MDBlockNode {
  3086. public function toHTML(MDState $state): string {
  3087. return $this->simplePairedTagHTML($state, 'td');
  3088. }
  3089. }
  3090. /**
  3091. * Node for a header cell in a header table row.
  3092. */
  3093. class MDTableHeaderCellNode extends MDTableCellNode {
  3094. public function toHTML(MDState $state): string {
  3095. return $this->simplePairedTagHTML($state, 'th');
  3096. }
  3097. }
  3098. /**
  3099. * Definition list with `MDDefinitionListTermNode` and
  3100. * `MDDefinitionListDefinitionNode` children.
  3101. */
  3102. class MDDefinitionListNode extends MDBlockNode {
  3103. public function toHTML(MDState $state): string {
  3104. return $this->simplePairedTagHTML($state, 'dl');
  3105. }
  3106. }
  3107. /**
  3108. * A word or term in a definition list.
  3109. */
  3110. class MDDefinitionListTermNode extends MDBlockNode {
  3111. public function toHTML(MDState $state): string {
  3112. return $this->simplePairedTagHTML($state, 'dt');
  3113. }
  3114. }
  3115. /**
  3116. * The definition of a word or term in a definition list. Should follow a
  3117. * definition term, or another definition to serve as an alternate.
  3118. */
  3119. class MDDefinitionListDefinitionNode extends MDBlockNode {
  3120. public function toHTML(MDState $state): string {
  3121. return $this->simplePairedTagHTML($state, 'dd');
  3122. }
  3123. }
  3124. /**
  3125. * Block at the bottom of a document listing all the footnotes with their
  3126. * content.
  3127. */
  3128. class MDFootnoteListNode extends MDBlockNode {
  3129. private function footnoteId(MDState $state, string $symbol): ?int {
  3130. $lookup = $state->root()->userInfo['footnoteIds'];
  3131. if (!$lookup) return null;
  3132. return $lookup[$symbol] ?? null;
  3133. }
  3134. public function toHTML(MDState $state): string {
  3135. $footnotes = $state->root()->userInfo['footnotes'];
  3136. $symbolOrder = array_keys($footnotes);
  3137. if (sizeof($footnotes) == 0) return '';
  3138. $footnoteUniques = $state->root()->userInfo['footnoteInstances'];
  3139. $html = '';
  3140. $html .= '<div class="footnotes">';
  3141. $html .= '<ol>';
  3142. foreach ($symbolOrder as $symbolRaw) {
  3143. $symbol = "{$symbolRaw}";
  3144. $content = $footnotes[$symbol];
  3145. if (!$content) continue;
  3146. $footnoteId = $this->footnoteId($state, $symbol);
  3147. $contentHTML = MDNode::arrayToHTML($content, $state);
  3148. $html .= "<li value=\"{$footnoteId}\" id=\"{$state->root()->elementIdPrefix}footnote_{$footnoteId}\">{$contentHTML}";
  3149. $uniques = $footnoteUniques[$symbol] ?? null;
  3150. if ($uniques) {
  3151. foreach ($uniques as $unique) {
  3152. $html .= " <a href=\"#{$state->root()->elementIdPrefix}footnoteref_{$unique}\" class=\"footnote-backref\">↩︎</a>";
  3153. }
  3154. }
  3155. $html .= "</li>\n";
  3156. }
  3157. $html .= '</ol>';
  3158. $html .= '</div>';
  3159. return $html;
  3160. }
  3161. public function toPlaintext(MDState $state): string {
  3162. $footnotes = $state->userInfo['footnotes'];
  3163. $symbolOrder = array_keys($footnotes);
  3164. if (sizeof($footnotes) == 0) return '';
  3165. $text = '';
  3166. foreach ($symbolOrder as $symbolRaw) {
  3167. $symbol = "{$symbolRaw}";
  3168. $content = $footnotes[$symbol];
  3169. if (!$content) continue;
  3170. $text .= "{$symbol}. " . $this->childPlaintext(state) . "\n";
  3171. }
  3172. return trim($text);
  3173. }
  3174. }
  3175. /**
  3176. * Marker subclass that indicates a node represents inline syntax.
  3177. */
  3178. class MDInlineNode extends MDNode {}
  3179. /**
  3180. * Contains plain text. Special HTML characters are escaped when rendered.
  3181. */
  3182. class MDTextNode extends MDInlineNode {
  3183. public string $text;
  3184. public function __construct(string $text) {
  3185. parent::__construct([]);
  3186. $this->text = $text;
  3187. }
  3188. public function toHTML(MDState $state): string {
  3189. return htmlentities($this->text);
  3190. }
  3191. public function toPlaintext(MDState $state): string {
  3192. return $this->text;
  3193. }
  3194. }
  3195. /**
  3196. * Contains plain text which is rendered with HTML entities when rendered to
  3197. * be marginally more difficult for web scapers to decipher. Used for
  3198. * semi-sensitive info like email addresses.
  3199. */
  3200. class MDObfuscatedTextNode extends MDTextNode {
  3201. public function toHTML(MDState $state): string {
  3202. return MDUtils::escapeObfuscated($this->text);
  3203. }
  3204. }
  3205. /**
  3206. * Emphasized (italicized) content.
  3207. */
  3208. class MDEmphasisNode extends MDInlineNode {
  3209. public function toHTML(MDState $state): string {
  3210. return $this->simplePairedTagHTML($state, 'em');
  3211. }
  3212. }
  3213. /**
  3214. * Strong (bold) content.
  3215. */
  3216. class MDStrongNode extends MDInlineNode {
  3217. public function toHTML(MDState $state): string {
  3218. return $this->simplePairedTagHTML($state, 'strong');
  3219. }
  3220. }
  3221. /**
  3222. * Content rendered with a line through it.
  3223. */
  3224. class MDStrikethroughNode extends MDInlineNode {
  3225. public function toHTML(MDState $state): string {
  3226. return $this->simplePairedTagHTML($state, 's');
  3227. }
  3228. }
  3229. /**
  3230. * Underlined content.
  3231. */
  3232. class MDUnderlineNode extends MDInlineNode {
  3233. public function toHTML(MDState $state): string {
  3234. return $this->simplePairedTagHTML($state, 'u');
  3235. }
  3236. }
  3237. /**
  3238. * Highlighted content. Usually rendered with a bright colored background.
  3239. */
  3240. class MDHighlightNode extends MDInlineNode {
  3241. public function toHTML(MDState $state): string {
  3242. return $this->simplePairedTagHTML($state, 'mark');
  3243. }
  3244. }
  3245. /**
  3246. * Superscripted content.
  3247. */
  3248. class MDSuperscriptNode extends MDInlineNode {
  3249. public function toHTML(MDState $state): string {
  3250. return $this->simplePairedTagHTML($state, 'sup');
  3251. }
  3252. }
  3253. /**
  3254. * Subscripted content.
  3255. */
  3256. class MDSubscriptNode extends MDInlineNode {
  3257. public function toHTML(MDState $state): string {
  3258. return $this->simplePairedTagHTML($state, 'sub');
  3259. }
  3260. }
  3261. /**
  3262. * Inline plaintext indicating computer code.
  3263. */
  3264. class MDCodeNode extends MDInlineNode {
  3265. public string $text;
  3266. public function __construct(string $text) {
  3267. parent::__construct([]);
  3268. $this->text = $text;
  3269. }
  3270. public function toHTML(MDState $state): string {
  3271. return "<code" . $this->htmlAttributes() . ">" . htmlentities($this->text) . "</code>";
  3272. }
  3273. }
  3274. /**
  3275. * A footnote symbol in a document. Denoted as a superscripted number that can
  3276. * be clicked to go to its content at the bottom of the document.
  3277. */
  3278. class MDFootnoteNode extends MDInlineNode {
  3279. /**
  3280. * Symbol the author used to match up the footnote to its content definition.
  3281. */
  3282. public string $symbol;
  3283. /**
  3284. * The superscript symbol rendered in HTML. May be the same or different
  3285. * than `symbol`.
  3286. */
  3287. public ?string $displaySymbol = null;
  3288. /**
  3289. * Unique ID for the footnote definition.
  3290. */
  3291. public ?int $footnoteId = null;
  3292. /**
  3293. * Unique number for backlinking to a footnote occurrence. Populated by
  3294. * `MDFootnoteReader.postProcess`.
  3295. */
  3296. public ?int $occurrenceId = null;
  3297. public function __construct(string $symbol, ?string $title=null) {
  3298. parent::__construct([]);
  3299. $this->symbol = $symbol;
  3300. if ($title) $this->attributes['title'] = $title;
  3301. }
  3302. public function toHTML(MDState $state): string {
  3303. if ($this->footnoteId !== null) {
  3304. return "<sup class=\"footnote\" id=\"{$state->root()->elementIdPrefix}footnoteref_{$this->occurrenceId}\"" . $this->htmlAttributes() . ">" .
  3305. "<a href=\"#{$state->root()->elementIdPrefix}footnote_{$this->footnoteId}\">" . htmlentities($this->displaySymbol ?? $this->symbol) . "</a></sup>";
  3306. }
  3307. return "<!--FNREF:{{$this->symbol}}-->";
  3308. }
  3309. }
  3310. /**
  3311. * A clickable hypertext link.
  3312. */
  3313. class MDLinkNode extends MDInlineNode {
  3314. public string $href;
  3315. /**
  3316. * @param string $href
  3317. * @param MDNode[]|MDNode $children
  3318. */
  3319. public function __construct(string $href, array|MDNode $children, ?string $title=null) {
  3320. parent::__construct($children);
  3321. $this->href = $href;
  3322. if ($title !== null) $this->attributes['title'] = $title;
  3323. }
  3324. public function toHTML(MDState $state): string {
  3325. if (str_starts_with($this->href, 'mailto:')) {
  3326. $escapedLink = MDUtils::escapeObfuscated($this->href);
  3327. } else {
  3328. $escapedLink = htmlentities($this->href);
  3329. }
  3330. return "<a href=\"{$escapedLink}\"" . $this->htmlAttributes() . ">" . $this->childHTML($state) . "</a>";
  3331. }
  3332. }
  3333. /**
  3334. * A clickable hypertext link where the URL is defined elsewhere by reference.
  3335. */
  3336. class MDReferencedLinkNode extends MDLinkNode {
  3337. public string $reference;
  3338. public function __construct(string $reference, array|MDNode $children) {
  3339. parent::__construct('', $children);
  3340. $this->reference = $reference;
  3341. }
  3342. public function toHTML(MDState $state): string {
  3343. if ($this->href === '') {
  3344. $url = $state->urlForReference($this->reference);
  3345. if ($url) $this->href = $url;
  3346. $title = $state->urlTitleForReference($this->reference);
  3347. if ($title) $this->attributes['title'] = $title;
  3348. }
  3349. return parent::toHTML($state);
  3350. }
  3351. }
  3352. /**
  3353. * An inline image.
  3354. */
  3355. class MDImageNode extends MDInlineNode {
  3356. public string $src;
  3357. public ?string $alt;
  3358. public function __construct(string $src, ?string $alt) {
  3359. parent::__construct([]);
  3360. $this->src = $src;
  3361. $this->alt = $alt;
  3362. }
  3363. public function toHTML(MDState $state): string {
  3364. $html = "<img src=\"" . htmlentities($this->src) . "\"";
  3365. if ($this->alt) $html .= " alt=\"" . htmlentities($this->alt) . "\"";
  3366. $html .= $this->htmlAttributes() . ">";
  3367. return $html;
  3368. }
  3369. }
  3370. /**
  3371. * An inline image where the URL is defined elsewhere by reference.
  3372. */
  3373. class MDReferencedImageNode extends MDImageNode {
  3374. public string $reference;
  3375. public function __construct(string $reference, ?string $alt=null) {
  3376. parent::__construct('', $alt, []);
  3377. $this->reference = $reference;
  3378. }
  3379. public function toHTML(MDState $state): string {
  3380. if ($this->src === '') {
  3381. $url = $state->urlForReference($this->reference);
  3382. if ($url !== null) $this->src = $url;
  3383. $title = $state->urlTitleForReference($this->reference);
  3384. if ($title !== null) $this->attributes['title'] = $title;
  3385. }
  3386. return parent::toHTML($state);
  3387. }
  3388. }
  3389. /**
  3390. * An abbreviation that can be hovered over to see its full expansion.
  3391. */
  3392. class MDAbbreviationNode extends MDInlineNode {
  3393. /** @type {string} */
  3394. public string $abbreviation;
  3395. /**
  3396. * @param {string} abbreviation
  3397. * @param {string} definition
  3398. */
  3399. public function __construct(string $abbreviation, string $definition) {
  3400. parent::__construct([]);
  3401. $this->abbreviation = $abbreviation;
  3402. $this->attributes['title'] = $definition;
  3403. }
  3404. public function toHTML(MDState $state): string {
  3405. return "<abbr" . $this->htmlAttributes() . ">" . htmlentities($this->abbreviation) . "</abbr>";
  3406. }
  3407. }
  3408. /**
  3409. * A line break that is preserved when rendered to HTML.
  3410. */
  3411. class MDLineBreakNode extends MDInlineNode {
  3412. public function toHTML(MDState $state): string {
  3413. return '<br>';
  3414. }
  3415. public function toPlaintext(MDState $state): string {
  3416. return "\n";
  3417. }
  3418. }
  3419. /**
  3420. * A verbatim HTML tag. May be altered to strip out disallowed attributes or
  3421. * CSS values.
  3422. */
  3423. class MDHTMLTagNode extends MDInlineNode {
  3424. public MDHTMLTag $tag;
  3425. public function __construct(MDHTMLTag $tag) {
  3426. parent::__construct([]);
  3427. $this->tag = $tag;
  3428. }
  3429. public function toHTML(MDState $state): string {
  3430. return "{$this->tag}";
  3431. }
  3432. }
  3433. // -- Main class ------------------------------------------------------------
  3434. /**
  3435. * Markdown parser.
  3436. */
  3437. class Markdown {
  3438. /**
  3439. * Set of standard readers to handle common syntax.
  3440. * @type {MDReader[]}
  3441. */
  3442. public static function standardReaders(): array {
  3443. if (self::$sharedStandardReaders === null) {
  3444. self::$sharedStandardReaders = [
  3445. new MDUnderlinedHeadingReader(),
  3446. new MDHashHeadingReader(),
  3447. new MDBlockQuoteReader(),
  3448. new MDHorizontalRuleReader(),
  3449. new MDUnorderedListReader(),
  3450. new MDOrderedListReader(),
  3451. new MDFencedCodeBlockReader(),
  3452. new MDIndentedCodeBlockReader(),
  3453. new MDParagraphReader(),
  3454. new MDStrongReader(),
  3455. new MDEmphasisReader(),
  3456. new MDCodeSpanReader(),
  3457. new MDImageReader(),
  3458. new MDLinkReader(),
  3459. new MDHTMLTagReader(),
  3460. ];
  3461. }
  3462. return self::$sharedStandardReaders;
  3463. }
  3464. private static ?array $sharedStandardReaders = null;
  3465. /**
  3466. * All supported readers except `MDLineBreakReader`.
  3467. * @type {MDReader[]}
  3468. */
  3469. public static function allReaders(): array {
  3470. if (self::$sharedAllReaders === null) {
  3471. $sharedAllReaders = array_merge(self::standardReaders(), [
  3472. new MDSubtextReader(),
  3473. new MDTableReader(),
  3474. new MDDefinitionListReader(),
  3475. new MDFootnoteReader(),
  3476. new MDAbbreviationReader(),
  3477. new MDUnderlineReader(),
  3478. new MDSubscriptReader(),
  3479. new MDStrikethroughReader(),
  3480. new MDHighlightReader(),
  3481. new MDSuperscriptReader(),
  3482. new MDReferencedImageReader(),
  3483. new MDReferencedLinkReader(),
  3484. new MDModifierReader(),
  3485. ]);
  3486. }
  3487. return $sharedAllReaders;
  3488. }
  3489. private static ?array $sharedAllReaders = null;
  3490. /**
  3491. * Shared instance of a parser with standard syntax.
  3492. */
  3493. public static function standardParser(): Markdown {
  3494. if (self::$sharedStandardMarkdown === null) {
  3495. self::$sharedStandardMarkdown = new Markdown(self::standardReaders());
  3496. }
  3497. return self::$sharedStandardMarkdown;
  3498. }
  3499. private static ?Markdown $sharedStandardMarkdown = null;
  3500. /**
  3501. * Shared instance of a parser with all supported syntax.
  3502. */
  3503. public static function completeParser(): Markdown {
  3504. if (self::$sharedCompleteParser === null) {
  3505. self::$sharedCompleteParser = new Markdown(self::allReaders());
  3506. }
  3507. return self::$sharedCompleteParser;
  3508. }
  3509. public static ?Markdown $sharedCompleteParser = null;
  3510. /**
  3511. * Filter for what non-markdown HTML is permitted. HTML generated as a
  3512. * result of markdown is unaffected.
  3513. */
  3514. public MDHTMLFilter $tagFilter;
  3515. /** @var MDReader[] */
  3516. private array $readers;
  3517. /** @var MDReader[] */
  3518. private array $readersByBlockPriority;
  3519. /** @var MDReader[] */
  3520. private array $readersByTokenPriority;
  3521. private array $readersBySubstitutePriority;
  3522. /**
  3523. * Creates a Markdown parser with the given syntax readers.
  3524. *
  3525. * @param MDReader[] $readers
  3526. */
  3527. public function __construct(?array $readers=null) {
  3528. $this->readers = $readers ?? self::allReaders();
  3529. $this->readersByBlockPriority = MDReader::sortReaderForBlocks($this->readers);
  3530. $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($this->readers);
  3531. $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($this->readers);
  3532. $this->tagFilter = new MDHTMLFilter();
  3533. }
  3534. /**
  3535. * Converts a markdown string to an HTML string.
  3536. *
  3537. * @param string $markdown
  3538. * @param string $elementIdPrefix Optional prefix for generated element
  3539. * `id`s and links to them. For differentiating multiple markdown docs in
  3540. * the same HTML page.
  3541. * @return string HTML
  3542. */
  3543. public function toHTML(string $markdown, string $elementIdPrefix='') {
  3544. $lines = mb_split('(?:\\n|\\r|\\r\\n)', $markdown);
  3545. try {
  3546. return $this->parse($lines, $elementIdPrefix);
  3547. } catch (Error $e) {
  3548. $this->investigateException($lines, $elementIdPrefix);
  3549. throw $e;
  3550. }
  3551. }
  3552. /**
  3553. * @param string[] $lines
  3554. * @param string $elementIdPrefix
  3555. */
  3556. private function parse(array $lines, string $elementIdPrefix) {
  3557. $state = new MDState($lines);
  3558. $state->readersByBlockPriority = $this->readersByBlockPriority;
  3559. $state->readersByTokenPriority = $this->readersByTokenPriority;
  3560. $state->readersBySubstitutePriority = $this->readersBySubstitutePriority;
  3561. $state->tagFilter = $this->tagFilter;
  3562. $state->elementIdPrefix = $elementIdPrefix;
  3563. foreach ($this->readers as $reader) {
  3564. $reader->preProcess($state);
  3565. }
  3566. $nodes = $state->readBlocks();
  3567. foreach ($this->readers as $reader) {
  3568. $reader->postProcess($state, $nodes);
  3569. }
  3570. return MDNode::arrayToHTML($nodes, $state);
  3571. }
  3572. /**
  3573. * Keeps removing first and last lines of markdown to locate the source of
  3574. * an exception and prints the minimal snippet.
  3575. *
  3576. * @param string[] $lines
  3577. * @param string $elementIdPrefix
  3578. */
  3579. private function investigateException(array $lines, string $elementIdPrefix) {
  3580. print("Investigating error...\n");
  3581. $startIndex = 0;
  3582. $endIndex = sizeof($lines);
  3583. // Keep stripping away first line until an exception stops being thrown
  3584. for ($i = 0; $i < sizeof($lines); $i++) {
  3585. try {
  3586. $this->parse(array_slice($lines, $i, $endIndex), $elementIdPrefix);
  3587. break;
  3588. } catch (Error $e0) {
  3589. $startIndex = $i;
  3590. }
  3591. }
  3592. // Keep stripping away last line until an exception stops being thrown
  3593. for ($i = sizeof($lines); $i > $startIndex; $i--) {
  3594. try {
  3595. $this->parse(array_slice($lines, $startIndex, $i), $elementIdPrefix);
  3596. break;
  3597. } catch (Error $e0) {
  3598. $endIndex = $i;
  3599. }
  3600. }
  3601. $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex));
  3602. print("This portion of markdown caused an unexpected exception:\n{$problematicMarkdown}\n");
  3603. }
  3604. }
  3605. ?>