PHP and JavaScript implementations of a simple Markdown parser
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

markdown.php 124KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067
  1. <?php
  2. declare(strict_types=1);
  /**
   * Static utilities shared by the markdown tokenizer and parser.
   */
  class MDUtils {
      // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
      public const baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';

      // Modified from https://emailregex.com/ to remove capture groups.
      public const baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

      /**
       * Escapes the HTML special characters `&`, `<`, `>` and `"`. Single
       * quotes are left untouched.
       *
       * @param mixed $str string to escape; any non-string value yields `''`
       * @param bool $encodeNewlinesAsBreaks whether to insert a `<br>` tag
       * before every newline character
       * @return string escaped HTML
       */
      public static function escapeHTML($str, $encodeNewlinesAsBreaks=false): string {
          if (!is_string($str)) return '';
          // Plain byte-wise replacement: all four characters are ASCII, so this
          // is safe for UTF-8 text and, unlike a multibyte regex replace, can
          // never fail and hand back a non-string. `&` must be replaced first so
          // the entities produced for the other characters are not re-escaped.
          $html = str_replace(
              ['&', '<', '>', '"'],
              ['&amp;', '&lt;', '&gt;', '&quot;'],
              $str
          );
          if ($encodeNewlinesAsBreaks) {
              $html = str_replace("\n", "<br>\n", $html);
          }
          return $html;
      }

      /**
       * Encodes every character as an HTML numeric entity to make it marginally
       * more difficult for web scrapers to grab sensitive info. If `$text`
       * starts with `mailto:`, that prefix is kept verbatim and only the
       * remainder is obfuscated.
       *
       * @param string $text text to obfuscate
       * @return string sequence of `&#NNN;` entities
       */
      public static function escapeObfuscated(string $text): string {
          if (str_starts_with($text, 'mailto:')) {
              return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
          }
          $html = '';
          // Split once instead of calling mb_substr() for every position.
          foreach (mb_str_split($text) as $char) {
              $codePoint = mb_ord($char);
              $html .= "&#{$codePoint};";
          }
          return $html;
      }

      /**
       * Removes illegal characters (tab, newline, form feed, space, `/`, `>`,
       * `"`, `'`, `=`) from an HTML attribute name.
       *
       * @param string $name raw attribute name
       * @return string scrubbed attribute name
       */
      public static function scrubAttributeName(string $name): string {
          return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
      }

      /**
       * Strips one or more leading indents from a line or lines of markdown. An
       * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
       * spaces) are treated like one indent level.
       *
       * @param string|string[] $line line or array of lines to unindent
       * @param int $levels number of indent levels to strip; values of zero or
       * less leave the input unchanged
       * @return string|string[] same shape as `$line`
       */
      public static function stripIndent(string|array $line, int $levels=1): string|array {
          // A negative repeat count would produce an invalid regex.
          if ($levels <= 0) return $line;
          $regex = '^(?: {1,4}|\\t){' . $levels . '}';
          $strip = fn(string $l): string => mb_ereg_replace($regex, '', $l);
          return is_array($line) ? array_map($strip, $line) : $strip($line);
      }

      /**
       * Counts the number of indent levels in a line of text. Partial indents
       * (1 to 3 spaces) are counted as one indent level unless `$fullIndentsOnly`
       * is `true`.
       *
       * @param string $line line to inspect
       * @param bool $fullIndentsOnly only count tabs and runs of exactly 4 spaces
       * @return int number of leading indent levels
       */
      public static function countIndents(string $line, bool $fullIndentsOnly=false): int {
          $indentPattern = $fullIndentsOnly ? '(?: {4}|\\t)' : '(?: {1,4}|\\t)';
          // Normalize every indent-sized run to a single tab...
          $normalized = mb_ereg_replace($indentPattern, "\t", $line);
          // ...keep only the leading tabs, dropping the content after them...
          $leadingTabs = mb_ereg_replace('^(\\t*)(.*?)$', '\\1', $normalized);
          // ...and count what is left.
          return mb_strlen($leadingTabs);
      }

      /**
       * Returns a copy of an array without any whitespace-only lines at the end.
       * Keys of the remaining elements are preserved.
       *
       * @param string[] $lines
       * @return string[]
       */
      public static function withoutTrailingBlankLines(array $lines): array {
          $stripped = $lines;
          // array_key_last() finds the final element even when the keys are not
          // a dense 0..n-1 sequence (e.g. after array_filter()).
          while (($lastKey = array_key_last($stripped)) !== null
              && trim($stripped[$lastKey]) === '') {
              array_pop($stripped);
          }
          return $stripped;
      }

      /**
       * Tests if an array of lines contains at least one blank line. A blank
       * line may contain whitespace.
       *
       * @param string[] $lines
       * @return bool `true` if any line is empty or whitespace-only
       */
      public static function containsBlankLine(array $lines): bool {
          foreach ($lines as $line) {
              if (trim($line) === '') return true;
          }
          return false;
      }

      /**
       * Returns the class name of an object, or the `gettype()` name of any
       * other value.
       *
       * @param mixed $value
       * @return string
       */
      public static function typename($value): string {
          $tn = gettype($value);
          return ($tn === 'object') ? get_class($value) : $tn;
      }
  }
  /**
   * Kinds of token an `MDToken` can be. The trailing comment on a case notes
   * which `MDToken` fields carry that token's payload.
   */
  enum MDTokenType {
      case Text;

      /**
       * Leading or trailing whitespace around a run of text only; individual
       * whitespace characters inside a run do not each get a token.
       */
      case Whitespace;

      case Underscore;
      case Asterisk;
      case Slash;
      case Tilde;
      case Bang;
      case Backtick;
      case Equal;
      case Caret;

      case Label; // content=label
      case URL; // content=URL, extra=title
      case Email; // content=email address, extra=title
      case SimpleLink; // content=URL
      case SimpleEmail; // content=email address
      case Footnote; // content=symbol
      case Modifier; // modifier=MDTagModifier
      case HTMLTag; // tag=MDHTMLTag

      /** Search wildcard for `MDToken::findFirstTokens`: any element that is not a whitespace token. */
      case META_AnyNonWhitespace;

      /** Search wildcard for `MDToken::findFirstTokens`: a whitespace token that may be absent. */
      case META_OptionalWhitespace;
  }
  /**
   * Result object returned by `MDToken::findFirstTokens`: the matched tokens
   * and the index in the searched array where the match begins.
   */
  class MDTokenMatch {
      /**
       * @param MDToken[] $tokens the tokens that matched the pattern
       * @param int $index index in the searched array where the match starts
       */
      public function __construct(
          public array $tokens,
          public int $index
      ) {}
  }
  /**
   * Result object returned by `MDToken::findPairedTokens`: the opening tokens,
   * the closing tokens, the tokens between them, and where each group sits in
   * the searched array.
   */
  class MDPairedTokenMatch {
      /**
       * @param MDToken[] $startTokens tokens matched by the start pattern
       * @param MDToken[] $contentTokens tokens between the start and end matches
       * @param MDToken[] $endTokens tokens matched by the end pattern
       * @param int $startIndex index of the first start token
       * @param int $contentIndex index of the first content token
       * @param int $endIndex index of the first end token
       * @param int $totalLength number of array elements spanned from the first
       * start token through the last end token
       */
      public function __construct(
          public array $startTokens,
          public array $contentTokens,
          public array $endTokens,
          public int $startIndex,
          public int $contentIndex,
          public int $endIndex,
          public int $totalLength
      ) {}
  }
  /**
   * One lexical unit in inline markdown syntax parsing.
   */
  class MDToken {
      /**
       * The original verbatim token string. Required as a plaintext fallback if
       * the token remains unresolved.
       */
      public string $original;
      public MDTokenType $type;
      public ?string $content = null;
      public ?string $extra = null;
      public ?MDHTMLTag $tag = null;
      public ?MDTagModifier $modifier = null;

      /**
       * Creates a token.
       *
       * `$content` is routed by its type: an `MDTagModifier` is stored in
       * `$modifier`, an `MDHTMLTag` in `$tag`, and a string (or `null`) in
       * `$content`.
       *
       * @param string $original verbatim token string
       * @param MDTokenType $type token type
       * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
       * the token
       * @param string|null $extra additional content
       */
      public function __construct(string $original, MDTokenType $type,
          string|MDTagModifier|MDHTMLTag|null $content=null,
          ?string $extra=null) {
          $this->original = $original;
          $this->type = $type;
          if ($content instanceof MDTagModifier) {
              $this->modifier = $content;
          } elseif ($content instanceof MDHTMLTag) {
              $this->tag = $content;
          } else {
              $this->content = $content;
          }
          $this->extra = $extra;
      }

      /**
       * Debug representation of the form `<ClassName type=TypeName content="...">`.
       */
      public function __toString(): string {
          return '<' . MDUtils::typename($this) . " type={$this->type->name} " .
              "content=\"{$this->content}\">";
      }

      /**
       * Attempts to parse a label token from the beginning of `$line`. A label is
       * of the form `[content]`. Nested square brackets are allowed inside the
       * label, a backslash skips the character after it, and an unbalanced `)`
       * aborts the attempt. If found, returns an array:
       * - `0`: the entire label including brackets
       * - `1`: the content of the label
       *
       * @param string $line
       * @return ?string[] match groups or null if not found
       */
      public static function tokenizeLabel(string $line): ?array {
          if (!str_starts_with($line, '[')) return null;
          $parenDepth = 0;
          $bracketDepth = 0;
          $length = mb_strlen($line);
          for ($p = 1; $p < $length; $p++) {
              $ch = mb_substr($line, $p, 1);
              if ($ch === '\\') {
                  // Escaped character: skip over it.
                  $p++;
              } elseif ($ch === '(') {
                  $parenDepth++;
              } elseif ($ch === ')') {
                  $parenDepth--;
                  if ($parenDepth < 0) return null;
              } elseif ($ch === '[') {
                  $bracketDepth++;
              } elseif ($ch === ']') {
                  if ($bracketDepth > 0) {
                      $bracketDepth--;
                  } else {
                      // Closing bracket of the label itself.
                      return [
                          mb_substr($line, 0, $p + 1),
                          mb_substr($line, 1, $p - 1),
                      ];
                  }
              }
          }
          return null;
      }

      private const urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)'; // 1=URL, 2=title
      private const urlRegex = '^\\((\\S+?)\\)'; // 1=URL

      /**
       * Attempts to parse a URL token from the beginning of `$line`. A URL token
       * is of the form `(url)` or `(url "title")`. Input that `tokenizeEmail`
       * recognizes is rejected here. If found, returns an array:
       * - `0`: the entire URL token including parentheses
       * - `1`: the URL
       * - `2`: the optional title, or `null`
       *
       * @param string $line
       * @return ?array token tuple
       */
      public static function tokenizeURL(string $line): ?array {
          $groups = [];
          if (mb_eregi(self::urlWithTitleRegex, $line, $groups)) {
              // make sure it's not better described as an email address
              if (self::tokenizeEmail($line)) return null;
              return $groups;
          }
          if (mb_eregi(self::urlRegex, $line, $groups)) {
              if (self::tokenizeEmail($line)) return null;
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Attempts to parse an email address from the beginning of `$line`. An
       * email address is of the form `(user@example.com)` or
       * `(user@example.com "link title")`. If found, returns an array:
       * - `0`: the entire token including parentheses
       * - `1`: the email address
       * - `2`: the optional link title, or `null`
       *
       * @param string $line
       * @return ?string[] token tuple
       */
      public static function tokenizeEmail(string $line): ?array {
          $groups = [];
          $withTitle = "^\\(\\s*(" . MDUtils::baseEmailRegex . ")\\s+\"(.*?)\"\\s*\\)";
          if (mb_eregi($withTitle, $line, $groups)) {
              return $groups;
          }
          $bare = "^\\(\\s*(" . MDUtils::baseEmailRegex . ")\\s*\\)";
          if (mb_eregi($bare, $line, $groups)) {
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
       * If found, returns a `MDTokenMatch`, otherwise `null`.
       *
       * `META_AnyNonWhitespace` and `META_OptionalWhitespace` are supported as
       * wildcard pattern elements. `META_AnyNonWhitespace` accepts any element
       * (token or node) that is not a whitespace token.
       * `META_OptionalWhitespace` consumes a whitespace token if one is present
       * and otherwise matches nothing, including at the very end of the token
       * array, so results may hold a variable number of tokens.
       *
       * @param (MDToken|MDNode)[] $tokensToSearch mixed array of `MDToken` and
       * `MDNode` elements
       * @param MDTokenType[] $pattern contiguous run of token types to find
       * @param int $startIndex token index to begin searching (defaults to 0)
       * @return ?MDTokenMatch match object, or `null` if not found
       * @throws Error if `$pattern` is empty
       */
      public static function findFirstTokens(array $tokensToSearch, array $pattern,
          int $startIndex=0): ?MDTokenMatch {
          $patternLength = count($pattern);
          if ($patternLength == 0) {
              throw new Error("Pattern cannot be empty");
          }
          $tokenCount = count($tokensToSearch);
          for ($t = $startIndex; $t < $tokenCount; $t++) {
              $matchedAll = true;
              $matched = [];
              // Becomes negative each time an optional element consumes no token.
              $patternOffset = 0;
              for ($p = 0; $p < $patternLength; $p++) {
                  $elem = $pattern[$p];
                  $t0 = $t + $p + $patternOffset;
                  if ($t0 >= $tokenCount) {
                      // Out of tokens. Optional whitespace can still match
                      // nothing; any other element cannot match here.
                      if ($elem === MDTokenType::META_OptionalWhitespace) continue;
                      $matchedAll = false;
                      break;
                  }
                  $token = $tokensToSearch[$t0];
                  $isWhitespace = $token instanceof MDToken
                      && $token->type === MDTokenType::Whitespace;
                  if ($elem === MDTokenType::META_OptionalWhitespace) {
                      if ($isWhitespace) {
                          $matched[] = $token;
                      } else {
                          // Nothing consumed; retry this token on the next element.
                          $patternOffset--;
                      }
                  } elseif ($elem === MDTokenType::META_AnyNonWhitespace) {
                      if ($isWhitespace) {
                          $matchedAll = false;
                          break;
                      }
                      $matched[] = $token;
                  } else {
                      if (!($token instanceof MDToken) || $token->type !== $elem) {
                          $matchedAll = false;
                          break;
                      }
                      $matched[] = $token;
                  }
              }
              if ($matchedAll) {
                  return new MDTokenMatch($matched, $t);
              }
          }
          return null;
      }

      /**
       * Searches an array of MDToken for a given starting pattern and ending
       * pattern and returns match info about both and the tokens in between.
       *
       * If `$contentValidator` is specified, it will be called with the content
       * tokens of a potential match. If the validator returns `true`, the result
       * will be accepted and returned by this method. If the validator returns
       * `false`, this method will keep looking for another matching pair. If no
       * validator is given the first match will be returned regardless of content.
       * Pairs with no tokens between them are never accepted.
       *
       * If a match is found, a `MDPairedTokenMatch` is returned with details
       * of the opening tokens, closing tokens, and content tokens between. Otherwise
       * `null` is returned.
       *
       * @param MDToken[] $tokensToSearch array of `MDToken` to search in
       * @param MDTokenType[] $startPattern pattern to find first
       * @param MDTokenType[] $endPattern pattern to find positioned after
       * `$startPattern`
       * @param ?callable $contentValidator optional validator function. If
       * provided, will be passed an array of inner `MDToken`, and the function
       * can return `true` to accept the contents or `false` to keep searching
       * @param int $startIndex token index where searching should begin
       * @return ?MDPairedTokenMatch match, or `null`
       */
      public static function findPairedTokens(array $tokensToSearch,
          array $startPattern, array $endPattern, ?callable $contentValidator=null,
          int $startIndex=0): ?MDPairedTokenMatch {
          $tokenCount = count($tokensToSearch);
          for ($s = $startIndex; $s < $tokenCount; $s++) {
              $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
              if ($startMatch === null) return null;
              $contentStart = $startMatch->index + count($startMatch->tokens);
              $endStart = $contentStart;
              while ($endStart < $tokenCount) {
                  $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                  if ($endMatch === null) break;
                  $contents = array_slice($tokensToSearch, $contentStart,
                      $endMatch->index - $contentStart);
                  if (count($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                      return new MDPairedTokenMatch($startMatch->tokens,
                          $contents,
                          $endMatch->tokens,
                          $startMatch->index,
                          $contentStart,
                          $endMatch->index,
                          $endMatch->index + count($endMatch->tokens) - $startMatch->index);
                  }
                  // Contents rejected. Try next end match.
                  $endStart = $endMatch->index + 1;
              }
              // No end matches. Resume the start search just past this start
              // match (the loop increment adds the 1).
              $s = $startMatch->index;
          }
          return null;
      }

      /**
       * Tests whether `$other` is an `MDToken` with the same field values.
       * `$tag` is compared by identity (`!==`) while `$modifier` uses loose
       * object comparison (`!=`).
       *
       * @param mixed $other value to compare against
       * @return bool
       */
      public function equals($other) {
          if (!($other instanceof MDToken)) return false;
          if ($other->original !== $this->original) return false;
          if ($other->type != $this->type) return false;
          if ($other->content !== $this->content) return false;
          if ($other->extra !== $this->extra) return false;
          if ($other->tag !== $this->tag) return false;
          if ($other->modifier != $this->modifier) return false;
          return true;
      }
  }
  434. /**
  435. * Parsing and rendering state. Passed around throughout the parsing process.
  436. *
  437. * States are hierarchical. A sub-state can be created by calling `->copy()` with
  438. * a new array of lines. The sub-state points back to its parent state. This
  439. * is done to parse inner content of a syntax as its own standalone document.
  440. *
  441. * If a custom `MDReader` implementation wants to store data in this object,
  442. * always do so on `$state->root()` to ensure it's stored on the original state,
  443. * not a child state. Otherwise data may be lost when the sub-state is discarded.
  444. */
  445. class MDState {
  446. /**
  447. * Ascends the parent chain to the root `MDState` instance. This should be
  448. * used when referencing most stored fields except `$lines` and `$p`.
  449. */
  450. public function root(): MDState {
  451. return $this->parent ? $this->parent->root() : $this;
  452. }
  453. /**
  454. * Lines of the markdown document. The current line index is pointed to by `$p`.
  455. *
  456. * @var string[]
  457. */
  458. public array $lines;
  459. /**
  460. * The current line in `$lines`.
  461. */
  462. public function currentLine(): ?string {
  463. return ($this->p < sizeof($this->lines)) ? $this->lines[$this->p] : null;
  464. }
  465. /**
  466. * Current line pointer into array `$lines`.
  467. */
  468. public int $p = 0;
  469. /**
  470. * General storage for anything readers need to track during the parsing
  471. * process.
  472. */
  473. public array $userInfo = [];
  474. private ?MDState $parent = null;
  475. /**
  476. * Array of `MDReader`s sorted by block reading priority.
  477. * @var MDReader[]
  478. */
  479. public array $readersByBlockPriority = [];
  480. /**
  481. * Array of `MDReader`s sorted by tokenization priority.
  482. * @var MDReader[]
  483. */
  484. public array $readersByTokenPriority = [];
  485. /**
  486. * Array of tuples of `pass:number` and `MDReader` sorted by substitution
  487. * priority.
  488. * @var array[]
  489. */
  490. public array $readersBySubstitutePriority = [];
  491. /**
  492. * Prefix to include in any generated `id` attributes on HTML elements.
  493. * Useful for keeping elements unique in multiple parsed documents in the
  494. * same HTML page.
  495. */
  496. public string $elementIdPrefix = '';
  497. /**
  498. * Filter for removing unapproved HTML tags, attributes, and values.
  499. */
  500. public MDHTMLFilter $tagFilter;
  501. /**
  502. * @param string[] $lines lines of markdown text
  503. */
  504. public function __construct(array $lines) {
  505. $this->lines = $lines;
  506. $this->startTime = microtime(true);
  507. }
  508. /**
  509. * Creates a copy of this state with new lines. Useful for parsing nested
  510. * content.
  511. *
  512. * @param string[] $lines
  513. * @return MDState copied sub-state
  514. */
  515. public function copy(array $lines): MDState {
  516. $cp = new MDState($lines);
  517. $cp->parent = $this;
  518. return $cp;
  519. }
  520. /**
  521. * Tests if there are at least `$minCount` lines available to read. If `$p`
  522. * is not provided it will be relative to `$this->p`.
  523. */
  524. public function hasLines(int $minCount, ?int $p=null): bool {
  525. $relativeTo = ($p === null) ? $this->p : $p;
  526. return $relativeTo + $minCount <= sizeof($this->lines);
  527. }
  528. /**
  529. * Reads and returns an array of blocks from the current line pointer.
  530. *
  531. * @return MDBlockNode[] parsed blocks
  532. */
  533. public function readBlocks(): array {
  534. $blocks = [];
  535. while ($this->hasLines(1)) {
  536. $block = $this->readNextBlock();
  537. if ($block) {
  538. array_push($blocks, $block);
  539. } else {
  540. break;
  541. }
  542. }
  543. return $blocks;
  544. }
  545. /**
  546. * Creates a simple `MDBlockNode` if no other registered blocks match.
  547. */
  548. private function readFallbackBlock(): ?MDBlockNode {
  549. if ($this->p >= sizeof($this->lines)) return null;
  550. $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
  551. if (sizeof($lines) == 0) return null;
  552. $this->p = sizeof($this->lines);
  553. return new MDBlockNode($this->inlineMarkdownToNode(implode("\n", $lines)));
  554. }
  555. /**
  556. * Attempts to read one block from the current line pointer. The pointer
  557. * will be positioned just after the end of the block.
  558. */
  559. private function readNextBlock(): ?MDBlockNode {
  560. while ($this->hasLines(1) && mb_strlen(trim($this->lines[$this->p])) == 0) {
  561. $this->p++;
  562. }
  563. if (!$this->hasLines(1)) return null;
  564. foreach ($this->root()->readersByBlockPriority as $reader) {
  565. $startP = $this->p;
  566. $block = $reader->readBlock($this);
  567. if ($block) {
  568. if ($this->p == $startP) {
  569. $readerClassName = MDUtils::typename($reader);
  570. $blockClassName = MDUtils::typename($block);
  571. throw new Error("{$readerClassName} returned an " .
  572. "{$blockClassName} without incrementing MDState.p. " .
  573. "This could lead to an infinite loop.");
  574. }
  575. return $block;
  576. }
  577. }
  578. $fallback = $this->readFallbackBlock();
  579. return $fallback;
  580. }
  581. /**
  582. * @param string $line
  583. * @return MDToken[]
  584. */
  585. private function inlineMarkdownToTokens(string $line): array {
  586. if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);
  587. $tokens = [];
  588. $text = '';
  589. $expectLiteral = false;
  590. /**
  591. * Flushes accumulated content in `$text` to `$tokens`.
  592. */
  593. $endText = function() use (&$tokens, &$text) {
  594. if (mb_strlen($text) === 0) return;
  595. $textGroups = [];
  596. if (mb_eregi('^(\\s+)(.*?)$', $text, $textGroups)) {
  597. array_push($tokens, new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]));
  598. $text = is_string($textGroups[2]) ? $textGroups[2] : '';
  599. }
  600. if (mb_eregi('^(.*?)(\\s+)$', $text, $textGroups)) {
  601. array_push($tokens, new MDToken($textGroups[1], MDTokenType::Text, $textGroups[1]));
  602. array_push($tokens, new MDToken($textGroups[2], MDTokenType::Whitespace, $textGroups[2]));
  603. } elseif (mb_strlen($text) > 0) {
  604. array_push($tokens, new MDToken($text, MDTokenType::Text, $text));
  605. }
  606. $text = '';
  607. };
  608. for ($p = 0; $p < mb_strlen($line); $p++) {
  609. $ch = mb_substr($line, $p, 1);
  610. $remainder = mb_substr($line, $p);
  611. if ($expectLiteral) {
  612. $text .= $ch;
  613. $expectLiteral = false;
  614. continue;
  615. }
  616. if ($ch == '\\') {
  617. $expectLiteral = true;
  618. continue;
  619. }
  620. $found = false;
  621. foreach ($this->root()->readersByTokenPriority as $reader) {
  622. $token = $reader->readToken($this, $remainder);
  623. if ($token === null) continue;
  624. $endText();
  625. array_push($tokens, $token);
  626. if ($token->original == null || mb_strlen($token->original) == 0) {
  627. $readerClassName = MDUtils::typename($reader);
  628. throw new Error(`{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.`);
  629. }
  630. $p += mb_strlen($token->original) - 1;
  631. $found = true;
  632. break;
  633. }
  634. if (!$found) {
  635. $text .= $ch;
  636. }
  637. }
  638. $endText();
  639. return $tokens;
  640. }
  641. /**
  642. * Converts a line of markdown to an `MDInlineNode`.
  643. *
  644. * @param string|string[] $line
  645. * @return MDInlineNode
  646. */
  647. public function inlineMarkdownToNode(string|array $line): MDInlineNode {
  648. $nodes = $this->inlineMarkdownToNodes($line);
  649. return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
  650. }
  651. /**
  652. * Converts a line of markdown to an array of `MDInlineNode`s.
  653. *
  654. * @param string|string[] $line
  655. * @return MDInlineNode[]
  656. */
  657. public function inlineMarkdownToNodes(string|array $line): array {
  658. $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
  659. return $this->tokensToNodes($tokens);
  660. }
  661. /**
  662. * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
  663. * of only `MDInlineNode` via repeated `MDReader` substition.
  664. *
  665. * @param (MDToken|MDInlineNode)[] $tokens
  666. * @return MDInlineNode[]
  667. */
  668. public function tokensToNodes(array $tokens): array {
  669. $nodes = $tokens;
  670. // Perform repeated substitutions, converting sequences of tokens into
  671. // nodes, until no more substitutions can be made.
  672. $anyChanges = false;
  673. do {
  674. $this->checkExecutionTime();
  675. $anyChanges = false;
  676. foreach ($this->root()->readersBySubstitutePriority as $readerTuple) {
  677. /** @var int */
  678. $pass = $readerTuple[0];
  679. /** @var MDReader */
  680. $reader = $readerTuple[1];
  681. $changed = $reader->substituteTokens($this, $pass, $nodes);
  682. if (!$changed) continue;
  683. $anyChanges = true;
  684. break;
  685. }
  686. } while ($anyChanges);
  687. // Convert any remaining tokens to text nodes. Also apply any inline
  688. // CSS modifiers.
  689. $lastNode = null;
  690. $me = $this;
  691. $nodes = array_map(function($node) use (&$lastNode, $me, $nodes) {
  692. if ($node instanceof MDToken) {
  693. /** @var MDToken */
  694. $token = $node;
  695. if ($token->type == MDTokenType::Modifier && $lastNode) {
  696. $me->root()->tagFilter->scrubModifier($token->modifier);
  697. $token->modifier->applyTo($lastNode);
  698. $lastNode = null;
  699. return new MDTextNode('');
  700. }
  701. $lastNode = null;
  702. return new MDTextNode($token->original);
  703. } elseif ($node instanceof MDNode) {
  704. $lastNode = ($node instanceof MDTextNode) ? null : $node;
  705. return $node;
  706. } else {
  707. $nodeClassName = MDUtils::typename($node);
  708. throw new Error("Unexpected node type {$nodeClassName}");
  709. }
  710. }, $nodes);
  711. return $nodes;
  712. }
  713. public $startTime;
  714. /**
  715. * Checks if parsing has taken an excessive length of time. Because I'm not
  716. * fully confident in my loops yet. :)
  717. */
  718. public function checkExecutionTime(float $maxSeconds=1.0) {
  719. $elapsed = microtime(true) - $this->root()->startTime;
  720. if ($elapsed > $maxSeconds) {
  721. throw new Error("Markdown parsing taking too long. Infinite loop?");
  722. }
  723. }
  724. /**
  725. * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
  726. * and `MDReferencedImageReader`.
  727. */
  728. private array $referenceToURL = [];
  729. /**
  730. * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
  731. * and `MDReferencedImageReader`.
  732. */
  733. private array $referenceToTitle = [];
  734. /**
  735. * Defines a URL by reference symbol.
  736. */
  737. public function defineURL(string $reference, string $url, ?string $title=null) {
  738. $this->root()->referenceToURL[mb_strtolower($reference)] = $url;
  739. if ($title !== null) $this->root()->referenceToTitle[mb_strtolower($reference)] = $title;
  740. }
  741. /**
  742. * Returns the URL associated with a reference symbol.
  743. */
  744. public function urlForReference(string $reference): ?string {
  745. return $this->root()->referenceToURL[mb_strtolower($reference)] ?? null;
  746. }
  747. /**
  748. * Returns the link title associated with a reference symbol.
  749. */
  750. public function urlTitleForReference(string $reference): ?string {
  751. return $this->root()->referenceToTitle[mb_strtolower($reference)] ?? null;
  752. }
  753. }
  754. /**
  755. * Defines a set of allowable HTML tags, attributes, and CSS.
  756. */
  757. class MDHTMLFilter {
  758. /**
  759. * Mapping of permitted lowercase tag names to objects containing allowable
  760. * attributes for those tags. Does not need to include those attributes
  761. * defined in `$allowableGlobalAttributes`.
  762. *
  763. * Values are objects with allowable lowercase attribute names mapped to
  764. * allowable value patterns. A `*` means any value is acceptable. Multiple
  765. * allowable values can be joined together with `|`. These special symbols
  766. * represent certain kinds of values and can be used in combination or in
  767. * place of literal values.
  768. *
  769. * - `{classlist}`: A list of legal CSS classnames, separated by spaces
  770. * - `{int}`: An integer
  771. * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
  772. * - `{style}`: One or more CSS declarations, separated by semicolons (simple
  773. * `key: value;` syntax only)
  774. * - `{url}`: A URL
  775. */
  776. public array $allowableTags = [
  777. 'address' => [
  778. 'cite' => '{url}',
  779. ],
  780. 'h1' => [],
  781. 'h2' => [],
  782. 'h3' => [],
  783. 'h4' => [],
  784. 'h5' => [],
  785. 'h6' => [],
  786. 'blockquote' => [],
  787. 'dl' => [],
  788. 'dt' => [],
  789. 'dd' => [],
  790. 'div' => [],
  791. 'hr' => [],
  792. 'ul' => [],
  793. 'ol' => [
  794. 'start' => '{int}',
  795. 'type' => 'a|A|i|I|1',
  796. ],
  797. 'li' => [
  798. 'value' => '{int}',
  799. ],
  800. 'p' => [],
  801. 'pre' => [],
  802. 'table' => [],
  803. 'thead' => [],
  804. 'tbody' => [],
  805. 'tfoot' => [],
  806. 'tr' => [],
  807. 'td' => [],
  808. 'th' => [],
  809. 'a' => [
  810. 'href' => '{url}',
  811. 'target' => '*',
  812. ],
  813. 'abbr' => [],
  814. 'b' => [],
  815. 'br' => [],
  816. 'cite' => [],
  817. 'code' => [],
  818. 'data' => [
  819. 'value' => '*',
  820. ],
  821. 'dfn' => [],
  822. 'em' => [],
  823. 'i' => [],
  824. 'kbd' => [],
  825. 'mark' => [],
  826. 'q' => [
  827. 'cite' => '{url}',
  828. ],
  829. 's' => [],
  830. 'samp' => [],
  831. 'small' => [],
  832. 'span' => [],
  833. 'strong' => [],
  834. 'sub' => [],
  835. 'sup' => [],
  836. 'time' => [
  837. 'datetime' => '*',
  838. ],
  839. 'u' => [],
  840. 'var' => [],
  841. 'wbr' => [],
  842. 'img' => [
  843. 'alt' => '*',
  844. 'href' => '{url}',
  845. ],
  846. 'figure' => [],
  847. 'figcaption' => [],
  848. 'del' => [],
  849. 'ins' => [],
  850. 'details' => [],
  851. 'summary' => [],
  852. ];
  853. /**
  854. * Mapping of allowable lowercase global attributes to their permitted
  855. * values. Uses same value pattern syntax as described in `$allowableTags`.
  856. */
  857. public array $allowableGlobalAttributes = [
  858. 'class' => '{classlist}',
  859. 'data-*' => '*',
  860. 'dir' => 'ltr|rtl|auto',
  861. 'id' => '*',
  862. 'lang' => '*',
  863. 'style' => '{style}',
  864. 'title' => '*',
  865. 'translate' => 'yes|no|{none}',
  866. ];
  867. /**
  868. * Mapping of allowable CSS style names to their allowable value patterns.
  869. * Multiple values can be delimited with `|` characters. Limited support
  870. * so far.
  871. *
  872. * Recognized special values:
  873. * - `{color}`: A hex or named color
  874. */
  875. public array $allowableStyleKeys = [
  876. 'background-color' => '{color}',
  877. 'color' => '{color}',
  878. ];
  879. /**
  880. * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
  881. * itself has already been whitelisted.
  882. *
  883. * @param MDHTMLTag $tag HTML tag
  884. */
  885. public function scrubTag(MDHTMLTag $tag) {
  886. foreach ($tag->attributes as $name => $value) {
  887. if (!$this->isValidAttributeName($tag->tagName, $name)) {
  888. unset($tag->attributes[$name]);
  889. }
  890. if (!$this->isValidAttributeValue($tag->tagName, $name, $value)) {
  891. unset($tag->attributes[$name]);
  892. }
  893. }
  894. }
  895. /**
  896. * Scrubs all forbidden attributes from an HTML modifier.
  897. *
  898. * @param MDTagModifier $modifier
  899. * @param ?string $tagName HTML tag name, if known, otherwise only
  900. * global attributes will be permitted
  901. */
  902. public function scrubModifier(MDHTMLModifier $modifier, ?string $tagName) {
  903. if (sizeof($modifier->cssClasses) > 0) {
  904. $classList = implode(' ', $modifier->cssClasses);
  905. if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
  906. $modifier->cssClasses = [];
  907. }
  908. }
  909. if ($modifier->cssId !== null) {
  910. if (!$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
  911. $modifier->cssId = null;
  912. }
  913. }
  914. if (!$this->isValidAttributeName($tagName, 'style')) {
  915. $modifier->cssStyles = [];
  916. } else {
  917. foreach ($modifier->cssStyles as $key => $val) {
  918. if (!$this->isValidStyleValue($key, $val)) {
  919. unset($modifier->cssStyles[$key]);
  920. }
  921. }
  922. }
  923. foreach ($modifier->attributes as $key => $val) {
  924. if (!$this->isValidAttributeValue($tagName, $key, $val)) {
  925. unset($modifier->attributes[$key]);
  926. }
  927. }
  928. }
  929. /**
  930. * Tests if an HTML tag name is permitted.
  931. */
  932. public function isValidTagName(string $tagName): bool {
  933. return ($this->allowableTags[mb_strtolower($tagName)] ?? null) !== null;
  934. }
  935. /**
  936. * Tests if an HTML attribute name is permitted.
  937. */
  938. public function isValidAttributeName(?string $tagName, string $attributeName): bool {
  939. $lcAttributeName = mb_strtolower($attributeName);
  940. if (($this->allowableGlobalAttributes[$lcAttributeName] ?? null) !== null) {
  941. return true;
  942. }
  943. foreach ($this->allowableGlobalAttributes as $pattern => $valuePattern) {
  944. if (!str_ends_with($pattern, '*')) continue;
  945. $patternPrefix = mb_substr($pattern, 0, mb_strlen($pattern) - 1);
  946. if (str_starts_with($lcAttributeName, $patternPrefix)) {
  947. return true;
  948. }
  949. }
  950. if ($tagName === null) return false;
  951. $lcTagName = mb_strtolower($tagName);
  952. $tagAttributes = $this->allowableTags[$lcTagName];
  953. if ($tagAttributes !== null) {
  954. return ($tagAttributes[$lcAttributeName] ?? null) !== null;
  955. }
  956. return false;
  957. }
  958. /**
  959. * Tests if an attribute value is allowable.
  960. */
  961. public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
  962. $lcAttributeName = mb_strtolower($attributeName);
  963. $globalPattern = $this->allowableGlobalAttributes[$lcAttributeName] ?? null;
  964. if ($globalPattern !== null) {
  965. return $this->attributeValueMatchesPattern($attributeValue, $globalPattern);
  966. }
  967. foreach ($this->allowableGlobalAttributes as $namePattern => $valuePattern) {
  968. if (str_ends_with($namePattern, '*') && str_starts_with($lcAttributeName, mb_substr($namePattern, 0, mb_strlen($namePattern) - 1))) {
  969. return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
  970. }
  971. }
  972. if ($tagName === null) return false;
  973. $lcTagName = mb_strtolower($tagName);
  974. $tagAttributes = $this->allowableTags[$lcTagName] ?? null;
  975. if ($tagAttributes === null) return false;
  976. $valuePattern = $tagAttributes[$lcAttributeName] ?? null;
  977. if ($valuePattern === null) return false;
  978. return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
  979. }
  980. private const permissiveURLRegex = '^\\S+$';
  981. private const integerRegex = '^[\\-]?\\d+$';
  982. private const classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';
  983. private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
  984. $options = explode('|', $pattern);
  985. foreach ($options as $option) {
  986. switch ($option) {
  987. case '*':
  988. return true;
  989. case '{classlist}':
  990. if (mb_eregi(self::classListRegex, $value)) return true;
  991. break;
  992. case '{int}':
  993. if (mb_eregi(self::integerRegex, $value)) return true;
  994. break;
  995. case '{none}':
  996. if ($value === true) return true;
  997. break;
  998. case '{style}':
  999. if ($this->isValidStyleDeclaration($value)) return true;
  1000. break;
  1001. case '{url}':
  1002. if (mb_eregi(self::permissiveURLRegex, $value)) return true;
  1003. break;
  1004. default:
  1005. if ($value === $option) return true;
  1006. break;
  1007. }
  1008. }
  1009. return false;
  1010. }
  1011. /**
  1012. * Tests if a string of one or more style `key: value;` declarations is
  1013. * fully allowable.
  1014. */
  1015. public function isValidStyleDeclaration(string $styles): bool {
  1016. $settings = explode(';', $styles);
  1017. foreach ($settings as $setting) {
  1018. if (mb_strlen(trim($setting)) == 0) continue;
  1019. $parts = explode(':', $setting);
  1020. if (sizeof($parts) != 2) return false;
  1021. $name = trim($parts[0]);
  1022. if (!$this->isValidStyleKey($name)) return false;
  1023. $value = trim($parts[1]);
  1024. if (!$this->isValidStyleValue($name, $value)) return false;
  1025. }
  1026. return true;
  1027. }
  1028. /**
  1029. * Tests if a CSS style key is allowable.
  1030. */
  1031. public function isValidStyleKey(string $key): bool {
  1032. return ($this->allowableStyleKeys[$key] ?? null) !== null;
  1033. }
  1034. /**
  1035. * Tests if a CSS style value is allowable.
  1036. */
  1037. public function isValidStyleValue(string $key, string $value): bool {
  1038. $pattern = $this->allowableStyleKeys[$key] ?? null;
  1039. if ($pattern === null) return false;
  1040. $options = explode('|', $pattern);
  1041. foreach ($options as $option) {
  1042. switch ($option) {
  1043. case '{color}':
  1044. if ($this->isValidCSSColor($value)) return true;
  1045. default:
  1046. if ($value === $option) return true;
  1047. }
  1048. }
  1049. return false;
  1050. }
  1051. private const styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';
  1052. private function isValidCSSColor(string $value): bool {
  1053. return mb_eregi(self::styleColorRegex, $value);
  1054. }
  1055. }
  1056. /**
  1057. * Represents a single HTML tag. Paired tags are represented separately.
  1058. */
  1059. class MDHTMLTag {
  1060. /**
  1061. * Verbatim string of the original parsed tag. Not modified. Should be
  1062. * considered unsafe for inclusion in the final document. Use `->toString()`
  1063. * instead.
  1064. */
  1065. public string $original;
  1066. public string $tagName;
  1067. public bool $isCloser;
  1068. /**
  1069. * Map of attribute names to value strings.
  1070. */
  1071. public array $attributes;
  1072. /**
  1073. * @param string $original
  1074. * @param string $tagName
  1075. * @param bool $isCloser
  1076. * @param array $attributes
  1077. */
  1078. public function __construct(string $original, string $tagName, bool $isCloser,
  1079. array $attributes) {
  1080. $this->original = $original;
  1081. $this->tagName = $tagName;
  1082. $this->isCloser = $isCloser;
  1083. $this->attributes = $attributes;
  1084. }
  1085. public function __toString(): string {
  1086. if ($this->isCloser) {
  1087. return "</{$this->tagName}>";
  1088. }
  1089. $html = '<';
  1090. $html .= $this->tagName;
  1091. foreach ($this->attributes as $key => $value) {
  1092. $safeName = MDUtils::scrubAttributeName($key);
  1093. if ($value === true) {
  1094. $html .= " {$safeName}";
  1095. } else {
  1096. $escapedValue = MDUtils::escapeHTML("{$value}");
  1097. $html .= " {$safeName}=\"{$escapedValue}\"";
  1098. }
  1099. }
  1100. $html .= '>';
  1101. return $html;
  1102. }
  1103. public function equals($other): bool {
  1104. if (!($other instanceof MDHTMLTag)) return false;
  1105. if ($other->tagName != $this->tagName) return false;
  1106. if ($other->isCloser != $this->isCloser) return false;
  1107. return MDUtils::equal($other->attributes, $this->attributes);
  1108. }
  1109. private const htmlTagNameFirstRegex = '[a-z]';
  1110. private const htmlTagNameMedialRegex = '[a-z0-9]';
  1111. private const htmlAttributeNameFirstRegex = '[a-z]';
  1112. private const htmlAttributeNameMedialRegex = '[a-z0-9-]';
  1113. private const whitespaceCharRegex = '\\s';
  1114. /**
  1115. * Checks the start of the given string for presence of an HTML tag.
  1116. */
  1117. public static function fromLineStart(string $line): ?MDHTMLTag {
  1118. $expectOpenBracket = 0;
  1119. $expectCloserOrName = 1;
  1120. $expectName = 2;
  1121. $expectAttributeNameOrEnd = 3;
  1122. $expectEqualsOrAttributeOrEnd = 4;
  1123. $expectAttributeValue = 5;
  1124. $expectCloseBracket = 6;
  1125. $isCloser = false;
  1126. $tagName = '';
  1127. $attributeName = '';
  1128. $attributeValue = '';
  1129. $attributeQuote = null;
  1130. $attributes = [];
  1131. $fullTag = null;
  1132. $endAttribute = function(bool $unescape=false) use (&$attributes,
  1133. &$attributeName, &$attributeValue, &$attributeQuote) {
  1134. if (mb_strlen($attributeName) > 0) {
  1135. if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) {
  1136. $attributes[$attributeName] = $unescape ?
  1137. html_entity_decode($attributeValue, ENT_QUOTES |
  1138. ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') :
  1139. $attributeValue;
  1140. } else {
  1141. $attributes[$attributeName] = true;
  1142. }
  1143. }
  1144. $attributeName = '';
  1145. $attributeValue = '';
  1146. $attributeQuote = null;
  1147. };
  1148. $expect = $expectOpenBracket;
  1149. for ($p = 0; $p < mb_strlen($line) && $fullTag === null; $p++) {
  1150. $ch = mb_substr($line, $p, 1);
  1151. $isWhitespace = mb_eregi(self::whitespaceCharRegex, $ch);
  1152. switch ($expect) {
  1153. case $expectOpenBracket:
  1154. if ($ch != '<') return null;
  1155. $expect = $expectCloserOrName;
  1156. break;
  1157. case $expectCloserOrName:
  1158. if ($ch == '/') {
  1159. $isCloser = true;
  1160. } else {
  1161. $p--;
  1162. }
  1163. $expect = $expectName;
  1164. break;
  1165. case $expectName:
  1166. if (mb_strlen($tagName) == 0) {
  1167. if (!mb_eregi(self::htmlTagNameFirstRegex, $ch)) return null;
  1168. $tagName .= $ch;
  1169. } else {
  1170. if (mb_eregi(self::htmlTagNameMedialRegex, $ch)) {
  1171. $tagName .= $ch;
  1172. } else {
  1173. $p--;
  1174. $expect = ($isCloser) ? $expectCloseBracket :
  1175. $expectAttributeNameOrEnd;
  1176. }
  1177. }
  1178. break;
  1179. case $expectAttributeNameOrEnd:
  1180. if (mb_strlen($attributeName) == 0) {
  1181. if ($isWhitespace) {
  1182. // skip whitespace
  1183. } elseif ($ch == '/') {
  1184. $expect = $expectCloseBracket;
  1185. } elseif ($ch == '>') {
  1186. $fullTag = mb_substr($line, 0, $p + 1);
  1187. break;
  1188. } elseif (mb_eregi(self::htmlAttributeNameFirstRegex, $ch)) {
  1189. $attributeName .= $ch;
  1190. } else {
  1191. return null;
  1192. }
  1193. } elseif ($isWhitespace) {
  1194. $expect = $expectEqualsOrAttributeOrEnd;
  1195. } elseif ($ch == '/') {
  1196. $endAttribute();
  1197. $expect = $expectCloseBracket;
  1198. } elseif ($ch == '>') {
  1199. $endAttribute();
  1200. $fullTag = mb_substr($line, 0, $p + 1);
  1201. break;
  1202. } elseif ($ch == '=') {
  1203. $expect = $expectAttributeValue;
  1204. } elseif (mb_eregi(self::htmlAttributeNameMedialRegex, $ch)) {
  1205. $attributeName .= $ch;
  1206. } else {
  1207. return null;
  1208. }
  1209. break;
  1210. case $expectEqualsOrAttributeOrEnd:
  1211. if ($ch == '=') {
  1212. $expect = $expectAttributeValue;
  1213. } elseif ($isWhitespace) {
  1214. // skip whitespace
  1215. } elseif ($ch == '/') {
  1216. $expect = $expectCloseBracket;
  1217. } elseif ($ch == '>') {
  1218. $fullTag = mb_substr($line, 0, $p + 1);
  1219. break;
  1220. } elseif (mb_eregi(self::htmlAttributeNameFirstRegex, $ch)) {
  1221. $endAttribute();
  1222. $expect = $expectAttributeNameOrEnd;
  1223. $p--;
  1224. }
  1225. break;
  1226. case $expectAttributeValue:
  1227. if (mb_strlen($attributeValue) == 0) {
  1228. if ($attributeQuote === null) {
  1229. if ($isWhitespace) {
  1230. // skip whitespace
  1231. } elseif ($ch == '"' || $ch == "'") {
  1232. $attributeQuote = $ch;
  1233. } else {
  1234. $attributeQuote = ''; // explicitly unquoted
  1235. $p--;
  1236. }
  1237. } else {
  1238. if ($ch === $attributeQuote) {
  1239. // Empty string
  1240. $endAttribute($attributeQuote != '');
  1241. $expect = $expectAttributeNameOrEnd;
  1242. } elseif ($attributeQuote === '' && ($ch == '/' || $ch == '>')) {
  1243. return null;
  1244. } else {
  1245. $attributeValue .= $ch;
  1246. }
  1247. }
  1248. } else {
  1249. if ($ch === $attributeQuote) {
  1250. $endAttribute($attributeQuote != '');
  1251. $expect = $expectAttributeNameOrEnd;
  1252. } elseif ($attributeQuote === '' && $isWhitespace) {
  1253. $endAttribute();
  1254. $expect = $expectAttributeNameOrEnd;
  1255. } else {
  1256. $attributeValue .= $ch;
  1257. }
  1258. }
  1259. break;
  1260. case $expectCloseBracket:
  1261. if ($isWhitespace) {
  1262. // ignore whitespace
  1263. } elseif ($ch == '>') {
  1264. $fullTag = mb_substr($line, 0, $p + 1);
  1265. break;
  1266. }
  1267. break;
  1268. }
  1269. }
  1270. if ($fullTag === null) return null;
  1271. $endAttribute();
  1272. return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
  1273. }
  1274. }
  1275. /**
  1276. * Represents HTML modifications to a node, such as CSS classes to add or
  1277. * additional attributes. See `MDHTMLFilter->scrubModifier()` to remove disallowed
  1278. * values.
  1279. */
  1280. class MDTagModifier {
  1281. /**
  1282. * Verbatim markdown syntax. Unmodified by changes to other properties.
  1283. */
  1284. public string $original;
  1285. /** @var string[] */
  1286. public array $cssClasses = [];
  1287. public ?string $cssId = null;
  1288. public array $cssStyles = [];
  1289. public array $attributes = [];
  1290. private const leadingClassRegex = '^\\{([^}]+?)}';
  1291. private const trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$';
  1292. private const classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
  1293. private const idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
  1294. private const attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value
  1295. public function applyTo(MDNode $node) {
  1296. if ($node instanceof MDNode) {
  1297. foreach ($this->cssClasses as $cssClass) {
  1298. $node->addClass($cssClass);
  1299. }
  1300. if ($this->cssId) $node->cssId = $this->cssId;
  1301. foreach ($this->attributes as $name => $value) {
  1302. $node->attributes[$name] = $value;
  1303. }
  1304. foreach ($this->cssStyles as $name => $value) {
  1305. $node->cssStyles[$name] = $value;
  1306. }
  1307. }
  1308. }
  1309. /**
  1310. * Adds a CSS class. If already present it will not be duplicated.
  1311. */
  1312. public function addClass(string $cssClass): bool {
  1313. if (array_search($cssClass, $this->cssClasses) !== false) return false;
  1314. array_push($this->cssClasses, $cssClass);
  1315. return true;
  1316. }
  1317. /**
  1318. * Removes a CSS class.
  1319. */
  1320. public function removeClass(string $cssClass): bool {
  1321. $beforeLength = sizeof($this->cssClasses);
  1322. $this->cssClasses = array_diff($this->cssClasses, [ $cssClass ]);
  1323. return sizeof($this->cssClasses) != $beforeLength;
  1324. }
  1325. public function equals($other): bool {
  1326. if (!($other instanceof MDTagModifier)) return false;
  1327. if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
  1328. if ($other->cssId !== $this->cssId) return false;
  1329. if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
  1330. return true;
  1331. }
/**
 * Returns the stored `original` text of this modifier. `fromContents` sets
 * it to the parsed contents wrapped in curly braces.
 */
public function __toString(): string {
	return $this->original;
}
  1335. private static function styleToObject(string $styleValue): array {
  1336. $pairs = explode(';', $styleValue);
  1337. $styles = [];
  1338. foreach ($pairs as $pair) {
  1339. $keyAndValue = explode(':', $pair);
  1340. if (sizeof($keyAndValue) != 2) continue;
  1341. $styles[$keyAndValue[0]] = $keyAndValue[1];
  1342. }
  1343. return $styles;
  1344. }
  1345. private static function fromContents(string $contents): ?MDTagModifier {
  1346. $modifierTokens = mb_split('\\s+', $contents);
  1347. $mod = new MDTagModifier();
  1348. $mod->original = "{{$contents}}";
  1349. foreach ($modifierTokens as $token) {
  1350. if (trim($token) == '') continue;
  1351. if (mb_eregi(self::classRegex, $token, $groups)) {
  1352. $mod->addClass($groups[1]);
  1353. } elseif (mb_eregi(self::idRegex, $token, $groups)) {
  1354. $mod->cssId = $groups[1];
  1355. } elseif (mb_eregi(self::attributeRegex, $token, $groups)) {
  1356. if ($groups[1] == 'style') {
  1357. $mod->cssStyles = self::styleToObject($groups[2]);
  1358. } else {
  1359. $mod->attributes[$groups[1]] = $groups[2];
  1360. }
  1361. } else {
  1362. return null;
  1363. }
  1364. }
  1365. return $mod;
  1366. }
  1367. /**
  1368. * Extracts block modifier from end of a line. Always returns a 2-element
  1369. * tuple array:
  1370. * - `0`: the line without the modifier
  1371. * - `1`: an `MDTagModifier` if found or `null` if not
  1372. *
  1373. * @param string $line
  1374. * @param ?MDState $state
  1375. * @return array tuple with remaining line and `MDTagModifier` or `null`
  1376. */
  1377. public static function fromLine(string $line, ?MDState $state): array {
  1378. if ($state) {
  1379. $found = false;
  1380. foreach ($state->root()->readersByBlockPriority as $reader) {
  1381. if ($reader instanceof MDModifierReader) {
  1382. $found = true;
  1383. break;
  1384. }
  1385. }
  1386. if (!$found) return [ $line, null ];
  1387. }
  1388. if (!mb_eregi(self::trailingClassRegex, $line, $groups)) return [ $line, null ];
  1389. $bareLine = $groups[1];
  1390. $mod = self::fromContents($groups[2]);
  1391. return [ $bareLine, $mod ];
  1392. }
  1393. /**
  1394. * Attempts to extract modifier from head of string.
  1395. */
  1396. public static function fromStart(string $line): ?MDTagModifier {
  1397. if (!mb_eregi(self::leadingClassRegex, $line, $groups)) return null;
  1398. return self::fromContents($groups[1]);
  1399. }
  1400. /**
  1401. * Discards any modifiers from a line and returns what remains.
  1402. */
  1403. public static function strip(string $line): string {
  1404. if (!mb_eregi(self::trailingClassRegex, $line, $groups)) return $line;
  1405. return $groups[1];
  1406. }
  1407. }
  1408. // -- Readers ---------------------------------------------------------------
  1409. /**
  1410. * Base class for readers of various markdown syntax. A `Markdown` instance can
  1411. * be created with any combination of subclasses of these to customize the
  1412. * flavor of markdown parsed.
  1413. *
  1414. * @see {@link custom.md} for details on subclassing
  1415. */
  1416. class MDReader {
  1417. /**
  1418. * Called before processing begins. `$state->lines` is populated and the
  1419. * line pointer `$state->p` will be at `0`.
  1420. *
  1421. * Default implementation does nothing.
  1422. */
  1423. public function preProcess(MDState $state) {}
  1424. /**
  1425. * Attempts to read an `MDBlockNode` subclass at the current line pointer
  1426. * `$state->p`. Only matches if the block pattern starts at the line pointer,
  1427. * not elsewhere in the `$state->lines` array. If a block is found, `$state->p`
  1428. * should be incremented to the next line _after_ the block structure and
  1429. * a `MDBlockNode` subclass instance is returned. If no block is found,
  1430. * returns `null`.
  1431. *
  1432. * Default implementation always returns `null`.
  1433. */
  1434. public function readBlock(MDState $state): ?MDBlockNode { return null; }
  1435. /**
  1436. * Attempts to read an inline token from the beginning of `$line`. Only the
  1437. * start of the given `$line` is considered. If a matching token is found, an
  1438. * `MDToken` is returned. Otherwise `null` is returned.
  1439. *
  1440. * Default implementation always returns `null`.
  1441. */
  1442. public function readToken(MDState $state, string $line): ?MDToken { return null; }
  1443. /**
  1444. * Attempts to find a pattern anywhere in `$tokens` and perform a _single_
  1445. * in-place substitution with one or more `MDNode` subclass instances.
  1446. * If a substitution is performed, must return `true`, otherwise `false`.
  1447. *
  1448. * Default implementation always returns `false`.
  1449. *
  1450. * @param MDState $state
  1451. * @param int $pass what substitution pass this is, starting with 1
  1452. * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and
  1453. * `MDInlineNode` elements
  1454. * @return bool `true` if a substitution was performed, `false` if not
  1455. */
  1456. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool { return false; }
  1457. /**
  1458. * Called after all parsing has completed. An array `$blocks` is passed of
  1459. * all the top-level `MDBlockNode` elements in the document which this
  1460. * method can traverse or alter in-place via `array_splice` operations if
  1461. * necessary.
  1462. *
  1463. * `MDNode->visitChildren` is useful for recursively looking for certain
  1464. * `MDNode` instances. `MDNode::replaceNodes` is useful for swapping in
  1465. * replacements.
  1466. *
  1467. * Default implementation does nothing.
  1468. *
  1469. * @param MDState $state
  1470. * @param MDBlockNode[] $blocks
  1471. */
  1472. public function postProcess(MDState $state, array &$blocks) {}
  1473. /**
  1474. * Can be overridden to influence ordering of this reader with respect to
  1475. * another during the block parsing phase. Return `-1` to be ordered before
  1476. * the given reader, `1` to be ordered after it, or `0` for no preference.
  1477. * Only return non-`0` values to resolve specific conflicts.
  1478. *
  1479. * Default implementation always returns `0` (no preference).
  1480. *
  1481. * @param MDReader $other
  1482. * @return int a negative, positive, or 0 value to be ordered before,
  1483. * after, or anwhere relative to `$other`, respectively
  1484. */
  1485. public function compareBlockOrdering(MDReader $other): int {
  1486. return 0;
  1487. }
  1488. /**
  1489. * Can be overridden to influence ordering of this reader with respect to
  1490. * another during the tokenizing phase. Return `-1` to be ordered before
  1491. * the given reader, `1` to be ordered after it, or `0` for no preference.
  1492. * Only return non-`0` values to resolve specific conflicts.
  1493. *
  1494. * Default implementation always returns `0` (no preference).
  1495. *
  1496. * @param MDReader $other
  1497. * @return int a negative, positive, or 0 value to be ordered before,
  1498. * after, or anwhere relative to `$other`, respectively
  1499. */
  1500. public function compareTokenizeOrdering(MDReader $other): int {
  1501. return 0;
  1502. }
  1503. /**
  1504. * Can be overridden to influence ordering of this reader with respect to
  1505. * another during the substitution phase. Return `-1` to be ordered before
  1506. * the given reader, `1` to be ordered after it, or `0` for no preference.
  1507. * Only return non-`0` values to resolve specific conflicts.
  1508. *
  1509. * Readers are sorted within each substitution pass. All pass 1 readers are
  1510. * processed first, then all pass 2 readers, etc. The number of passes this
  1511. * reader participates in is dictated by `substitionPassCount()`.
  1512. *
  1513. * Default implementation always returns `0` (no preference).
  1514. *
  1515. * @param MDReader $other
  1516. * @param int $pass substitution pass, with numbering starting at `1`
  1517. * @return int a negative, positive, or 0 value to be ordered before,
  1518. * after, or anwhere relative to `$other`, respectively
  1519. */
  1520. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  1521. return 0;
  1522. }
  1523. /**
  1524. * How many substitution passes this reader requires. Substitution allows
  1525. * all pass 1 readers to process first, then all pass 2 readers, etc.
  1526. */
  1527. public function substitutionPassCount(): int { return 1; }
  1528. /**
  1529. * For sorting readers with ordering preferences. The `compare` methods
  1530. * don't have the properties of normal sorting compares so need to sort
  1531. * differently.
  1532. *
  1533. * @param MDReader[] $arr array to sort
  1534. * @param callable $compareFn comparison function, taking two array element
  1535. * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
  1536. * respectively
  1537. * @param callable $idFn function for returning a unique hashable id for
  1538. * the array element
  1539. * @return MDReader[] sorted array
  1540. */
  1541. private static function kahnTopologicalSort(array $arr, callable $compareFn,
  1542. callable $idFn): array {
  1543. $graph = [];
  1544. $inDegrees = [];
  1545. $valuesById = [];
  1546. // Build the graph and compute in-degrees
  1547. foreach ($arr as $index => $elem) {
  1548. $id = $idFn($elem);
  1549. $graph[$id] = [];
  1550. $inDegrees[$id] = 0;
  1551. $valuesById[$id] = $elem;
  1552. }
  1553. for ($i = 0; $i < sizeof($arr); $i++) {
  1554. $elemA = $arr[$i];
  1555. $idA = $idFn($elemA);
  1556. for ($j = 0; $j < sizeof($arr); $j++) {
  1557. if ($i === $j) continue;
  1558. $elemB = $arr[$j];
  1559. $idB = $idFn($elemB);
  1560. $comparisonResult = $compareFn($elemA, $elemB);
  1561. if ($comparisonResult < 0) {
  1562. array_push($graph[$idA], $idB);
  1563. $inDegrees[$idB]++;
  1564. } elseif ($comparisonResult > 0) {
  1565. array_push($graph[$idB], $idA);
  1566. $inDegrees[$idA]++;
  1567. }
  1568. }
  1569. }
  1570. // Initialize the queue with zero-inDegree nodes
  1571. $queue = [];
  1572. foreach ($inDegrees as $elemId => $degree) {
  1573. if ($degree === 0) {
  1574. array_push($queue, $elemId);
  1575. }
  1576. }
  1577. // Process the queue and build the topological order list
  1578. $sorted = [];
  1579. while (sizeof($queue) > 0) {
  1580. $elemId = array_shift($queue);
  1581. array_push($sorted, $valuesById[$elemId]);
  1582. unset($valuesById[$elemId]);
  1583. foreach ($graph[$elemId] as $neighbor) {
  1584. $inDegrees[$neighbor]--;
  1585. if ($inDegrees[$neighbor] === 0) {
  1586. array_push($queue, $neighbor);
  1587. }
  1588. }
  1589. }
  1590. // Anything left over can go at the end. No ordering dependencies.
  1591. foreach ($valuesById as $elemId => $value) {
  1592. array_push($sorted, $value);
  1593. }
  1594. return $sorted;
  1595. }
  1596. /**
  1597. * Returns a sorted array of readers by their block priority preferences.
  1598. *
  1599. * @param MDReader[] $readers
  1600. * @return MDReader[] sorted readers
  1601. */
  1602. public static function sortReaderForBlocks(array &$readers): array {
  1603. $sorted = $readers;
  1604. return self::kahnTopologicalSort($sorted, function(MDReader $a, MDReader $b): int {
  1605. return $a->compareBlockOrdering($b);
  1606. }, fn($elem) => MDUtils::typename($elem));
  1607. }
  1608. /**
  1609. * Returns a sorted array of readers by their tokenization priority preferences.
  1610. *
  1611. * @param MDReader[] $readers
  1612. * @return MDReader[] sorted readers
  1613. */
  1614. public static function sortReadersForTokenizing(array &$readers): array {
  1615. $sorted = $readers;
  1616. return self::kahnTopologicalSort($sorted, function(MDReader $a, MDReader $b): int {
  1617. return $a->compareTokenizeOrdering($b);
  1618. }, fn($elem) => MDUtils::typename($elem));
  1619. }
  1620. /**
  1621. * Returns a sorted array of tuples (arrays) containing the substitution
  1622. * pass number and reader instance, sorted by their substitution priority
  1623. * preferences.
  1624. *
  1625. * For readers with `substitutionPassCount()` > `1`, the same reader will
  1626. * appear multiple times in the resulting array, one per pass.
  1627. *
  1628. * @param MDReader[] $readers
  1629. * @return MDReader[] sorted array of tuples with the pass number and
  1630. * reader instance in each
  1631. */
  1632. public static function sortReadersForSubstitution(array &$readers): array {
  1633. $tuples = [];
  1634. $maxPass = 1;
  1635. foreach ($readers as $reader) {
  1636. $passCount = $reader->substitutionPassCount();
  1637. $maxPass = max($maxPass, $passCount);
  1638. for ($pass = 1; $pass <= $passCount; $pass++) {
  1639. array_push($tuples, [ $pass, $reader ]);
  1640. }
  1641. }
  1642. $result = [];
  1643. for ($pass = 1; $pass <= $maxPass; $pass++) {
  1644. $readersThisPass = array_values(array_filter($tuples, fn($tup) => $tup[0] === $pass));
  1645. $passResult = self::kahnTopologicalSort($readersThisPass,
  1646. function(array $a, array $b) use ($pass): int {
  1647. $aReader = $a[1];
  1648. $bReader = $b[1];
  1649. return $aReader->compareSubstituteOrdering($bReader, $pass);
  1650. }, fn($elem) => MDUtils::typename($elem[1]));
  1651. $result = array_merge($result, $passResult);
  1652. }
  1653. return $result;
  1654. }
  1655. }
  1656. /**
  1657. * Reads markdown blocks for headings denoted with the underline syntax.
  1658. *
  1659. * Supports `MDTagModifier` suffixes.
  1660. */
  1661. class MDUnderlinedHeadingReader extends MDReader {
  1662. public function readBlock(MDState $state): ?MDBlockNode {
  1663. $p = $state->p;
  1664. if (!$state->hasLines(2)) return null;
  1665. $modifier;
  1666. $contentLine = trim($state->lines[$p++]);
  1667. [$contentLine, $modifier] = MDTagModifier::fromLine($contentLine, $state);
  1668. $underLine = trim($state->lines[$p++]);
  1669. if ($contentLine == '') return null;
  1670. if (mb_eregi('^=+$', $underLine)) {
  1671. $state->p = $p;
  1672. $block = new MDHeadingNode(1, $state->inlineMarkdownToNodes($contentLine));
  1673. if ($modifier) $modifier->applyTo($block);
  1674. return $block;
  1675. }
  1676. if (mb_eregi('^\-+$', $underLine)) {
  1677. $state->p = $p;
  1678. $block = new MDHeadingNode(2, $state->inlineMarkdownToNodes($contentLine));
  1679. if ($modifier) $modifier->applyTo($block);
  1680. return $block;
  1681. }
  1682. return null;
  1683. }
  1684. }
  1685. /**
  1686. * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
  1687. * to 6 are supported.
  1688. *
  1689. * Supports `MDTagModifier` suffixes.
  1690. */
  1691. class MDHashHeadingReader extends MDReader {
  1692. private const hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content
  1693. public function readBlock(MDState $state): ?MDBlockNode {
  1694. $p = $state->p;
  1695. $line = $state->lines[$p++];
  1696. $modifier;
  1697. [$line, $modifier] = MDTagModifier::fromLine($line, $state);
  1698. if (!mb_eregi(self::hashHeadingRegex, $line, $groups)) return null;
  1699. $state->p = $p;
  1700. $level = mb_strlen($groups[1]);
  1701. $content = $groups[2];
  1702. $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($content));
  1703. if ($modifier) $modifier->applyTo($block);
  1704. return $block;
  1705. }
  1706. }
  1707. /**
  1708. * Reads subtext blocks. Subtext is smaller, fainter text for things like
  1709. * disclaimers or sources.
  1710. *
  1711. * Supports `MDTagModifier` suffixes.
  1712. */
  1713. class MDSubtextReader extends MDReader {
  1714. private const subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content
  1715. public function readBlock(MDState $state): ?MDBlockNode {
  1716. $p = $state->p;
  1717. $line = $state->lines[$p++];
  1718. $modifier;
  1719. [$line, $modifier] = MDTagModifier::fromLine($line, $state);
  1720. if (!mb_eregi(self::subtextRegex, $line, $groups)) return null;
  1721. $state->p = $p;
  1722. $content = $groups[1];
  1723. $block = new MDSubtextNode($state->inlineMarkdownToNodes($content));
  1724. if ($modifier) $modifier->applyTo($block);
  1725. return $block;
  1726. }
  1727. public function compareBlockOrdering(MDReader $other): int {
  1728. if ($other instanceof MDUnorderedListReader) {
  1729. return -1;
  1730. }
  1731. return 0;
  1732. }
  1733. }
  1734. /**
  1735. * Reads markdown blocks for blockquoted text.
  1736. */
  1737. class MDBlockQuoteReader extends MDReader {
  1738. public function readBlock(MDState $state): ?MDBlockNode {
  1739. $blockquoteLines = [];
  1740. $p = $state->p;
  1741. while ($p < sizeof($state->lines)) {
  1742. $line = $state->lines[$p++];
  1743. if (str_starts_with($line, ">")) {
  1744. array_push($blockquoteLines, $line);
  1745. } else {
  1746. break;
  1747. }
  1748. }
  1749. if (sizeof($blockquoteLines) == 0) return null;
  1750. $contentLines = array_map(fn($line) => mb_eregi_replace('^ {0,3}\\t?', '',
  1751. mb_substr($line, 1)), $blockquoteLines);
  1752. $substate = $state->copy($contentLines);
  1753. $quotedBlocks = $substate->readBlocks();
  1754. $state->p = $p;
  1755. return new MDBlockquoteNode($quotedBlocks);
  1756. }
  1757. }
  1758. /**
  1759. * Internal abstract base class for ordered and unordered lists.
  1760. */
  1761. class _MDListReader extends MDReader {
  1762. private static function readItemLines(MDState $state, int $firstLineStartPos): array {
  1763. $p = $state->p;
  1764. $lines = [];
  1765. $seenBlankLine = false;
  1766. $stripTrailingBlankLines = true;
  1767. while ($state->hasLines(1, $p)) {
  1768. $isFirstLine = ($p == $state->p);
  1769. $line = $state->lines[$p++];
  1770. if ($isFirstLine) {
  1771. $line = mb_substr($line, $firstLineStartPos);
  1772. }
  1773. if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
  1774. // Found next list item
  1775. $stripTrailingBlankLines = false; // because this signals extra spacing intended
  1776. break;
  1777. }
  1778. $isBlankLine = trim($line) == '';
  1779. $isIndented = mb_eregi('^\\s+\\S', $line);
  1780. if ($isBlankLine) {
  1781. $seenBlankLine = true;
  1782. } elseif (!$isIndented && $seenBlankLine) {
  1783. // Post-list content
  1784. break;
  1785. }
  1786. array_push($lines, $line);
  1787. }
  1788. $lines = MDUtils::withoutTrailingBlankLines($lines);
  1789. return MDUtils::stripIndent($lines);
  1790. }
  1791. protected function readListItemContent(MDState $state, int $firstLineStartPos): MDNode|array {
  1792. $itemLines = $this->readItemLines($state, $firstLineStartPos);
  1793. $state->p += max(sizeof($itemLines), 1);
  1794. if (sizeof($itemLines) == 1) {
  1795. return $state->inlineMarkdownToNodes($itemLines[0]);
  1796. }
  1797. $hasBlankLines = sizeof(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
  1798. if ($hasBlankLines) {
  1799. $substate = $state->copy($itemLines);
  1800. return $substate->readBlocks();
  1801. }
  1802. // Multiline content with no blank lines. Search for new block
  1803. // boundaries without the benefit of a blank line to demarcate it.
  1804. for ($p = 1; $p < sizeof($itemLines); $p++) {
  1805. $line = $itemLines[$p];
  1806. if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
  1807. // Nested list found
  1808. $firstNodes = $state->inlineMarkdownToNodes(
  1809. implode("\n", array_slice($itemLines, 0, $p)));
  1810. $substate = $state->copy(array_slice($itemLines, $p));
  1811. $blocks = $substate->readBlocks();
  1812. return new MDBlockNode(array_merge($firstNodes, $blocks));
  1813. }
  1814. }
  1815. // Ok, give up and just do a standard block read
  1816. {
  1817. $substate = $state->copy($itemLines);
  1818. return $substate->readBlocks();
  1819. }
  1820. }
  1821. public function readBlock(MDState $state): ?MDBlockNode {
  1822. $className = MDUtils::typename($this);
  1823. throw new Error("Abstract readBlock must be overridden in {$className}");
  1824. }
  1825. }
  1826. /**
  1827. * Block reader for unordered (bulleted) lists.
  1828. */
  1829. class MDUnorderedListReader extends _MDListReader {
  1830. private const unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content
  1831. private function readUnorderedListItem(MDState $state): ?MDListItemNode {
  1832. if (!$state->hasLines(1)) return null;
  1833. $p = $state->p;
  1834. $line = $state->lines[$p];
  1835. if (!mb_eregi(self::unorderedListRegex, $line, $groups)) return null;
  1836. $firstLineOffset = mb_strlen($groups[1]);
  1837. return new MDListItemNode($this->readListItemContent($state, $firstLineOffset));
  1838. }
  1839. public function readBlock(MDState $state): ?MDBlockNode {
  1840. $items = [];
  1841. $item = null;
  1842. do {
  1843. $item = $this->readUnorderedListItem($state);
  1844. if ($item) array_push($items, $item);
  1845. } while ($item);
  1846. if (sizeof($items) == 0) return null;
  1847. return new MDUnorderedListNode($items);
  1848. }
  1849. }
  1850. /**
  1851. * Block reader for ordered (numbered) lists. The number of the first item is
  1852. * used to begin counting. The subsequent items increase by 1, regardless of
  1853. * their value.
  1854. */
  1855. class MDOrderedListReader extends _MDListReader {
  1856. private const orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content
  1857. private function readOrderedListItem(MDState $state): ?MDListItemNode {
  1858. if (!$state->hasLines(1)) return null;
  1859. $p = $state->p;
  1860. $line = $state->lines[$p];
  1861. if (!mb_eregi(self::orderedListRegex, $line, $groups)) return null;
  1862. $ordinal = intval($groups[1]);
  1863. $firstLineOffset = mb_strlen($groups[1]) + mb_strlen($groups[2]);
  1864. return new MDListItemNode($this->readListItemContent($state, $firstLineOffset), $ordinal);
  1865. }
  1866. public function readBlock(MDState $state): ?MDBlockNode {
  1867. $items = [];
  1868. $item = null;
  1869. do {
  1870. $item = $this->readOrderedListItem($state);
  1871. if ($item) array_push($items, $item);
  1872. } while ($item);
  1873. if (sizeof($items) == 0) return null;
  1874. return new MDOrderedListNode($items, $items[0]->ordinal);
  1875. }
  1876. }
  1877. /**
  1878. * Block reader for code blocks denoted by pairs of triple tickmarks. If
  1879. * a programming language name, _xyz_, immediately follows the backticks, a
  1880. * `language-xyz` CSS class will be added to the resulting `<code>`
  1881. * element.
  1882. *
  1883. * Supports `MDTagModifier` suffix.
  1884. */
  1885. class MDFencedCodeBlockReader extends MDReader {
  1886. public function readBlock(MDState $state): ?MDBlockNode {
  1887. if (!$state->hasLines(2)) return null;
  1888. $p = $state->p;
  1889. $openFenceLine = $state->lines[$p++];
  1890. [$openFenceLine, $modifier] = MDTagModifier::fromLine($openFenceLine, $state);
  1891. if (!mb_eregi('```\\s*([a-z0-9]*)\\s*$', $openFenceLine, $groups)) return null;
  1892. $language = $groups[1] !== false && mb_strlen($groups[1]) > 0 ? $groups[1] : null;
  1893. $codeLines = [];
  1894. while ($state->hasLines(1, $p)) {
  1895. $line = $state->lines[$p++];
  1896. if (trim($line) == '```') {
  1897. $state->p = $p;
  1898. $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
  1899. if ($modifier) $modifier->applyTo($block);
  1900. return $block;
  1901. }
  1902. array_push($codeLines, $line);
  1903. }
  1904. return null;
  1905. }
  1906. }
  1907. /**
  1908. * Block reader for code blocks denoted by indenting text.
  1909. */
  1910. class MDIndentedCodeBlockReader extends MDReader {
  1911. public function readBlock(MDState $state): ?MDBlockNode {
  1912. $p = $state->p;
  1913. $codeLines = [];
  1914. while ($state->hasLines(1, $p)) {
  1915. $line = $state->lines[$p++];
  1916. if (MDUtils::countIndents($line, true) < 1) {
  1917. $p--;
  1918. break;
  1919. }
  1920. array_push($codeLines, MDUtils::stripIndent($line));
  1921. }
  1922. if (sizeof($codeLines) == 0) return null;
  1923. $state->p = $p;
  1924. return new MDCodeBlockNode(implode("\n", $codeLines));
  1925. }
  1926. }
  1927. /**
  1928. * Block reader for horizontal rules. Composed of three or more hypens or
  1929. * asterisks on a line by themselves, with or without intermediate whitespace.
  1930. */
  1931. class MDHorizontalRuleReader extends MDReader {
  1932. private const horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';
  1933. public function readBlock(MDState $state): ?MDBlockNode {
  1934. $p = $state->p;
  1935. $line = $state->lines[$p++];
  1936. [$line, $modifier] = MDTagModifier::fromLine($line, $state);
  1937. if (mb_eregi(self::horizontalRuleRegex, $line)) {
  1938. $state->p = $p;
  1939. $block = new MDHorizontalRuleNode();
  1940. if ($modifier) $modifier->applyTo($block);
  1941. return $block;
  1942. }
  1943. return null;
  1944. }
  1945. public function compareBlockOrdering(MDReader $other): int {
  1946. if ($other instanceof MDUnorderedListReader) {
  1947. return -1;
  1948. }
  1949. return 0;
  1950. }
  1951. }
  1952. /**
  1953. * Block reader for tables.
  1954. *
  1955. * Supports `MDTagModifier` suffix.
  1956. */
  1957. class MDTableReader extends MDReader {
  1958. /**
  1959. * If cell contents begin with `=`, treat entire contents as plaintext.
  1960. * Used by spreadsheet add-on to prevent equation operators from being
  1961. * interpreted as markdown.
  1962. * @type {boolean}
  1963. */
  1964. public bool $preferFormulas = false;
  1965. private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
  1966. if (!$state->hasLines(1)) return null;
  1967. $p = $state->p;
  1968. $line = MDTagModifier::strip(trim($state->lines[$p++]));
  1969. if (!mb_eregi('.*\\|.*', $line)) return null;
  1970. if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
  1971. if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
  1972. $cellTokens = explode('|', $line);
  1973. $cells = array_map(function($token) use ($state, $isHeader) {
  1974. $trimmedToken = trim($token);
  1975. if ($this->preferFormulas && strpos($trimmedToken, '=') !== false) {
  1976. $content = $this->preserveFormula($state, $trimmedToken);
  1977. if ($content === null) {
  1978. $content = $state->inlineMarkdownToNode($trimmedToken);
  1979. }
  1980. } else {
  1981. $content = $state->inlineMarkdownToNode($trimmedToken);
  1982. }
  1983. return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
  1984. }, $cellTokens);
  1985. $state->p = $p;
  1986. return new MDTableRowNode($cells);
  1987. }
  1988. /**
  1989. * @param MDState $state
  1990. * @param string $cellContents
  1991. * @return ?MDNode
  1992. */
  1993. private function preserveFormula(MDState $state, string $cellContents): ?MDNode {
  1994. // Up to three prefix punctuation patterns, formula, then three matching
  1995. // suffixes. Not guaranteed to catch every possible syntax but an awful lot.
  1996. // Using preg_match instead for... reasons.
  1997. $regex = '/^([^a-z0-9\\s]*)([^a-z0-9\\s]*)([^a-z0-9\\s]*)(=.*)\\3\\2\\1$/i';
  1998. if (!preg_match($regex, $cellContents, $groups)) {
  1999. return null;
  2000. }
  2001. $prefix = $groups[1] . $groups[2] . $groups[3];
  2002. $formula = $groups[4];
  2003. if ($prefix === '') {
  2004. return new MDTextNode($formula);
  2005. }
  2006. $suffix = $groups[3] . $groups[2] . $groups[1];
  2007. // Parse substitute markdown with the same prefix and suffix but just
  2008. // an "x" as content. We'll swap in the unaltered formula into the
  2009. // parsed nodes.
  2010. $tempInline = $prefix . 'x' . $suffix;
  2011. $tempNodes = $state->inlineMarkdownToNodes($tempInline);
  2012. if (count($tempNodes) != 1) return null;
  2013. $foundText = false;
  2014. if ($tempNodes[0] instanceof MDTextNode && $tempNodes[0]->text === 'x') {
  2015. $tempNodes[0]->text = $formula;
  2016. $foundText = true;
  2017. } else {
  2018. $tempNodes[0]->visitChildren(function($node) use ($formula, &$foundText) {
  2019. if ($node instanceof MDTextNode && $node->text === 'x') {
  2020. $node->text = $formula;
  2021. $foundText = true;
  2022. }
  2023. });
  2024. }
  2025. if (!$foundText) return null;
  2026. return $tempNodes[0];
  2027. }
  2028. /**
  2029. * @param string $line
  2030. * @return string[]
  2031. */
  2032. private function parseColumnAlignments(string $line): array {
  2033. $line = trim($line);
  2034. if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
  2035. if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
  2036. return array_map(function($token) {
  2037. if (str_starts_with($token, ':')) {
  2038. if (str_ends_with($token, ':')) {
  2039. return 'center';
  2040. }
  2041. return 'left';
  2042. } elseif (str_ends_with($token, ':')) {
  2043. return 'right';
  2044. }
  2045. return null;
  2046. }, mb_split('\\s*\\|\\s*', $line));
  2047. }
  2048. private const tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';
  2049. public function readBlock(MDState $state): ?MDBlockNode {
  2050. if (!$state->hasLines(2)) return null;
  2051. $startP = $state->p;
  2052. $firstLine = $state->lines[$startP];
  2053. $modifier = MDTagModifier::fromLine($firstLine, $state)[1];
  2054. $headerRow = $this->readTableRow($state, true);
  2055. if ($headerRow === null) {
  2056. $state->p = $startP;
  2057. return null;
  2058. }
  2059. $dividerLine = $state->lines[$state->p++];
  2060. if (!mb_eregi(self::tableDividerRegex, $dividerLine, $dividerGroups)) {
  2061. $state->p = $startP;
  2062. return null;
  2063. }
  2064. $columnAlignments = $this->parseColumnAlignments($dividerLine);
  2065. $bodyRows = [];
  2066. while ($state->hasLines(1)) {
  2067. $row = $this->readTableRow($state, false);
  2068. if ($row === null) break;
  2069. array_push($bodyRows, $row);
  2070. }
  2071. $table = new MDTableNode($headerRow, $bodyRows);
  2072. $table->columnAlignments = $columnAlignments;
  2073. if ($modifier) $modifier->applyTo($table);
  2074. return $table;
  2075. }
  2076. }
  2077. /**
  2078. * Block reader for definition lists. Definitions go directly under terms starting
  2079. * with a colon.
  2080. */
  2081. class MDDefinitionListReader extends MDReader {
  2082. public function readBlock(MDState $state): ?MDBlockNode {
  2083. $p = $state->p;
  2084. $groups;
  2085. $termCount = 0;
  2086. $definitionCount = 0;
  2087. $defLines = [];
  2088. while ($state->hasLines(1, $p)) {
  2089. $line = $state->lines[$p++];
  2090. if (trim($line) === '') {
  2091. break;
  2092. }
  2093. if (mb_eregi('^\\s+', $line)) {
  2094. if (sizeof($defLines) == 0) return null;
  2095. $defLines[sizeof($defLines) - 1] .= "\n" . $line;
  2096. } elseif (mb_eregi('^:\\s+', $line)) {
  2097. array_push($defLines, $line);
  2098. $definitionCount++;
  2099. } else {
  2100. array_push($defLines, $line);
  2101. $termCount++;
  2102. }
  2103. }
  2104. if ($termCount == 0 || $definitionCount == 0) return null;
  2105. $blocks = array_map(function($line) use ($state) {
  2106. if (mb_eregi('^:\\s+(.*?)$', $line, $groups)) {
  2107. return new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($groups[1]));
  2108. } else {
  2109. return new MDDefinitionListTermNode($state->inlineMarkdownToNodes($line));
  2110. }
  2111. }, $defLines);
  2112. $state->p = $p;
  2113. return new MDDefinitionListNode($blocks);
  2114. }
  2115. }
  2116. /**
  2117. * Block reader for defining footnote contents. Footnotes can be defined anywhere
  2118. * in the document but will always be rendered at the end of a page or end of
  2119. * the document.
  2120. */
  2121. class MDFootnoteReader extends MDReader {
  2122. private const footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
  2123. private const footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol
  2124. /**
  2125. * @param MDState $state
  2126. * @param string $symbol
  2127. * @param MDNode[] $footnote
  2128. */
  2129. private function defineFootnote(MDState $state, string $symbol, array $footnote) {
  2130. $footnotes = $state->root()->userInfo['footnotes'] ?? [];
  2131. $footnotes[$symbol] = $footnote;
  2132. $state->root()->userInfo['footnotes'] = $footnotes;
  2133. }
  2134. private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
  2135. $footnoteInstances = $state->root()->userInfo['footnoteInstances'];
  2136. $instances = $footnoteInstances[$symbol] ?? [];
  2137. array_push($instances, $unique);
  2138. $footnoteInstances[$symbol] = $instances;
  2139. $state->root()->userInfo['footnoteInstances'] = $footnoteInstances;
  2140. }
  2141. private function idForFootnoteSymbol(MDState $state, string $symbol): int {
  2142. $footnoteIds = $state->root()->userInfo['footnoteIds'] ?? [];
  2143. $existing = $footnoteIds[$symbol] ?? null;
  2144. if ($existing !== null) return $existing;
  2145. $nextFootnoteId = $state->root()->userInfo['nextFootnoteId'] ?? 1;
  2146. $id = $nextFootnoteId++;
  2147. $footnoteIds[$symbol] = $id;
  2148. $state->root()->userInfo['nextFootnoteId'] = $nextFootnoteId;
  2149. $state->root()->userInfo['footnoteIds'] = $footnoteIds;
  2150. return $id;
  2151. }
  2152. public function preProcess(MDState $state) {
  2153. $state->root()->userInfo['footnoteInstances'] = [];
  2154. $state->root()->userInfo['footnotes'] = [];
  2155. $state->root()->userInfo['footnoteIds'] = [];
  2156. $state->root()->userInfo['nextFootnoteId'] = 1;
  2157. }
  2158. public function readBlock(MDState $state): ?MDBlockNode {
  2159. $p = $state->p;
  2160. if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
  2161. $symbol = $groups[1];
  2162. $def = $groups[2];
  2163. while ($state->hasLines(1, $p)) {
  2164. $line = $state->lines[$p++];
  2165. if (mb_eregi('^\\s+', $line)) {
  2166. $def .= "\n" . $line;
  2167. } else {
  2168. $p--;
  2169. break;
  2170. }
  2171. }
  2172. $content = $state->inlineMarkdownToNodes($def);
  2173. $this->defineFootnote($state, $symbol, $content);
  2174. $state->p = $p;
  2175. return new MDBlockNode(); // empty
  2176. }
  2177. public function readToken(MDState $state, string $line): ?MDToken {
  2178. $groups;
  2179. if (mb_eregi(self::footnoteWithTitleRegex, $line, $groups)) {
  2180. return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
  2181. }
  2182. if (mb_eregi(self::footnoteRegex, $line, $groups)) {
  2183. return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
  2184. }
  2185. return null;
  2186. }
  2187. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2188. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
  2189. $symbol = $match->tokens[0]->content;
  2190. array_splice($tokens, $match->index, 1, [new MDFootnoteNode($symbol)]);
  2191. return true;
  2192. }
  2193. return false;
  2194. }
  2195. /**
  2196. * @param MDState $state
  2197. * @param MDBlockNode[] $blocks
  2198. */
  2199. public function postProcess(MDState $state, array &$blocks) {
  2200. $nextOccurrenceId = 1;
  2201. foreach ($blocks as $block) {
  2202. $block->visitChildren(function($node) use (&$nextOccurrenceId, $state) {
  2203. if (!($node instanceof MDFootnoteNode)) return;
  2204. $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
  2205. $node->occurrenceId = $nextOccurrenceId++;
  2206. $node->displaySymbol = strval($node->footnoteId);
  2207. $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
  2208. });
  2209. }
  2210. if (sizeof($state->userInfo['footnotes']) == 0) return;
  2211. array_push($blocks, new MDFootnoteListNode());
  2212. }
  2213. public function compareBlockOrdering(MDReader $other): int {
  2214. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2215. return -1;
  2216. }
  2217. return 0;
  2218. }
  2219. public function compareTokenizeOrdering(MDReader $other): int {
  2220. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2221. return -1;
  2222. }
  2223. return 0;
  2224. }
  2225. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2226. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2227. return -1;
  2228. }
  2229. return 0;
  2230. }
  2231. }
  2232. /**
  2233. * Block reader for abbreviation definitions. Anywhere the abbreviation appears
  2234. * in plain text will have its definition available when hovering over it.
  2235. * Definitions can appear anywhere in the document. Their content should only
  2236. * contain simple text, not markdown.
  2237. */
  2238. class MDAbbreviationReader extends MDReader {
  2239. private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
  2240. $abbrevs = $state->root()->userInfo['abbreviations'];
  2241. $abbrevs[$abbreviation] = $definition;
  2242. $state->root()->userInfo['abbreviations'] = $abbrevs;
  2243. }
  2244. public function preProcess(MDState $state) {
  2245. $state->root()->userInfo['abbreviations'] = [];
  2246. }
  2247. public function readBlock(MDState $state): ?MDBlockNode {
  2248. $p = $state->p;
  2249. $line = $state->lines[$p++];
  2250. if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
  2251. $abbrev = $groups[1];
  2252. $def = $groups[2];
  2253. $this->defineAbbreviation($state, $abbrev, $def);
  2254. $state->p = $p;
  2255. return new MDBlockNode(); // empty
  2256. }
  2257. /**
  2258. * @param MDState $state
  2259. * @param MDNode[] $blocks
  2260. */
  2261. public function postProcess(MDState $state, array &$blocks) {
  2262. $abbreviations = $state->root()->userInfo['abbreviations'];
  2263. MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
  2264. if (!($original instanceof MDTextNode)) return null;
  2265. $changed = false;
  2266. $elems = [ $original->text ]; // mix of strings and MDNodes
  2267. for ($i = 0; $i < sizeof($elems); $i++) {
  2268. $text = $elems[$i];
  2269. if (!is_string($text)) continue;
  2270. foreach ($abbreviations as $abbreviation => $definition) {
  2271. $index = strpos($text, $abbreviation);
  2272. if ($index === false) continue;
  2273. $prefix = substr($text, 0, $index);
  2274. $suffix = substr($text, $index + strlen($abbreviation));
  2275. array_splice($elems, $i, 1, [$prefix,
  2276. new MDAbbreviationNode($abbreviation, $definition),
  2277. $suffix]);
  2278. $i = -1; // start over
  2279. $changed = true;
  2280. break;
  2281. }
  2282. }
  2283. if (!$changed) return null;
  2284. $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
  2285. return new MDNode($nodes);
  2286. });
  2287. }
  2288. }
  2289. /**
  2290. * Block reader for simple paragraphs. Paragraphs are separated by a blank (or
  2291. * whitespace-only) line. This reader is prioritized after every other reader
  2292. * since there is no distinguishing syntax.
  2293. */
  2294. class MDParagraphReader extends MDReader {
  2295. public function readBlock(MDState $state): ?MDBlockNode {
  2296. $paragraphLines = [];
  2297. $p = $state->p;
  2298. while ($state->hasLines(1, $p)) {
  2299. $line = $state->lines[$p++];
  2300. if (trim($line) === '') {
  2301. break;
  2302. }
  2303. array_push($paragraphLines, $line);
  2304. }
  2305. if ($state->p == 0 && $p >= sizeof($state->lines)) {
  2306. // If it's the entire document don't wrap it in a paragraph
  2307. return null;
  2308. }
  2309. if (sizeof($paragraphLines) > 0) {
  2310. $state->p = $p;
  2311. $content = implode("\n", $paragraphLines);
  2312. return new MDParagraphNode($state->inlineMarkdownToNodes($content));
  2313. }
  2314. return null;
  2315. }
  2316. public function compareBlockOrdering(MDReader $other): int {
  2317. return 1; // always dead last
  2318. }
  2319. }
  2320. /**
  2321. * Abstract base class for readers that look for one or two delimiting tokens
  2322. * on either side of some content. E.g. `**strong**`.
  2323. */
  2324. class MDSimplePairInlineReader extends MDReader {
  2325. // Passes:
  2326. // 1. Syntaxes with two delimiting tokens, interior tokens of the same
  2327. // kind must be even in number
  2328. // 2. Syntaxes with one delimiting token, interior tokens of the same
  2329. // kind must be even in number
  2330. // 3. Syntaxes with two delimiting tokens, any tokens inside
  2331. // 4. Syntaxes with one delimiting token, any tokens inside
  2332. public function substitutionPassCount(): int { return 4; }
  2333. /**
  2334. * Attempts a substitution of a matched pair of delimiting token types.
  2335. * If successful, the substitution is performed on `$tokens` and `true` is
  2336. * returned, otherwise `false` is returned and the array is untouched.
  2337. *
  2338. * If `this->substitutionPassCount()` is greater than 1, the first pass
  2339. * will reject matches with the delimiting character inside the content
  2340. * tokens. If the reader uses a single pass or a subsequent pass is performed
  2341. * with multiple pass any contents will be accepted.
  2342. *
  2343. * @param MDState $state
  2344. * @param int $pass pass number, starting with `1`
  2345. * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
  2346. * @param string $nodeClass class of the node to return if matched
  2347. * @param MDTokenType $delimiter delimiting token
  2348. * @param int $count how many times the token is repeated to form the delimiter
  2349. * @param bool $plaintext whether to create `$nodeClass` with a verbatim
  2350. * content string instead of parsed `MDNode`s
  2351. * @return bool `true` if substitution was performed, `false` if not
  2352. */
  2353. public function attemptPair(MDState $state, int $pass, array &$tokens,
  2354. string $nodeClass, MDTokenType $delimiter, int $count=1,
  2355. bool $plaintext=false): bool {
  2356. // We do four passes. #1: doubles without inner tokens, #2: singles
  2357. // without inner tokens, #3: doubles with paired inner tokens,
  2358. // #4: singles with paired inner tokens
  2359. if ($count == 1 && $pass != 2 && $pass != 4) return false;
  2360. if ($count > 1 && $pass != 1 && $pass != 3) return false;
  2361. $delimiters = array_fill(0, $count, $delimiter);
  2362. $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
  2363. $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters,
  2364. function($content) use ($nodeClass, $isFirstOfMultiplePasses, $delimiter) {
  2365. $firstType = $content[0] instanceof MDToken ? $content[0]->type : null;
  2366. $lastType = $content[sizeof($content) - 1] instanceof MDToken ?
  2367. $content[sizeof($content) - 1]->type : null;
  2368. if ($firstType == MDTokenType::Whitespace) return false;
  2369. if ($lastType == MDTokenType::Whitespace) return false;
  2370. foreach ($content as $token) {
  2371. // Don't allow nesting
  2372. if (MDUtils::typename($token) == $nodeClass) return false;
  2373. }
  2374. if ($isFirstOfMultiplePasses) {
  2375. $innerCount = 0;
  2376. foreach ($content as $token) {
  2377. if ($token instanceof MDToken && $token->type == $delimiter) $innerCount++;
  2378. }
  2379. if (($innerCount % 2) != 0) return false;
  2380. }
  2381. return true;
  2382. });
  2383. if ($match === null) return false;
  2384. $state->checkExecutionTime();
  2385. if ($plaintext) {
  2386. $content = implode('', array_map(fn($token) => $token instanceof MDToken ?
  2387. $token->original : $token->toPlaintext($state), $match->contentTokens));
  2388. } else {
  2389. $content = $state->tokensToNodes($match->contentTokens);
  2390. }
  2391. $ref = new ReflectionClass($nodeClass);
  2392. $node = $ref->newInstanceArgs([ $content ]);
  2393. array_splice($tokens, $match->startIndex, $match->totalLength, [$node]);
  2394. return true;
  2395. }
  2396. private static $firstTime = null;
  2397. }
  2398. /**
  2399. * Reader for emphasis syntax. Denoted with a single underscore on either side of
  2400. * some text (preferred) or a single asterisk on either side.
  2401. */
  2402. class MDEmphasisReader extends MDSimplePairInlineReader {
  2403. public function readToken(MDState $state, string $line): ?MDToken {
  2404. if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
  2405. if (str_starts_with($line, '*')) return new MDToken('*', MDTokenType::Asterisk);
  2406. return null;
  2407. }
  2408. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2409. if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', MDTokenType::Underscore)) return true;
  2410. if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', MDTokenType::Asterisk)) return true;
  2411. return false;
  2412. }
  2413. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2414. if ($other instanceof MDStrongReader) {
  2415. return 1;
  2416. }
  2417. return 0;
  2418. }
  2419. }
  2420. /**
  2421. * Reader for strong syntax. Denoted with two asterisks on either side of some
  2422. * text (preferred) or two underscores on either side. Note that if
  2423. * `MDUnderlineReader` is in use, it will replace the double-underscore syntax.
  2424. */
  2425. class MDStrongReader extends MDSimplePairInlineReader {
  2426. public function readToken(MDState $state, string $line): ?MDToken {
  2427. if (str_starts_with($line, '*')) return new MDToken('*', MDTokenType::Asterisk);
  2428. if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
  2429. return null;
  2430. }
  2431. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2432. if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', MDTokenType::Asterisk, 2)) return true;
  2433. if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', MDTokenType::Underscore, 2)) return true;
  2434. return false;
  2435. }
  2436. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2437. if ($other instanceof MDEmphasisReader) {
  2438. return -1;
  2439. }
  2440. return 0;
  2441. }
  2442. }
  2443. /**
  2444. * Reader for strikethrough syntax. Consists of two tildes on either side of
  2445. * some text (preferred) or single tildes on either side. Note that if
  2446. * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
  2447. *
  2448. * The number of recognized tildes can be configured.
  2449. */
  2450. class MDStrikethroughReader extends MDSimplePairInlineReader {
  2451. public bool $singleTildeEnabled = true;
  2452. public bool $doubleTildeEnabled = true;
  2453. public function readToken(MDState $state, string $line): ?MDToken {
  2454. if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
  2455. return null;
  2456. }
  2457. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2458. if ($this->singleTildeEnabled) {
  2459. if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
  2460. }
  2461. if ($this->doubleTildeEnabled) {
  2462. if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde)) return true;
  2463. }
  2464. return false;
  2465. }
  2466. }
  2467. /**
  2468. * Reader for underline syntax. Consists of two underscores on either side of
  2469. * some text. If used with `MDStrongReader` which also looks for double
  2470. * underscores, this reader will take priority.
  2471. */
  2472. class MDUnderlineReader extends MDSimplePairInlineReader {
  2473. public function readToken(MDState $state, string $line): ?MDToken {
  2474. if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
  2475. return null;
  2476. }
  2477. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2478. return $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, 2);
  2479. }
  2480. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2481. if ($other instanceof MDStrongReader) {
  2482. return -1;
  2483. }
  2484. return 0;
  2485. }
  2486. }
  2487. /**
  2488. * Reader for highlight syntax. Consists of pairs of equal signs on either side
  2489. * of some text.
  2490. */
  2491. class MDHighlightReader extends MDSimplePairInlineReader {
  2492. public function readToken(MDState $state, string $line): ?MDToken {
  2493. if (str_starts_with($line, '=')) return new MDToken('=', MDTokenType::Equal);
  2494. return null;
  2495. }
  2496. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2497. return $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, 2);
  2498. }
  2499. }
  2500. /**
  2501. * Reader for inline code syntax. Consists of one or two delimiting backticks
  2502. * around text. The contents between the backticks will be rendered verbatim,
  2503. * ignoring any inner markdown syntax. To include a backtick inside, escape it
  2504. * with a backslash.
  2505. */
  2506. class MDCodeSpanReader extends MDSimplePairInlineReader {
  2507. public function readToken(MDState $state, string $line): ?MDToken {
  2508. if (str_starts_with($line, '`')) return new MDToken('`', MDTokenType::Backtick);
  2509. return null;
  2510. }
  2511. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2512. if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 2, true)) return true;
  2513. if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 1, true)) return true;
  2514. return false;
  2515. }
  2516. }
  2517. /**
  2518. * Reader for subscript syntax. Consists of single tildes on either side of
  2519. * some text. If used with `MDStrikethroughReader`, this reader will take
  2520. * precedence, and strikethrough can only be done with double tildes.
  2521. */
  2522. class MDSubscriptReader extends MDSimplePairInlineReader {
  2523. public function readToken(MDState $state, string $line): ?MDToken {
  2524. if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
  2525. return null;
  2526. }
  2527. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2528. return $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
  2529. }
  2530. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2531. if ($other instanceof MDStrikethroughReader) {
  2532. return -1;
  2533. }
  2534. return 0;
  2535. }
  2536. }
  2537. /**
  2538. * Reader for superscript syntax. Consists of single caret characters on either
  2539. * side of some text.
  2540. */
  2541. class MDSuperscriptReader extends MDSimplePairInlineReader {
  2542. public function readToken(MDState $state, string $line): ?MDToken {
  2543. if (str_starts_with($line, '^')) return new MDToken('^', MDTokenType::Caret);
  2544. return null;
  2545. }
  2546. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2547. return $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
  2548. }
  2549. }
  2550. /**
  2551. * Reads a hypertext link. Consists of link text between square brackets
  2552. * followed immediately by a URL in parentheses.
  2553. */
  2554. class MDLinkReader extends MDReader {
  2555. public function readToken(MDState $state, string $line): ?MDToken {
  2556. $simpleEmailRegex = "^<(" . MDUtils::baseEmailRegex . ")>";
  2557. $simpleURLRegex = "^<(" . MDUtils::baseURLRegex . ")>";
  2558. if ($groups = MDToken::tokenizeLabel($line)) {
  2559. return new MDToken($groups[0], MDTokenType::Label, $groups[1]);
  2560. }
  2561. if ($groups = MDToken::tokenizeEmail($line)) {
  2562. return new MDToken($groups[0], MDTokenType::Email, $groups[1], $groups[2]);
  2563. }
  2564. if ($groups = MDToken::tokenizeURL($line)) {
  2565. return new MDToken($groups[0], MDTokenType::URL, $groups[1], $groups[2]);
  2566. }
  2567. if (mb_eregi($simpleEmailRegex, $line, $groups)) {
  2568. return new MDToken($groups[0], MDTokenType::SimpleEmail, $groups[1]);
  2569. }
  2570. if (mb_eregi($simpleURLRegex, $line, $groups)) {
  2571. return new MDToken($groups[0], MDTokenType::SimpleLink, $groups[1]);
  2572. }
  2573. return null;
  2574. }
  2575. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2576. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
  2577. MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
  2578. $text = $match->tokens[0]->content;
  2579. $url = $match->tokens[sizeof($match->tokens) - 1]->content;
  2580. $title = $match->tokens[sizeof($match->tokens) - 1]->extra;
  2581. array_splice($tokens, $match->index, sizeof($match->tokens),
  2582. [new MDLinkNode($url, $state->inlineMarkdownToNode($text), $title)]);
  2583. return true;
  2584. }
  2585. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
  2586. MDTokenType::META_OptionalWhitespace, MDTokenType::Email ])) {
  2587. $text = $match->tokens[0]->content;
  2588. $email = $match->tokens[sizeof($match->tokens) - 1]->content;
  2589. $url = "mailto:{$email}";
  2590. $title = $match->tokens[sizeof($match->tokens) - 1]->extra;
  2591. array_splice($tokens, $match->index, sizeof($match->tokens),
  2592. [new MDLinkNode($url, $state->inlineMarkdownToNodes($text), $title)]);
  2593. return true;
  2594. }
  2595. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ])) {
  2596. $token = $match->tokens[0];
  2597. $link = "mailto:{$token->content}";
  2598. $node = new MDLinkNode($link, new MDObfuscatedTextNode($token->content));
  2599. array_splice($tokens, $match->index, 1, [$node]);
  2600. return true;
  2601. }
  2602. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ])) {
  2603. $token = $match->tokens[0];
  2604. $link = $token->content;
  2605. $node = new MDLinkNode($link, new MDTextNode($link));
  2606. array_splice($tokens, $match->index, 1, [$node]);
  2607. return true;
  2608. }
  2609. return false;
  2610. }
  2611. }
  2612. /**
  2613. * Reader for referential URL definitions. Consists of link text between square
  2614. * brackets followed immediately by a reference symbol also in square brackets.
  2615. * The URL can be defined elsewhere on a line by itself with the symbol in square
  2616. * brackets, colon, and the URL (and optional title in quotes).
  2617. */
  2618. class MDReferencedLinkReader extends MDLinkReader {
  2619. public function readBlock(MDState $state): ?MDBlockNode {
  2620. $p = $state->p;
  2621. $line = $state->lines[$p++];
  2622. if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $line, $groups)) {
  2623. $symbol = $groups[1];
  2624. $url = $groups[2];
  2625. $title = $groups[3];
  2626. } else {
  2627. if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $line, $groups)) {
  2628. $symbol = $groups[1];
  2629. $url = $groups[2];
  2630. $title = null;
  2631. } else {
  2632. return null;
  2633. }
  2634. }
  2635. $state->defineURL($symbol, $url, $title);
  2636. $state->p = $p;
  2637. return new MDBlockNode([]); // empty
  2638. }
  2639. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2640. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
  2641. MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
  2642. $text = $match->tokens[0]->content;
  2643. $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
  2644. array_splice($tokens, $match->index, sizeof($match->tokens),
  2645. [new MDReferencedLinkNode($ref, $state->inlineMarkdownToNodes($text))]);
  2646. return true;
  2647. }
  2648. return false;
  2649. }
  2650. }
  2651. /**
  2652. * Reader for images. Consists of an exclamation, alt text in square brackets,
  2653. * and image URL in parentheses.
  2654. */
  2655. class MDImageReader extends MDLinkReader {
  2656. public function readToken(MDState $state, string $line): ?MDToken {
  2657. $s = parent::readToken($state, $line);
  2658. if ($s) return $s;
  2659. if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
  2660. return null;
  2661. }
  2662. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2663. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang,
  2664. MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
  2665. $alt = $match->tokens[1]->content;
  2666. $url = $match->tokens[sizeof($match->tokens) - 1]->content;
  2667. $title = $match->tokens[sizeof($match->tokens) - 1]->extra;
  2668. $node = new MDImageNode($url, $alt);
  2669. if ($title !== null) {
  2670. $node->attributes['title'] = $title;
  2671. }
  2672. array_splice($tokens, $match->index, sizeof($match->tokens), [$node]);
  2673. return true;
  2674. }
  2675. return false;
  2676. }
  2677. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2678. if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
  2679. return -1;
  2680. }
  2681. return 0;
  2682. }
  2683. }
  2684. /**
  2685. * Reader for images with referential URL definitions. Consists of an
  2686. * exclamation, alt text in square brackets, and link symbol in square brackets.
  2687. * URL is defined the same as for `MDReferencedLinkReader`.
  2688. */
  2689. class MDReferencedImageReader extends MDReferencedLinkReader {
  2690. public function readToken(MDState $state, string $line): ?MDToken {
  2691. $s = parent::readToken($state, $line);
  2692. if ($s) return $s;
  2693. if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
  2694. return null;
  2695. }
  2696. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2697. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang,
  2698. MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
  2699. $alt = $match->tokens[1]->content;
  2700. $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
  2701. array_splice($tokens, $match->index, sizeof($match->tokens),
  2702. [new MDReferencedImageNode($ref, $alt)]);
  2703. return true;
  2704. }
  2705. return false;
  2706. }
  2707. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2708. if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
  2709. return -1;
  2710. }
  2711. return 0;
  2712. }
  2713. }
  2714. /**
  2715. * Converts line breaks within blocks into line breaks in the HTML. Not
  2716. * included in any of the default reader sets since most flavors ignore
  2717. * line breaks within blocks.
  2718. */
  2719. class MDLineBreakReader extends MDReader {
  2720. public function postProcess(MDState $state, array &$blocks) {
  2721. MDNode::replaceNodes($state, $blocks, function(MDNode $original) {
  2722. if (!($original instanceof MDTextNode)) return null;
  2723. $lines = explode("\n", $original->text);
  2724. if (sizeof($lines) == 1) return null;
  2725. $nodes = [];
  2726. foreach ($lines as $i => $line) {
  2727. if ($i > 0) {
  2728. array_push($nodes, new MDLineBreakNode());
  2729. }
  2730. array_push($nodes, new MDTextNode($line));
  2731. }
  2732. return new MDNode($nodes);
  2733. });
  2734. }
  2735. }
  2736. /**
  2737. * Reads a verbatim HTML tag, and if it passes validation by `MDState->$tagFilter`,
  2738. * will be rendered in the final HTML document. Disallowed tags will be rendered
  2739. * as plain text in the resulting document.
  2740. */
  2741. class MDHTMLTagReader extends MDReader {
  2742. public function readToken(MDState $state, string $line): ?MDToken {
  2743. $tag = MDHTMLTag::fromLineStart($line, $state);
  2744. if ($tag === null) return null;
  2745. if (!$state->root()->tagFilter->isValidTagName($tag->tagName)) return null;
  2746. $state->root()->tagFilter->scrubTag($tag);
  2747. return new MDToken($tag->original, MDTokenType::HTMLTag, $tag);
  2748. }
  2749. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2750. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ])) {
  2751. $tag = $match->tokens[0]->tag;
  2752. array_splice($tokens, $match->index, 1, [new MDHTMLTagNode($tag)]);
  2753. return true;
  2754. }
  2755. return false;
  2756. }
  2757. }
  2758. /**
  2759. * Reads tag modifiers. Consists of curly braces with one or more CSS classes,
  2760. * IDs, or custom attributes separated by spaces to apply to the preceding
  2761. * node. Validation is performed on modifiers and only acceptable values are
  2762. * applied.
  2763. */
  2764. class MDModifierReader extends MDReader {
  2765. public function readToken(MDState $state, string $line): ?MDToken {
  2766. $modifier = MDTagModifier::fromStart($line);
  2767. if ($modifier) return new MDToken($modifier->original, MDTokenType::Modifier, $modifier);
  2768. return null;
  2769. }
  2770. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2771. // Modifiers are applied elsewhere, and if they're not it's fine if they're
  2772. // rendered as the original syntax.
  2773. return false;
  2774. }
  2775. }
  2776. // -- Nodes -----------------------------------------------------------------
  2777. /**
  2778. * Base class for nodes in the assembled document tree.
  2779. */
  2780. class MDNode {
  2781. /**
  2782. * Array of CSS classes to add to the node when rendered as HTML.
  2783. * @var string[]
  2784. */
  2785. public array $cssClasses = [];
  2786. public ?string $cssId = null;
  2787. /**
  2788. * Mapping of CSS attributes to values.
  2789. * @var string[]
  2790. */
  2791. public array $cssStyles = [];
  2792. /**
  2793. * Mapping of arbitrary attributes and values to add to this node's top-level
  2794. * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
  2795. * `$cssClasses`, `$cssId`, and `$cssStyles` instead.
  2796. * @var array
  2797. */
  2798. public array $attributes = [];
  2799. /**
  2800. * All child nodes in this node.
  2801. * @var MDNode[]
  2802. */
  2803. public array $children = [];
  2804. /**
  2805. * @param MDNode|MDNode[] $children
  2806. */
  2807. public function __construct(MDNode|array $children=[]) {
  2808. if (is_array($children)) {
  2809. foreach ($children as $elem) {
  2810. if (!($elem instanceof MDNode)) {
  2811. $thisClassName = MDUtils::typename($this);
  2812. $elemClassName = MDUtils::typename($elem);
  2813. throw new Error("{$thisClassName} expects children of type " .
  2814. "MDNode[] or MDNode, got array with {$elemClassName} element");
  2815. }
  2816. }
  2817. $this->children = $children;
  2818. } elseif ($children instanceof MDNode) {
  2819. $this->children = [ $children ];
  2820. } else {
  2821. $thisClassName = MDUtils::typename($this);
  2822. $elemClassName = MDUtils::typename($children);
  2823. throw new Error("{$thisClassName} expects children of type MDNode[] " .
  2824. "or MDNode, got {$elemClassName}");
  2825. }
  2826. }
  2827. public function __toString(): string {
  2828. $s = "<" . get_class($this);
  2829. foreach ($this->children as $child) {
  2830. $s .= " {$child}";
  2831. }
  2832. $s .= ">";
  2833. return $s;
  2834. }
  2835. /**
  2836. * Adds a CSS class. If already present it will not be duplicated.
  2837. */
  2838. public function addClass(string $cssClass): bool {
  2839. if (array_search($cssClass, $this->cssClasses) !== false) return false;
  2840. array_push($this->cssClasses, $cssClass);
  2841. return true;
  2842. }
  2843. /**
  2844. * Removes a CSS class.
  2845. *
  2846. * @param string $cssClass
  2847. * @return bool whether the class was present and removed
  2848. */
  2849. public function removeClass(string $cssClass): bool {
  2850. $beforeLength = sizeof($this->cssClasses);
  2851. $this->cssClasses = array_diff($this->cssClasses, [ $cssClass ]);
  2852. return sizeof($this->cssClasses) != $beforeLength;
  2853. }
  2854. /**
  2855. * Renders this node and any children as an HTML string. If the node has no
  2856. * content an empty string should be returned.
  2857. */
  2858. public function toHTML(MDState $state): string {
  2859. return MDNode::arrayToHTML($this->children, $state);
  2860. }
  2861. /**
  2862. * Renders this node and any children as a plain text string. The conversion
  2863. * should only render ordinary text, not attempt markdown-like formatting
  2864. * (e.g. list items should not be prefixed with asterisks, only have their
  2865. * content text returned). If the node has no renderable content an empty
  2866. * string should be returned.
  2867. */
  2868. public function toPlaintext(MDState $state): string {
  2869. return MDNode::arrayToPlaintext($this->children, $state);
  2870. }
  2871. /**
  2872. * Protected helper method that renders an HTML fragment of the attributes
  2873. * to apply to the root HTML tag representation of this node.
  2874. *
  2875. * Example result with a couple `$cssClasses`, a `$cssId`, and a custom
  2876. * `$attributes` key-value pair:
  2877. *
  2878. * ```
  2879. * class="foo bar" id="baz" lang="en"
  2880. * ```
  2881. *
  2882. * The value includes a leading space if it's non-empty so that it can be
  2883. * concatenated directly after the tag name and before the closing `>`.
  2884. */
  2885. protected function htmlAttributes(): string {
  2886. $html = '';
  2887. if (sizeof($this->cssClasses) > 0) {
  2888. $classlist = MDUtils::escapeHTML(implode(' ', $this->cssClasses));
  2889. $html .= " class=\"{$classlist}\"";
  2890. }
  2891. if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
  2892. $html .= " id=\"" . MDUtils::escapeHTML($this->cssId) . "\"";
  2893. }
  2894. $styles = [];
  2895. foreach ($this->cssStyles as $key => $value) {
  2896. array_push($styles, "{$key}: {$value};");
  2897. }
  2898. if (sizeof($styles) > 0) {
  2899. $escaped = MDUtils::escapeHTML(implode(' ', $styles));
  2900. $html .= " style=\"{$escaped}\"";
  2901. }
  2902. foreach ($this->attributes as $key => $value) {
  2903. if ($key === 'class' || $key === 'id' || $key === 'style') continue;
  2904. $cleanKey = MDUtils::scrubAttributeName($key);
  2905. if (mb_strlen($cleanKey) == 0) continue;
  2906. $cleanValue = MDUtils::escapeHTML($value);
  2907. $html .= " {$cleanKey}=\"{$cleanValue}\"";
  2908. }
  2909. return $html;
  2910. }
  2911. /**
  2912. * Protected helper that renders and concatenates the HTML of all children
  2913. * of this node. Mostly for use by subclasses in their `toHTML`
  2914. * implementations.
  2915. */
  2916. protected function childHTML(MDState $state): string {
  2917. return MDNode::arrayToHTML($this->children, $state);
  2918. }
  2919. /**
  2920. * Protected helper that renders and concatenates the plaintext of all
  2921. * children of this node.
  2922. */
  2923. protected function childPlaintext(MDState $state): string {
  2924. return MDNode::arrayToPlaintext($this->children, $state);
  2925. }
  2926. /**
  2927. * Protected helper for rendering nodes represented by simple paired HTML
  2928. * tags. Custom CSS classes and attributes will be included in the result,
  2929. * and child content will be rendered between the tags.
  2930. */
  2931. protected function simplePairedTagHTML(MDState $state, string $tagName): string {
  2932. $openTagSuffix = ($this->children[0] ?? null) instanceof MDBlockNode ? "\n" : "";
  2933. $closeTagPrefix = ($this->children[sizeof($this->children) - 1] ?? null) instanceof MDBlockNode ? "\n" : '';
  2934. $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
  2935. $attr = $this->htmlAttributes();
  2936. $childHTML = $this->childHTML($state);
  2937. return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
  2938. }
  2939. /**
  2940. * Calls the given callback function with every child node, recursively.
  2941. * Nodes are visited depth-first.
  2942. */
  2943. public function visitChildren(callable $fn) {
  2944. foreach ($this->children as $child) {
  2945. $fn($child);
  2946. $child->visitChildren($fn);
  2947. }
  2948. }
  2949. /**
  2950. * Helper for rendering and concatenating HTML from an array of `MDNode`s.
  2951. *
  2952. * @param MDNode[] $nodes
  2953. * @param MDState $state
  2954. * @return string HTML string
  2955. */
  2956. public static function arrayToHTML(array $nodes, MDState $state): string {
  2957. return implode('', array_map(fn($node) => $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : ''), $nodes));
  2958. }
  2959. /**
  2960. * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
  2961. *
  2962. * @param MDNode[] $nodes
  2963. * @param MDState $state
  2964. * @return string plaintext
  2965. */
  2966. public static function arrayToPlaintext(array $nodes, MDState $state): string {
  2967. return implode('', array_map(fn($node) => $node->toPlaintext($state), $nodes));
  2968. }
  2969. /**
  2970. * Recursively searches and replaces nodes in a tree. The given `$replacer`
  2971. * is passed every node in the tree. If `$replacer` returns a new `MDNode`
  2972. * the original will be replaced with it. If the function returns `null` no
  2973. * change will be made to that node. Traversal is depth-first.
  2974. *
  2975. * @param MDState $state
  2976. * @param MDNode[] $nodes
  2977. * @param callable $replacer takes a node as an argument, returns either
  2978. * a new node or `null` to leave it unchanged
  2979. */
  2980. public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
  2981. for ($i = 0; $i < sizeof($nodes); $i++) {
  2982. $originalNode = $nodes[$i];
  2983. $replacement = $replacer($originalNode);
  2984. if ($replacement instanceof MDNode) {
  2985. array_splice($nodes, $i, 1, [$replacement]);
  2986. } else {
  2987. self::replaceNodes($state, $originalNode->children, $replacer);
  2988. }
  2989. }
  2990. }
  2991. }
  2992. /**
  2993. * Marker subclass that indicates a node represents block syntax.
  2994. */
  2995. class MDBlockNode extends MDNode {}
  2996. /**
  2997. * Paragraph block.
  2998. */
  2999. class MDParagraphNode extends MDBlockNode {
  3000. public function toHTML(MDState $state): string {
  3001. return $this->simplePairedTagHTML($state, 'p');
  3002. }
  3003. }
  3004. /**
  3005. * A heading block with a level from 1 to 6.
  3006. */
  3007. class MDHeadingNode extends MDBlockNode {
  3008. public int $level;
  3009. /**
  3010. * @param int $level
  3011. * @param MDNode|MDNode[] $children
  3012. */
  3013. public function __construct(int $level, MDNode|array $children) {
  3014. parent::__construct($children);
  3015. if (!is_int($level) || ($level < 1 || $level > 6)) {
  3016. $thisClassName = MDUtils::typename($this);
  3017. throw new Error("{$thisClassName} requires heading level 1 to 6");
  3018. }
  3019. $this->level = $level;
  3020. }
  3021. public function toHTML(MDState $state): string {
  3022. return $this->simplePairedTagHTML($state, "h{$this->level}");
  3023. }
  3024. }
  3025. /**
  3026. * A sub-text block with smaller, less prominent text.
  3027. */
  3028. class MDSubtextNode extends MDBlockNode {
  3029. public function toHTML(MDState $state): string {
  3030. $this->addClass('subtext');
  3031. return $this->simplePairedTagHTML($state, 'div');
  3032. }
  3033. }
  3034. /**
  3035. * Node for a horizontal dividing line.
  3036. */
  3037. class MDHorizontalRuleNode extends MDBlockNode {
  3038. public function toHTML(MDState $state): string {
  3039. return "<hr" . $this->htmlAttributes() . ">";
  3040. }
  3041. }
  3042. /**
  3043. * A block quote, usually rendered indented from other text.
  3044. */
  3045. class MDBlockquoteNode extends MDBlockNode {
  3046. public function toHTML(MDState $state): string {
  3047. return $this->simplePairedTagHTML($state, 'blockquote');
  3048. }
  3049. }
  3050. /**
  3051. * A bulleted list. Contains `MDListItemNode` children.
  3052. */
  3053. class MDUnorderedListNode extends MDBlockNode {
  3054. /** @var MDListItemNode[] $children */
  3055. public function toHTML(MDState $state): string {
  3056. return $this->simplePairedTagHTML($state, 'ul');
  3057. }
  3058. }
  3059. /**
  3060. * A numbered list. Contains `MDListItemNode` children.
  3061. */
  3062. class MDOrderedListNode extends MDBlockNode {
  3063. /** @var MDListItemNode[] $children */
  3064. public ?int $startOrdinal;
  3065. /**
  3066. * @param MDListItemNode[] $children
  3067. * @param ?int $startOrdinal
  3068. */
  3069. public function __construct(array $children, ?int $startOrdinal=null) {
  3070. parent::__construct($children);
  3071. $this->startOrdinal = $startOrdinal;
  3072. }
  3073. public function toHTML(MDState $state): string {
  3074. if ($this->startOrdinal !== null && $this->startOrdinal != 1) {
  3075. $this->attributes['start'] = strval($this->startOrdinal);
  3076. }
  3077. return $this->simplePairedTagHTML($state, 'ol');
  3078. }
  3079. }
  3080. /**
  3081. * An item in a bulleted or numbered list.
  3082. */
  3083. class MDListItemNode extends MDBlockNode {
  3084. public ?int $ordinal;
  3085. /**
  3086. * @param MDNode|MDNode[] $children
  3087. * @param ?int $ordinal
  3088. */
  3089. public function __construct(MDNode|array $children, ?int $ordinal=null) {
  3090. parent::__construct($children);
  3091. $this->ordinal = $ordinal;
  3092. }
  3093. public function toHTML(MDState $state): string {
  3094. return $this->simplePairedTagHTML($state, 'li');
  3095. }
  3096. }
  3097. /**
  3098. * A block of preformatted computer code. Inner markdown is ignored.
  3099. */
  3100. class MDCodeBlockNode extends MDBlockNode {
  3101. public string $text;
  3102. /**
  3103. * The programming language of the content.
  3104. */
  3105. public ?string $language;
  3106. public function __construct(string $text, ?string $language=null) {
  3107. parent::__construct([]);
  3108. $this->text = $text;
  3109. $this->language = $language;
  3110. }
  3111. public function toHTML(MDState $state): string {
  3112. $languageModifier = ($this->language !== null) ? " class=\"language-{$this->language}\"" : '';
  3113. return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
  3114. MDUtils::escapeHTML($this->text) . "</code></pre>\n";
  3115. }
  3116. }
  3117. /**
  3118. * A table node with a single header row and any number of body rows.
  3119. */
  3120. class MDTableNode extends MDBlockNode {
  3121. /** @var MDTableRowNode[] $children */
  3122. public function headerRow(): ?MDTableRowNode { return $this->children[0] ?? null; }
  3123. public function bodyRows(): array { return array_slice($this->children, 1); }
  3124. /**
  3125. * How to align each column. Columns beyond the length of the array or with
  3126. * corresponding `null` elements will have no alignment set. Values should
  3127. * be valid CSS `text-align` values.
  3128. *
  3129. * @var string[]
  3130. */
  3131. public array $columnAlignments = [];
  3132. /**
  3133. * @param MDTableRowNode $headerRow
  3134. * @param MDTableRowNode[] $bodyRows
  3135. */
  3136. public function __construct(MDTableRowNode $headerRow, array $bodyRows) {
  3137. parent::__construct(array_merge([ $headerRow ], $bodyRows));
  3138. }
  3139. /**
  3140. * Returns a given body cell.
  3141. *
  3142. * @param {number} column
  3143. * @param {number} row
  3144. * @returns {MDTableCellNode|null} cell or `null` if out of bounds
  3145. */
  3146. public function bodyCellAt(int $column, int $row): ?MDTableCellNode {
  3147. $rowNode = $this->bodyRows()[$row] ?? null;
  3148. if ($rowNode === null) return null;
  3149. $cellNode = $rowNode->children[$column] ?? null;
  3150. return ($cellNode === null) ? null : $cellNode;
  3151. }
  3152. public function applyAlignments() {
  3153. foreach ($this->children as $child) {
  3154. $this->applyAlignmentsToRow($child);
  3155. }
  3156. }
  3157. private function applyAlignmentsToRow(MDTableRowNode $row) {
  3158. foreach ($row->children as $columnIndex => $cell) {
  3159. $alignment = $this->columnAlignments[$columnIndex] ?? null;
  3160. $this->applyAlignmentToCell($cell, $alignment);
  3161. }
  3162. }
  3163. public function applyAlignmentToCell(MDTableCellNode $cell, ?string $alignment) {
  3164. if ($alignment) {
  3165. $cell->cssStyles['text-align'] = $alignment;
  3166. } else {
  3167. unset($cell->cssStyles['text-align']);
  3168. }
  3169. }
  3170. public function toHTML(MDState $state): string {
  3171. $this->applyAlignments();
  3172. $html = '';
  3173. $html .= "<table" . $this->htmlAttributes() . ">\n";
  3174. $html .= "<thead>\n";
  3175. $html .= $this->headerRow()->toHTML($state) . "\n";
  3176. $html .= "</thead>\n";
  3177. $html .= "<tbody>\n";
  3178. $html .= MDNode::arrayToHTML($this->bodyRows(), $state) . "\n";
  3179. $html .= "</tbody>\n";
  3180. $html .= "</table>\n";
  3181. return $html;
  3182. }
  3183. }
  3184. /**
  3185. * Node for one row (header or body) in a table.
  3186. */
  3187. class MDTableRowNode extends MDBlockNode {
  3188. /** @var MDTableCellNode[] $children */
  3189. public function toHTML(MDState $state): string {
  3190. return $this->simplePairedTagHTML($state, 'tr');
  3191. }
  3192. }
  3193. /**
  3194. * Node for one cell in a table row.
  3195. */
  3196. class MDTableCellNode extends MDBlockNode {
  3197. public function toHTML(MDState $state): string {
  3198. return $this->simplePairedTagHTML($state, 'td');
  3199. }
  3200. }
  3201. /**
  3202. * Node for a header cell in a header table row.
  3203. */
  3204. class MDTableHeaderCellNode extends MDTableCellNode {
  3205. public function toHTML(MDState $state): string {
  3206. return $this->simplePairedTagHTML($state, 'th');
  3207. }
  3208. }
  3209. /**
  3210. * Definition list with `MDDefinitionListTermNode` and
  3211. * `MDDefinitionListDefinitionNode` children.
  3212. */
  3213. class MDDefinitionListNode extends MDBlockNode {
  3214. public function toHTML(MDState $state): string {
  3215. return $this->simplePairedTagHTML($state, 'dl');
  3216. }
  3217. }
  3218. /**
  3219. * A word or term in a definition list.
  3220. */
  3221. class MDDefinitionListTermNode extends MDBlockNode {
  3222. public function toHTML(MDState $state): string {
  3223. return $this->simplePairedTagHTML($state, 'dt');
  3224. }
  3225. }
  3226. /**
  3227. * The definition of a word or term in a definition list. Should follow a
  3228. * definition term, or another definition to serve as an alternate.
  3229. */
  3230. class MDDefinitionListDefinitionNode extends MDBlockNode {
  3231. public function toHTML(MDState $state): string {
  3232. return $this->simplePairedTagHTML($state, 'dd');
  3233. }
  3234. }
  3235. /**
  3236. * Block at the bottom of a document listing all the footnotes with their
  3237. * content.
  3238. */
  3239. class MDFootnoteListNode extends MDBlockNode {
  3240. private function footnoteId(MDState $state, string $symbol): ?int {
  3241. $lookup = $state->root()->userInfo['footnoteIds'];
  3242. if (!$lookup) return null;
  3243. return $lookup[$symbol] ?? null;
  3244. }
  3245. public function toHTML(MDState $state): string {
  3246. $footnotes = $state->root()->userInfo['footnotes'];
  3247. $symbolOrder = array_keys($footnotes);
  3248. if (sizeof($footnotes) == 0) return '';
  3249. $footnoteUniques = $state->root()->userInfo['footnoteInstances'];
  3250. $html = '';
  3251. $html .= '<div class="footnotes">';
  3252. $html .= '<ol>';
  3253. foreach ($symbolOrder as $symbolRaw) {
  3254. $symbol = "{$symbolRaw}";
  3255. $content = $footnotes[$symbol];
  3256. if (!$content) continue;
  3257. $footnoteId = $this->footnoteId($state, $symbol);
  3258. $contentHTML = MDNode::arrayToHTML($content, $state);
  3259. $html .= "<li value=\"{$footnoteId}\" id=\"" .
  3260. "{$state->root()->elementIdPrefix}footnote_{$footnoteId}\">{$contentHTML}";
  3261. $uniques = $footnoteUniques[$symbol] ?? null;
  3262. if ($uniques) {
  3263. foreach ($uniques as $unique) {
  3264. $html .= " <a href=\"#{$state->root()->elementIdPrefix}footnoteref_{$unique}\"" .
  3265. " class=\"footnote-backref\">↩︎</a>";
  3266. }
  3267. }
  3268. $html .= "</li>\n";
  3269. }
  3270. $html .= '</ol>';
  3271. $html .= '</div>';
  3272. return $html;
  3273. }
  3274. public function toPlaintext(MDState $state): string {
  3275. $footnotes = $state->userInfo['footnotes'];
  3276. $symbolOrder = array_keys($footnotes);
  3277. if (sizeof($footnotes) == 0) return '';
  3278. $text = '';
  3279. foreach ($symbolOrder as $symbolRaw) {
  3280. $symbol = "{$symbolRaw}";
  3281. $content = $footnotes[$symbol];
  3282. if (!$content) continue;
  3283. $text .= "{$symbol}. " . $this->childPlaintext(state) . "\n";
  3284. }
  3285. return trim($text);
  3286. }
  3287. }
  3288. /**
  3289. * Marker subclass that indicates a node represents inline syntax.
  3290. */
  3291. class MDInlineNode extends MDNode {}
  3292. /**
  3293. * Contains plain text. Special HTML characters are escaped when rendered.
  3294. */
  3295. class MDTextNode extends MDInlineNode {
  3296. public string $text;
  3297. public function __construct(string $text) {
  3298. parent::__construct([]);
  3299. $this->text = $text;
  3300. }
  3301. public function toHTML(MDState $state): string {
  3302. return MDUtils::escapeHTML($this->text);
  3303. }
  3304. public function toPlaintext(MDState $state): string {
  3305. return $this->text;
  3306. }
  3307. public function __toString(): string {
  3308. return "<MDTextNode \"{$this->text}\">";
  3309. }
  3310. }
  3311. /**
  3312. * Contains plain text which is rendered with HTML entities when rendered to
  3313. * be marginally more difficult for web scapers to decipher. Used for
  3314. * semi-sensitive info like email addresses.
  3315. */
  3316. class MDObfuscatedTextNode extends MDTextNode {
  3317. public function toHTML(MDState $state): string {
  3318. return MDUtils::escapeObfuscated($this->text);
  3319. }
  3320. }
  3321. /**
  3322. * Emphasized (italicized) content.
  3323. */
  3324. class MDEmphasisNode extends MDInlineNode {
  3325. public function toHTML(MDState $state): string {
  3326. return $this->simplePairedTagHTML($state, 'em');
  3327. }
  3328. }
  3329. /**
  3330. * Strong (bold) content.
  3331. */
  3332. class MDStrongNode extends MDInlineNode {
  3333. public function toHTML(MDState $state): string {
  3334. return $this->simplePairedTagHTML($state, 'strong');
  3335. }
  3336. }
  3337. /**
  3338. * Content rendered with a line through it.
  3339. */
  3340. class MDStrikethroughNode extends MDInlineNode {
  3341. public function toHTML(MDState $state): string {
  3342. return $this->simplePairedTagHTML($state, 's');
  3343. }
  3344. }
  3345. /**
  3346. * Underlined content.
  3347. */
  3348. class MDUnderlineNode extends MDInlineNode {
  3349. public function toHTML(MDState $state): string {
  3350. return $this->simplePairedTagHTML($state, 'u');
  3351. }
  3352. }
  3353. /**
  3354. * Highlighted content. Usually rendered with a bright colored background.
  3355. */
  3356. class MDHighlightNode extends MDInlineNode {
  3357. public function toHTML(MDState $state): string {
  3358. return $this->simplePairedTagHTML($state, 'mark');
  3359. }
  3360. }
  3361. /**
  3362. * Superscripted content.
  3363. */
  3364. class MDSuperscriptNode extends MDInlineNode {
  3365. public function toHTML(MDState $state): string {
  3366. return $this->simplePairedTagHTML($state, 'sup');
  3367. }
  3368. }
  3369. /**
  3370. * Subscripted content.
  3371. */
  3372. class MDSubscriptNode extends MDInlineNode {
  3373. public function toHTML(MDState $state): string {
  3374. return $this->simplePairedTagHTML($state, 'sub');
  3375. }
  3376. }
  3377. /**
  3378. * Inline plaintext indicating computer code.
  3379. */
  3380. class MDCodeNode extends MDInlineNode {
  3381. public string $text;
  3382. public function __construct(string $text) {
  3383. parent::__construct([]);
  3384. $this->text = $text;
  3385. }
  3386. public function toHTML(MDState $state): string {
  3387. return "<code" . $this->htmlAttributes() . ">" . MDUtils::escapeHTML($this->text) . "</code>";
  3388. }
  3389. }
  3390. /**
  3391. * A footnote symbol in a document. Denoted as a superscripted number that can
  3392. * be clicked to go to its content at the bottom of the document.
  3393. */
  3394. class MDFootnoteNode extends MDInlineNode {
  3395. /**
  3396. * Symbol the author used to match up the footnote to its content definition.
  3397. */
  3398. public string $symbol;
  3399. /**
  3400. * The superscript symbol rendered in HTML. May be the same or different
  3401. * than `$symbol`.
  3402. */
  3403. public ?string $displaySymbol = null;
  3404. /**
  3405. * Unique ID for the footnote definition.
  3406. */
  3407. public ?int $footnoteId = null;
  3408. /**
  3409. * Unique number for backlinking to a footnote occurrence. Populated by
  3410. * `MDFootnoteReader->postProcess()`.
  3411. */
  3412. public ?int $occurrenceId = null;
  3413. public function __construct(string $symbol, ?string $title=null) {
  3414. parent::__construct([]);
  3415. $this->symbol = $symbol;
  3416. if ($title) $this->attributes['title'] = $title;
  3417. }
  3418. public function toHTML(MDState $state): string {
  3419. if ($this->footnoteId !== null) {
  3420. return "<sup class=\"footnote\" id=\"{$state->root()->elementIdPrefix}footnoteref_{$this->occurrenceId}\"" .
  3421. $this->htmlAttributes() . ">" .
  3422. "<a href=\"#{$state->root()->elementIdPrefix}footnote_{$this->footnoteId}\">" .
  3423. MDUtils::escapeHTML($this->displaySymbol ?? $this->symbol) . "</a></sup>";
  3424. }
  3425. return "<!--FNREF:{{$this->symbol}}-->";
  3426. }
  3427. }
  3428. /**
  3429. * A clickable hypertext link.
  3430. */
  3431. class MDLinkNode extends MDInlineNode {
  3432. public string $href;
  3433. /**
  3434. * @param string $href
  3435. * @param MDNode|MDNode[] $children
  3436. * @param ?string $title
  3437. */
  3438. public function __construct(string $href, MDNode|array $children, ?string $title=null) {
  3439. parent::__construct($children);
  3440. $this->href = $href;
  3441. if ($title !== null) $this->attributes['title'] = $title;
  3442. }
  3443. public function toHTML(MDState $state): string {
  3444. if (str_starts_with($this->href, 'mailto:')) {
  3445. $escapedLink = MDUtils::escapeObfuscated($this->href);
  3446. } else {
  3447. $escapedLink = MDUtils::escapeHTML($this->href);
  3448. }
  3449. return "<a href=\"{$escapedLink}\"" . $this->htmlAttributes() . ">" . $this->childHTML($state) . "</a>";
  3450. }
  3451. }
  3452. /**
  3453. * A clickable hypertext link where the URL is defined elsewhere by reference.
  3454. */
  3455. class MDReferencedLinkNode extends MDLinkNode {
  3456. public string $reference;
  3457. /**
  3458. * @param string $reference
  3459. * @param MDNode|MDNode[] $children
  3460. */
  3461. public function __construct(string $reference, MDNode|array $children) {
  3462. parent::__construct('', $children);
  3463. $this->reference = $reference;
  3464. }
  3465. public function toHTML(MDState $state): string {
  3466. if ($this->href === '') {
  3467. $url = $state->urlForReference($this->reference);
  3468. if ($url) $this->href = $url;
  3469. $title = $state->urlTitleForReference($this->reference);
  3470. if ($title) $this->attributes['title'] = $title;
  3471. }
  3472. return parent::toHTML($state);
  3473. }
  3474. }
  3475. /**
  3476. * An inline image.
  3477. */
  3478. class MDImageNode extends MDInlineNode {
  3479. public string $src;
  3480. public ?string $alt;
  3481. public function __construct(string $src, ?string $alt) {
  3482. parent::__construct([]);
  3483. $this->src = $src;
  3484. $this->alt = $alt;
  3485. }
  3486. public function toHTML(MDState $state): string {
  3487. $html = "<img src=\"" . MDUtils::escapeHTML($this->src) . "\"";
  3488. if ($this->alt) $html .= " alt=\"" . MDUtils::escapeHTML($this->alt) . "\"";
  3489. $html .= $this->htmlAttributes() . ">";
  3490. return $html;
  3491. }
  3492. }
  3493. /**
  3494. * An inline image where the URL is defined elsewhere by reference.
  3495. */
  3496. class MDReferencedImageNode extends MDImageNode {
  3497. public string $reference;
  3498. public function __construct(string $reference, ?string $alt=null) {
  3499. parent::__construct('', $alt, []);
  3500. $this->reference = $reference;
  3501. }
  3502. public function toHTML(MDState $state): string {
  3503. if ($this->src === '') {
  3504. $url = $state->urlForReference($this->reference);
  3505. if ($url !== null) $this->src = $url;
  3506. $title = $state->urlTitleForReference($this->reference);
  3507. if ($title !== null) $this->attributes['title'] = $title;
  3508. }
  3509. return parent::toHTML($state);
  3510. }
  3511. }
  3512. /**
  3513. * An abbreviation that can be hovered over to see its full expansion.
  3514. */
  3515. class MDAbbreviationNode extends MDInlineNode {
  3516. public string $abbreviation;
  3517. /**
  3518. * @param string $abbreviation
  3519. * @param string $definition
  3520. */
  3521. public function __construct(string $abbreviation, string $definition) {
  3522. parent::__construct([]);
  3523. $this->abbreviation = $abbreviation;
  3524. $this->attributes['title'] = $definition;
  3525. }
  3526. public function toHTML(MDState $state): string {
  3527. return "<abbr" . $this->htmlAttributes() . ">" . MDUtils::escapeHTML($this->abbreviation) . "</abbr>";
  3528. }
  3529. }
  3530. /**
  3531. * A line break that is preserved when rendered to HTML.
  3532. */
  3533. class MDLineBreakNode extends MDInlineNode {
  3534. public function toHTML(MDState $state): string {
  3535. return '<br>';
  3536. }
  3537. public function toPlaintext(MDState $state): string {
  3538. return "\n";
  3539. }
  3540. }
  3541. /**
  3542. * A verbatim HTML tag. May be altered to strip out disallowed attributes or
  3543. * CSS values.
  3544. */
  3545. class MDHTMLTagNode extends MDInlineNode {
  3546. public MDHTMLTag $tag;
  3547. public function __construct(MDHTMLTag $tag) {
  3548. parent::__construct([]);
  3549. $this->tag = $tag;
  3550. }
  3551. public function toHTML(MDState $state): string {
  3552. return "{$this->tag}";
  3553. }
  3554. }
  3555. // -- Main class ------------------------------------------------------------
  3556. /**
  3557. * Markdown parser.
  3558. */
  3559. class Markdown {
  3560. /**
  3561. * Set of standard readers to handle common syntax.
  3562. * @return MDReader[]
  3563. */
  3564. public static function standardReaders(): array {
  3565. if (self::$sharedStandardReaders === null) {
  3566. self::$sharedStandardReaders = [
  3567. new MDUnderlinedHeadingReader(),
  3568. new MDHashHeadingReader(),
  3569. new MDBlockQuoteReader(),
  3570. new MDHorizontalRuleReader(),
  3571. new MDUnorderedListReader(),
  3572. new MDOrderedListReader(),
  3573. new MDFencedCodeBlockReader(),
  3574. new MDIndentedCodeBlockReader(),
  3575. new MDParagraphReader(),
  3576. new MDStrongReader(),
  3577. new MDEmphasisReader(),
  3578. new MDCodeSpanReader(),
  3579. new MDImageReader(),
  3580. new MDLinkReader(),
  3581. new MDHTMLTagReader(),
  3582. ];
  3583. }
  3584. return self::$sharedStandardReaders;
  3585. }
  3586. private static ?array $sharedStandardReaders = null;
  3587. /**
  3588. * All supported readers except `MDLineBreakReader`.
  3589. * @return MDReader[]
  3590. */
  3591. public static function allReaders(): array {
  3592. if (self::$sharedAllReaders === null) {
  3593. $sharedAllReaders = array_merge(self::standardReaders(), [
  3594. new MDSubtextReader(),
  3595. new MDTableReader(),
  3596. new MDDefinitionListReader(),
  3597. new MDFootnoteReader(),
  3598. new MDAbbreviationReader(),
  3599. new MDUnderlineReader(),
  3600. new MDSubscriptReader(),
  3601. new MDStrikethroughReader(),
  3602. new MDHighlightReader(),
  3603. new MDSuperscriptReader(),
  3604. new MDReferencedImageReader(),
  3605. new MDReferencedLinkReader(),
  3606. new MDModifierReader(),
  3607. ]);
  3608. }
  3609. return $sharedAllReaders;
  3610. }
  3611. private static ?array $sharedAllReaders = null;
  3612. /**
  3613. * Shared instance of a parser with standard syntax.
  3614. */
  3615. public static function standardParser(): Markdown {
  3616. if (self::$sharedStandardMarkdown === null) {
  3617. self::$sharedStandardMarkdown = new Markdown(self::standardReaders());
  3618. }
  3619. return self::$sharedStandardMarkdown;
  3620. }
  3621. private static ?Markdown $sharedStandardMarkdown = null;
  3622. /**
  3623. * Shared instance of a parser with all supported syntax.
  3624. */
  3625. public static function completeParser(): Markdown {
  3626. if (self::$sharedCompleteParser === null) {
  3627. self::$sharedCompleteParser = new Markdown(self::allReaders());
  3628. }
  3629. return self::$sharedCompleteParser;
  3630. }
  3631. public static ?Markdown $sharedCompleteParser = null;
    /**
     * Filter for what non-markdown HTML is permitted. HTML generated as a
     * result of markdown is unaffected. Handed to the parsing state on each
     * parse.
     */
    public MDHTMLFilter $tagFilter;
    /**
     * If an exception occurs, attempts to narrow down the portion of the
     * markdown that triggered the error and prints it. For debugging.
     * Investigation mode can be slow. The original error is rethrown either
     * way.
     */
    public bool $investigateErrors = false;
    /**
     * Readers in the order given to the constructor.
     * @var MDReader[]
     */
    private array $readers;
    /**
     * Readers as ordered by `MDReader::sortReaderForBlocks()`.
     * @var MDReader[]
     */
    private array $readersByBlockPriority;
    /**
     * Readers as ordered by `MDReader::sortReadersForTokenizing()`.
     * @var MDReader[]
     */
    private array $readersByTokenPriority;
    /**
     * Result of `MDReader::sortReadersForSubstitution()`.
     * NOTE(review): element type is not declared here; presumably MDReader[]
     * like its siblings, confirm against MDReader.
     */
    private array $readersBySubstitutePriority;
  3650. /**
  3651. * Creates a Markdown parser with the given syntax readers.
  3652. *
  3653. * @param MDReader[] $readers
  3654. */
  3655. public function __construct(?array $readers=null) {
  3656. $this->readers = $readers ?? self::allReaders();
  3657. $this->readersByBlockPriority = MDReader::sortReaderForBlocks($this->readers);
  3658. $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($this->readers);
  3659. $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($this->readers);
  3660. $this->tagFilter = new MDHTMLFilter();
  3661. }
  /**
   * Converts a markdown string to an HTML string.
   *
   * The input is split into lines on "\r\n", "\n", or "\r"; each of these
   * counts as exactly one line break.
   *
   * If parsing throws an `Error` and `$investigateErrors` is enabled, the
   * lines are handed to `investigateException()` before the original error
   * is rethrown.
   *
   * @param string $markdown
   * @param string $elementIdPrefix Optional prefix for generated element
   * `id`s and links to them. For differentiating multiple markdown docs in
   * the same HTML page.
   * @return string HTML
   */
  public function toHTML(string $markdown, string $elementIdPrefix='') {
      // "\r\n" has to be the first alternative: alternation picks the first
      // branch that matches, so listing "\r" ahead of it would split every
      // CRLF into two breaks and inject an empty line after each real line.
      $lines = mb_split('(?:\\r\\n|\\n|\\r)', $markdown);
      try {
          return $this->parse($lines, $elementIdPrefix);
      } catch (Error $e) {
          if ($this->investigateErrors) {
              $this->investigateException($lines, $elementIdPrefix);
          }
          throw $e;
      }
  }
  /**
   * Runs one full parse over already-split lines: builds an `MDState`,
   * gives every reader a `preProcess()` pass, reads the blocks, gives every
   * reader a `postProcess()` pass over the resulting nodes, and renders
   * them with `MDNode::arrayToHTML()`.
   *
   * @param string[] $lines
   * @param string $elementIdPrefix
   * @return string HTML produced by `MDNode::arrayToHTML()`
   */
  private function parse(array $lines, string $elementIdPrefix) {
      // Fresh state per call, seeded with this parser's configuration.
      $parseState = new MDState($lines);
      $parseState->readersByBlockPriority = $this->readersByBlockPriority;
      $parseState->readersByTokenPriority = $this->readersByTokenPriority;
      $parseState->readersBySubstitutePriority = $this->readersBySubstitutePriority;
      $parseState->tagFilter = $this->tagFilter;
      $parseState->elementIdPrefix = $elementIdPrefix;

      // Every reader sees the state before any block is read.
      foreach ($this->readers as $syntaxReader) {
          $syntaxReader->preProcess($parseState);
      }

      $blockNodes = $parseState->readBlocks();

      // Every reader sees the finished node list before rendering.
      foreach ($this->readers as $syntaxReader) {
          $syntaxReader->postProcess($parseState, $blockNodes);
      }

      return MDNode::arrayToHTML($blockNodes, $parseState);
  }
  /**
   * Keeps removing first and last lines of markdown to locate the source of
   * an exception and prints the minimal snippet.
   *
   * Works in two passes. The first pass drops leading lines one at a time
   * and remembers the last start position that still throws. The second
   * pass, starting from that position, drops trailing lines one at a time
   * and remembers the shortest end position that still throws. The lines
   * between those two positions are printed.
   *
   * @param string[] $lines
   * @param string $elementIdPrefix
   */
  private function investigateException(array $lines, string $elementIdPrefix) {
      print("Investigating error...\n");
      $lineCount = count($lines);
      $startIndex = 0;
      // Exclusive end index of the failing range.
      $endIndex = $lineCount;
      // Keep stripping away first line until an exception stops being thrown
      for ($i = 0; $i < $lineCount; $i++) {
          try {
              $this->parse(array_slice($lines, $i), $elementIdPrefix);
              break;
          } catch (Error $e0) {
              $startIndex = $i;
          }
      }
      // Keep stripping away last line until an exception stops being thrown
      for ($i = $lineCount; $i > $startIndex; $i--) {
          try {
              // array_slice() takes a length, not an end index, so the
              // exclusive end $i has to be converted relative to the start.
              $this->parse(array_slice($lines, $startIndex, $i - $startIndex), $elementIdPrefix);
              break;
          } catch (Error $e0) {
              $endIndex = $i;
          }
      }
      $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex - $startIndex));
      print("This portion of markdown caused an unexpected exception:\n{$problematicMarkdown}\n");
  }
  3734. }
  3735. ?>