PHP and Javascript implementations of a simple markdown parser
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

markdown.php 119KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * Static utilities.
  5. */
  class MDUtils {
      // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
      public static $baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';

      // Modified from https://emailregex.com/ to remove capture groups.
      public static $baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

      /**
       * Encodes every character as an HTML numeric entity (`&#NNN;`) to make it
       * marginally more difficult for web scrapers to grab sensitive info. If
       * `$text` starts with `mailto:` that prefix is kept verbatim and only the
       * remainder is obfuscated.
       *
       * @param string $text text to obfuscate
       * @return string sequence of decimal numeric character references
       */
      public static function escapeObfuscated(string $text): string {
          if (str_starts_with($text, 'mailto:')) {
              return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
          }
          $html = '';
          $l = mb_strlen($text);
          for ($p = 0; $p < $l; $p++) {
              $cp = mb_ord(mb_substr($text, $p, 1));
              // A numeric character reference is "&#" + code point + ";".
              // (The previous "&#{{$cp}}" rendered literal braces, e.g. "&#{65}".)
              $html .= "&#{$cp};";
          }
          return $html;
      }

      /**
       * Removes illegal characters from an HTML attribute name: tab, newline,
       * form feed, space, slash, `>`, double quote, single quote, and `=`.
       *
       * @param string $name raw attribute name
       * @return string name with the illegal characters removed
       */
      public static function scrubAttributeName(string $name): string {
          return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
      }

      /**
       * Strips leading indents from a line or lines of markdown. An indent is
       * defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3 spaces)
       * are treated like one indent level. The pattern requires exactly
       * `$levels` indents at the start of a line; a line with fewer is left
       * unchanged.
       *
       * @param string|string[] $line line or array of lines (not modified)
       * @param int $levels number of indent levels to strip
       * @return string|string[] stripped line(s), same shape as the input
       */
      public static function stripIndent(string|array &$line, int $levels=1): string|array {
          $regex = "^(?: {1,4}|\\t){{$levels}}";
          if (is_array($line)) {
              return array_map(
                  static fn(string $l): string => mb_ereg_replace($regex, '', $l),
                  $line
              );
          }
          return mb_ereg_replace($regex, '', $line);
      }

      /**
       * Counts the number of indent levels in a line of text. Partial indents
       * (1 to 3 spaces) are counted as one indent level unless `fullIndentsOnly`
       * is `true`.
       *
       * @param string $line line to inspect (not modified)
       * @param bool $fullIndentsOnly only count 4-space or tab indents
       * @return int number of leading indent levels
       */
      public static function countIndents(string &$line, bool $fullIndentsOnly=false): int {
          // normalize indents to tabs
          $t = mb_ereg_replace($fullIndentsOnly ? "(?: {4}|\\t)" : "(?: {1,4}|\\t)", "\t", $line);
          // remove content after indent, leaving only the leading tabs
          $t = mb_ereg_replace("^(\\t*)(.*?)$", "\\1", $t);
          // count tabs
          return mb_strlen($t);
      }

      /**
       * Returns a copy of an array without any whitespace-only lines at the end.
       *
       * @param string[] $lines
       * @return string[]
       */
      public static function withoutTrailingBlankLines(array $lines): array {
          $stripped = $lines;
          while (count($stripped) > 0 && trim($stripped[count($stripped) - 1]) === '') {
              array_pop($stripped);
          }
          return $stripped;
      }

      /**
       * Tests if an array of lines contains at least one blank. A blank line
       * can contain whitespace.
       *
       * @param string[] $lines
       * @return bool `true` if any line is empty after trimming
       */
      public static function containsBlankLine(array $lines): bool {
          foreach ($lines as $line) {
              if (trim($line) === '') return true;
          }
          return false;
      }

      /**
       * Tests whether two associative arrays hold the same key/value pairs.
       * Values are compared the way `array_diff_assoc` compares them (by
       * string representation).
       *
       * @param array $a first array (not modified)
       * @param array $b second array (not modified)
       * @return bool `true` if both arrays have identical keys with equal values
       */
      public static function equalAssocArrays(array &$a, array &$b): bool {
          // array_diff_assoc only reports entries of $a that are missing from
          // $b, so extra keys in $b would go unnoticed without the size check.
          return count($a) === count($b) && empty(array_diff_assoc($a, $b));
      }
  }
  89. /**
  90. * Token type enum for `MDToken`.
  91. */
  enum MDTokenType {
      // --- Plain text ---------------------------------------------------
      case Text;

      /**
       * Marks only the whitespace leading or trailing a run of text; interior
       * whitespace characters do not each get their own token.
       */
      case Whitespace;

      // --- Single punctuation characters --------------------------------
      case Underscore;
      case Asterisk;
      case Slash;
      case Tilde;
      case Bang;
      case Backtick;
      case Equal;
      case Caret;

      // --- Compound tokens (payload noted per case) ---------------------
      /** Token content holds the label. */
      case Label;
      /** Token content holds the URL; extra holds the title. */
      case URL;
      /** Token content holds the email address; extra holds the title. */
      case Email;
      /** Token content holds the URL. */
      case SimpleLink;
      /** Token content holds the email address. */
      case SimpleEmail;
      /** Token content holds the footnote symbol. */
      case Footnote;
      /** Token modifier holds an `MDTagModifier`. */
      case Modifier;
      /** Token tag holds an `MDHTMLTag`. */
      case HTMLTag;

      // --- Pattern wildcards --------------------------------------------
      /** Wildcard accepted in patterns given to `MDToken::findFirstTokens`. */
      case META_AnyNonWhitespace;
      /** Wildcard accepted in patterns given to `MDToken::findFirstTokens`. */
      case META_OptionalWhitespace;
  }
  120. /**
  121. * Search results from `MDToken.findFirstTokens`.
  122. */
  class MDTokenMatch {
      /**
       * Bundles the tokens that satisfied a search pattern with the position
       * at which the match begins.
       *
       * @param MDToken[] $tokens the matched tokens, in order
       * @param int $index index in the searched array where the match starts
       */
      public function __construct(
          public array $tokens,
          public int $index,
      ) {}
  }
  136. /**
  137. * Search results from `MDToken.findPairedTokens`.
  138. */
  class MDPairedTokenMatch {
      /**
       * Describes an opening pattern, a closing pattern, and the tokens found
       * between them.
       *
       * @param MDToken[] $startTokens tokens that matched the opening pattern
       * @param MDToken[] $contentTokens tokens between the opening and closing matches
       * @param MDToken[] $endTokens tokens that matched the closing pattern
       * @param int $startIndex index of the first opening token
       * @param int $contentIndex index of the first content token
       * @param int $endIndex index of the first closing token
       * @param int $totalLength number of tokens spanned from the first opening
       * token through the last closing token
       */
      public function __construct(
          public array $startTokens,
          public array $contentTokens,
          public array $endTokens,
          public int $startIndex,
          public int $contentIndex,
          public int $endIndex,
          public int $totalLength,
      ) {}
  }
  162. /**
  163. * One lexical unit in inline markdown syntax parsing.
  164. */
  class MDToken {
      /**
       * The original verbatim token string. Required as a plaintext fallback if
       * the token remains unresolved.
       */
      public string $original;
      public MDTokenType $type;
      public ?string $content = null;
      public ?string $extra = null;
      public ?MDHTMLTag $tag = null;
      public ?MDTagModifier $modifier = null;

      /**
       * Creates a token. The `$content` argument is routed to the `modifier`,
       * `tag`, or `content` property depending on its runtime type.
       *
       * @param string $original verbatim token string
       * @param MDTokenType $type token type
       * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
       * the token
       * @param string|null $extra additional content
       */
      public function __construct(string $original, MDTokenType $type,
          string|MDTagModifier|MDHTMLTag|null $content=null,
          ?string $extra=null) {
          $this->original = $original;
          $this->type = $type;
          if ($content instanceof MDTagModifier) {
              $this->modifier = $content;
          } elseif ($content instanceof MDHTMLTag) {
              $this->tag = $content;
          } else {
              $this->content = $content;
          }
          $this->extra = $extra;
      }

      /**
       * Debug representation showing the class name, token type, and content.
       */
      public function __toString(): string {
          $classname = get_class($this);
          // MDTokenType is a pure enum and cannot be converted to a string
          // directly, so its case name is used.
          return "({$classname} type={$this->type->name} content={$this->content})";
      }

      /**
       * Attempts to parse a label token from the beginning of `line`. A label is
       * of the form `[content]`. Nested brackets are balanced, a backslash
       * skips the following character, and an unbalanced `)` aborts the parse.
       * If found, returns an array:
       * - `0`: the entire label including brackets
       * - `1`: the content of the label
       *
       * @param string $line
       * @return ?string[] match groups or null if not found
       */
      public static function tokenizeLabel(string $line): ?array {
          if (!str_starts_with($line, '[')) return null;
          $parenCount = 0;
          $bracketCount = 0;
          $l = mb_strlen($line);
          for ($p = 1; $p < $l; $p++) {
              $ch = mb_substr($line, $p, 1);
              if ($ch === '\\') {
                  // escaped character: skip whatever follows the backslash
                  $p++;
              } elseif ($ch === '(') {
                  $parenCount++;
              } elseif ($ch === ')') {
                  $parenCount--;
                  if ($parenCount < 0) return null;
              } elseif ($ch === '[') {
                  $bracketCount++;
              } elseif ($ch === ']') {
                  if ($bracketCount > 0) {
                      $bracketCount--;
                  } else {
                      // closing bracket of the outermost label
                      return [ mb_substr($line, 0, $p + 1), mb_substr($line, 1, $p - 1) ];
                  }
              }
          }
          return null;
      }

      private static $urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)'; // 1=URL, 2=title
      private static $urlRegex = '^\\((\\S+?)\\)'; // 1=URL

      /**
       * Attempts to parse a URL token from the beginning of `line`. A URL token
       * is of the form `(url)` or `(url "title")`. Returns `null` when the text
       * also parses as an email token. If found, returns an array:
       * - `0`: the entire URL token including parentheses
       * - `1`: the URL
       * - `2`: the optional title, or `null`
       *
       * @param string $line
       * @return ?array token tuple
       */
      public static function tokenizeURL(string $line): ?array {
          $groups = [];
          if (mb_eregi(self::$urlWithTitleRegex, $line, $groups)) {
              if (self::tokenizeEmail($line)) return null; // make sure it's not better described as an email address
              return $groups;
          }
          if (mb_eregi(self::$urlRegex, $line, $groups)) {
              if (self::tokenizeEmail($line)) return null;
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Attempts to parse an email address from the beginning of `line`. An
       * email address is of the form `(user@example.com)` or
       * `(user@example.com "link title")`. If found, returns an array:
       * - `0`: the entire token including parentheses
       * - `1`: the email address
       * - `2`: the optional link title, or `null`
       *
       * @param string $line
       * @return ?string[] token tuple
       */
      public static function tokenizeEmail(string $line): ?array {
          // Initialized explicitly; a bare `$groups;` statement reads an
          // undefined variable and emits a warning.
          $groups = [];
          if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s+\"(.*?)\"\\s*\\)",
              $line, $groups)) {
              return $groups;
          }
          if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s*\\)", $line, $groups)) {
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
       * If found, returns a `MDTokenMatch`, otherwise `null`.
       *
       * `META_AnyNonWhitespace` and `META_OptionalWhitespace` are supported as
       * wildcard pattern elements. Note that `META_OptionalWhitespace` may give
       * a result with a variable number of tokens.
       *
       * @param (MDToken|MDNode)[] $tokensToSearch - mixed array of `MDToken` and
       * `MDNode` elements
       * @param MDTokenType[] $pattern - contiguous run of token types to find
       * @param int $startIndex - token index to begin searching (defaults to 0)
       * @return ?MDTokenMatch match object, or `null` if not found
       */
      public static function findFirstTokens(array $tokensToSearch, array $pattern, int $startIndex=0): ?MDTokenMatch {
          $tokenCount = count($tokensToSearch);
          // $pattern is an array, so its length is count(), not mb_strlen()
          // (which throws a TypeError for arrays).
          $patternLength = count($pattern);
          for ($t = $startIndex; $t < $tokenCount; $t++) {
              $matchedAll = true;
              $matched = [];
              // Negative adjustment applied each time an optional whitespace
              // element consumed no token.
              $patternOffset = 0;
              for ($p = 0; $p < $patternLength; $p++) {
                  $t0 = $t + $p + $patternOffset;
                  // NOTE(review): this also returns null when only optional
                  // whitespace elements remain at the end of the token array.
                  if ($t0 >= $tokenCount) return null;
                  $token = $tokensToSearch[$t0];
                  $elem = $pattern[$p];
                  if ($elem === MDTokenType::META_OptionalWhitespace) {
                      if ($token instanceof MDToken && $token->type === MDTokenType::Whitespace) {
                          array_push($matched, $token);
                      } else {
                          // nothing consumed; re-test this token against the next element
                          $patternOffset--;
                      }
                  } elseif ($elem === MDTokenType::META_AnyNonWhitespace) {
                      if ($token instanceof MDToken && $token->type === MDTokenType::Whitespace) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  } else {
                      if (!($token instanceof MDToken) || $token->type !== $elem) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  }
              }
              if ($matchedAll) {
                  return new MDTokenMatch($matched, $t);
              }
          }
          return null;
      }

      /**
       * Searches an array of MDToken for a given starting pattern and ending
       * pattern and returns match info about both and the tokens in between.
       *
       * If `contentValidator` is specified, it will be called with the content
       * tokens of a potential match. If the validator returns `true`, the result
       * will be accepted and returned by this method. If the validator returns
       * `false`, this method will keep looking for another matching pair. If no
       * validator is given the first match with non-empty content is returned.
       *
       * If a match is found, a `MDPairedTokenMatch` is returned with details
       * of the opening tokens, closing tokens, and content tokens between. Otherwise
       * `null` is returned.
       *
       * @param MDToken[] $tokensToSearch - array of `MDToken` to search in
       * @param MDTokenType[] $startPattern - array of `MDTokenType` to find first
       * @param MDTokenType[] $endPattern - array of `MDTokenType` to find positioned after `startPattern`
       * @param ?callable $contentValidator - optional validator function. If provided, will be passed an array of inner `MDToken`, and the function can return `true` to accept the contents or `false` to keep searching
       * @param int $startIndex - token index where searching should begin
       * @return ?MDPairedTokenMatch match, or `null`
       */
      public static function findPairedTokens(array $tokensToSearch,
          array $startPattern, array $endPattern, ?callable $contentValidator=null,
          int $startIndex=0): ?MDPairedTokenMatch {
          $tokenCount = count($tokensToSearch);
          for ($s = $startIndex; $s < $tokenCount; $s++) {
              // Static methods must be called through self::; a bare
              // findFirstTokens() call resolves to an undefined global function.
              $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
              if ($startMatch === null) return null;
              $contentStart = $startMatch->index + count($startMatch->tokens);
              $endStart = $contentStart;
              while ($endStart < $tokenCount) {
                  $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                  if ($endMatch === null) break;
                  $contentLength = $endMatch->index - $contentStart;
                  $contents = array_slice($tokensToSearch, $contentStart, $contentLength);
                  if (count($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                      return new MDPairedTokenMatch($startMatch->tokens,
                          $contents,
                          $endMatch->tokens,
                          $startMatch->index,
                          $contentStart,
                          $endMatch->index,
                          $endMatch->index + count($endMatch->tokens) - $startMatch->index);
                  }
                  // Contents rejected. Try next end match.
                  $endStart = $endMatch->index + 1;
              }
              // No end matches. Increment start match (the loop's $s++ moves
              // one past the start match just tried).
              $s = $startMatch->index;
          }
          return null;
      }

      /**
       * Compares this token with another value field by field.
       *
       * @param mixed $other value to compare against
       * @return bool `true` if `$other` is an `MDToken` with matching original
       * text, type, content, extra, tag (same instance), and modifier (loose
       * equality)
       */
      public function equals($other): bool {
          if (!($other instanceof MDToken)) return false;
          if ($other->original !== $this->original) return false;
          if ($other->type != $this->type) return false;
          if ($other->content !== $this->content) return false;
          if ($other->extra !== $this->extra) return false;
          if ($other->tag !== $this->tag) return false;
          if ($other->modifier != $this->modifier) return false;
          return true;
      }
  }
  398. /**
  399. * Parsing and rendering state. Passed around throughout the parsing process.
  400. *
  401. * States are hierarchical. A sub-state can be created by calling `.copy()` with
  402. * a new array of lines. The sub-state points back to its parent state. This
  403. * is done to parse inner content of a syntax as its own standalone document.
  404. *
  405. * If a custom `MDReader` implementation wants to store data in this object,
  406. * always do so on `state.root` to ensure it's stored on the original state,
  407. * not a child state. Otherwise data may be lost when the sub-state is discarded.
  408. */
  409. class MDState {
  410. /**
  411. * Ascends the parent chain to the root `MDState` instance. This should be
  412. * used when referencing most stored fields except `lines` and `p`.
  413. */
  public function root(): MDState {
      // Follow parent links upward until reaching a state without a parent.
      $state = $this;
      while ($state->parent) {
          $state = $state->parent;
      }
      return $state;
  }
  417. /**
  418. * Lines of the markdown document. The current line index is pointed to by `p`.
  419. *
  420. * @var string[]
  421. */
  422. public array $lines;
  423. /**
  424. * The current line in `lines`.
  425. */
  public function currentLine(): ?string {
      // Past the last line there is nothing to return.
      if ($this->p >= sizeof($this->lines)) {
          return null;
      }
      return $this->lines[$this->p];
  }
  429. /**
  430. * Current line pointer into array `lines`.
  431. */
  432. public int $p = 0;
  433. /**
  434. * General storage for anything readers need to track during the parsing
  435. * process.
  436. */
  437. public array $userInfo = [];
  438. private ?MDState $parent = null;
  439. /**
  440. * Array of `MDReader`s sorted by block reading priority.
  441. * @var MDReader[]
  442. */
  443. public array $readersByBlockPriority = [];
  444. /**
  445. * Array of `MDReader`s sorted by tokenization priority.
  446. * @var MDReader[]
  447. */
  448. public array $readersByTokenPriority = [];
  449. /**
  450. * Array of tuples of `pass:number` and `MDReader` sorted by substitution
  451. * priority.
  452. * @var array[]
  453. */
  454. public array $readersBySubstitutePriority = [];
  455. /**
  456. * Prefix to include in any generated `id` attributes on HTML elements.
  457. * Useful for keeping elements unique in multiple parsed documents in the
  458. * same HTML page.
  459. */
  460. public string $elementIdPrefix = '';
  461. /**
  462. * Filter for removing unapproved HTML tags, attributes, and values.
  463. */
  464. public MDHTMLFilter $tagFilter;
  465. /**
  466. * @param string[] $lines - lines of markdown text
  467. */
  468. public function __construct(array $lines) {
  469. $this->lines = $lines;
  470. }
  471. /**
  472. * Creates a copy of this state with new lines. Useful for parsing nested
  473. * content.
  474. *
  475. * @param string[] $lines
  476. * @return MDState copied sub-state
  477. */
  478. public function copy(array $lines) {
  479. $cp = new MDState($lines);
  480. $cp->parent = $this;
  481. return $cp;
  482. }
  483. /**
  484. * Tests if there are at least `minCount` lines available to read. If `p`
  485. * is not provided it will be relative to `this.p`.
  486. */
  487. public function hasLines(int $minCount, ?int $p=null): bool {
  488. $relativeTo = ($p === null) ? $this->p : $p;
  489. return $relativeTo + $minCount <= sizeof($this->lines);
  490. }
  491. /**
  492. * Reads and returns an array of blocks from the current line pointer.
  493. *
  494. * @return MDBlockNode[] parsed blocks
  495. */
  496. public function readBlocks(): array {
  497. $blocks = [];
  498. while ($this->hasLines(1)) {
  499. $block = $this->readNextBlock();
  500. if ($block) {
  501. array_push($blocks, $block);
  502. } else {
  503. break;
  504. }
  505. }
  506. return $blocks;
  507. }
  508. /**
  509. * Creates a simple `MDBlockNode` if no other registered blocks match.
  510. */
  511. private function readFallbackBlock(): ?MDBlockNode {
  512. if ($this->p >= sizeof($this->lines)) return null;
  513. $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
  514. if (sizeof($lines) == 0) return null;
  515. $this->p = sizeof($this->lines);
  516. return new MDBlockNode($this->inlineMarkdownToNode(implode("\n", $lines)));
  517. }
  518. /**
  519. * Attempts to read one block from the current line pointer. The pointer
  520. * will be positioned just after the end of the block.
  521. */
  522. private function readNextBlock(): ?MDBlockNode {
  523. while ($this->hasLines(1) && mb_strlen(trim($this->lines[$this->p])) == 0) {
  524. $this->p++;
  525. }
  526. if (!$this->hasLines(1)) return null;
  527. foreach ($this->root()->readersByBlockPriority as $reader) {
  528. $startP = $this->p;
  529. $block = $reader->readBlock($this);
  530. if ($block) {
  531. if ($this->p == $startP) {
  532. $readerClassName = get_class($reader);
  533. $blockClassName = get_class($block);
  534. throw new Error("{$readerClassName} returned an " .
  535. "{$blockClassName} without incrementing MDState.p. " .
  536. "This could lead to an infinite loop.");
  537. }
  538. return $block;
  539. }
  540. }
  541. $fallback = $this->readFallbackBlock();
  542. return $fallback;
  543. }
  544. /**
  545. * @param string $line
  546. * @return MDToken[]
  547. */
  548. private function inlineMarkdownToTokens(string $line): array {
  549. if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);
  550. $tokens = [];
  551. $text = '';
  552. $expectLiteral = false;
  553. /**
  554. * Flushes accumulated content in `text` to `tokens`.
  555. */
  556. $endText = function() use (&$tokens, &$text) {
  557. if (mb_strlen($text) == 0) return;
  558. $textGroups = [];
  559. if (mb_eregi('^(\s+)(.*?)$', $text, $textGroups)) {
  560. array_push($tokens, new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]));
  561. $text = $textGroups[2];
  562. }
  563. if (mb_eregi('^(.*?)(\s+)$', $text, $textGroups)) {
  564. array_push($tokens, new MDToken($textGroups[1], MDTokenType::Text, $textGroups[1]));
  565. array_push($tokens, new MDToken($textGroups[2], MDTokenType::Whitespace, $textGroups[2]));
  566. } else {
  567. array_push($tokens, new MDToken($text, MDTokenType::Text, $text));
  568. }
  569. $text = '';
  570. };
  571. for ($p = 0; $p < mb_strlen($line); $p++) {
  572. $ch = mb_substr($line, $p, 1);
  573. $remainder = mb_substr($line, $p);
  574. if ($expectLiteral) {
  575. $text .= $ch;
  576. $expectLiteral = false;
  577. continue;
  578. }
  579. if ($ch == '\\') {
  580. $expectLiteral = true;
  581. continue;
  582. }
  583. $found = false;
  584. foreach ($this->root()->readersByTokenPriority as $reader) {
  585. $token = $reader->readToken($this, $remainder);
  586. if ($token === null) continue;
  587. $endText();
  588. array_push($tokens, $token);
  589. if ($token->original == null || mb_strlen($token->original) == 0) {
  590. $readerClassName = get_class($reader);
  591. throw new Error(`{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.`);
  592. }
  593. $p += mb_strlen($token->original) - 1;
  594. $found = true;
  595. break;
  596. }
  597. if (!$found) {
  598. $text .= $ch;
  599. }
  600. }
  601. $endText();
  602. return $tokens;
  603. }
  604. /**
  605. * Converts a line of markdown to an `MDInlineNode`.
  606. *
  607. * @param string|string[] $line
  608. * @return MDInlineNode
  609. */
  610. public function inlineMarkdownToNode(string|array $line): MDInlineNode {
  611. $nodes = $this->inlineMarkdownToNodes($line);
  612. return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
  613. }
  614. /**
  615. * Converts a line of markdown to an array of `MDInlineNode`s.
  616. *
  617. * @param string|string[] $line
  618. * @return MDInlineNode[]
  619. */
  620. public function inlineMarkdownToNodes(string|array $line): array {
  621. $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
  622. return $this->tokensToNodes($tokens);
  623. }
  624. /**
  625. * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
  626. * of only `MDInlineNode` via repeated `MDReader` substition.
  627. *
  628. * @param (MDToken|MDInlineNode)[] $tokens
  629. * @return MDInlineNode[]
  630. */
  631. public function tokensToNodes(array $tokens): array {
  632. $nodes = $tokens;
  633. // Perform repeated substitutions, converting sequences of tokens into
  634. // nodes, until no more substitutions can be made.
  635. $anyChanges = false;
  636. do {
  637. $anyChanges = false;
  638. foreach ($this->root()->readersBySubstitutePriority as $readerTuple) {
  639. /** @var int */
  640. $pass = $readerTuple[0];
  641. /** @var MDReader */
  642. $reader = $readerTuple[1];
  643. $changed = $reader->substituteTokens($this, $pass, $nodes);
  644. if (!$changed) continue;
  645. $anyChanges = true;
  646. break;
  647. }
  648. } while ($anyChanges);
  649. // Convert any remaining tokens to text nodes. Also apply any inline
  650. // CSS modifiers.
  651. $lastNode = null;
  652. $me = $this;
  653. $nodes = array_map(function($node) use ($lastNode, $me) {
  654. if ($node instanceof MDToken) {
  655. /** @var MDToken */
  656. $token = $node;
  657. if ($token->type == MDTokenType::Modifier && $lastNode) {
  658. $me->root()->tagFilter->scrubModifier($token->modifier);
  659. $token->modifier->applyTo($lastNode);
  660. $lastNode = null;
  661. return new MDTextNode('');
  662. }
  663. $lastNode = null;
  664. return new MDTextNode($token->original);
  665. } elseif ($node instanceof MDNode) {
  666. $lastNode = ($node instanceof MDTextNode) ? null : $node;
  667. return $node;
  668. } else {
  669. $nodeClassName = get_class($node);
  670. throw new Error("Unexpected node type {$nodeClassName}");
  671. }
  672. }, $nodes);
  673. return $nodes;
  674. }
  675. /**
  676. * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
  677. * and `MDReferencedImageReader`.
  678. * @var array symbol -> URL
  679. */
  680. private array $referenceToURL = [];
  681. /**
  682. * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
  683. * and `MDReferencedImageReader`.
  684. * @var array symbol -> title string
  685. */
  686. private array $referenceToTitle = [];
  687. /**
  688. * Defines a URL by reference symbol.
  689. */
  690. public function defineURL(string $reference, string $url, ?string $title=null) {
  691. $this->root()->referenceToURL[mb_strtolower($reference)] = $url;
  692. if ($title !== null) $this->root()->referenceToTitle[mb_strtolower($reference)] = $title;
  693. }
  694. /**
  695. * Returns the URL associated with a reference symbol.
  696. */
  697. public function urlForReference(string $reference): ?string {
  698. return $this->root()->referenceToURL[mb_strtolower($reference)] ?? null;
  699. }
  700. /**
  701. * Returns the link title associated with a reference symbol.
  702. */
  703. public function urlTitleForReference(string $reference): ?string {
  704. return $this->root()->referenceToTitle[mb_strtolower($reference)] ?? null;
  705. }
  706. }
  707. /**
  708. * Defines a set of allowable HTML tags, attributes, and CSS.
  709. */
  710. class MDHTMLFilter {
  711. /**
  712. * Mapping of permitted lowercase tag names to objects containing allowable
  713. * attributes for those tags. Does not need to include those attributes
  714. * defined in `allowableGlobalAttributes`.
  715. *
  716. * Values are objects with allowable lowercase attribute names mapped to
  717. * allowable value patterns. A `*` means any value is acceptable. Multiple
  718. * allowable values can be joined together with `|`. These special symbols
  719. * represent certain kinds of values and can be used in combination or in
  720. * place of literal values.
  721. *
  722. * - `{classlist}`: A list of legal CSS classnames, separated by spaces
  723. * - `{int}`: An integer
  724. * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
  725. * - `{style}`: One or more CSS declarations, separated by semicolons (simple
  726. * `key: value;` syntax only)
  727. * - `{url}`: A URL
  728. * @type {object}
  729. */
  730. public array $allowableTags = [
  731. 'address' => [
  732. 'cite' => '{url}',
  733. ],
  734. 'h1' => [],
  735. 'h2' => [],
  736. 'h3' => [],
  737. 'h4' => [],
  738. 'h5' => [],
  739. 'h6' => [],
  740. 'blockquote' => [],
  741. 'dl' => [],
  742. 'dt' => [],
  743. 'dd' => [],
  744. 'div' => [],
  745. 'hr' => [],
  746. 'ul' => [],
  747. 'ol' => [
  748. 'start' => '{int}',
  749. 'type' => 'a|A|i|I|1',
  750. ],
  751. 'li' => [
  752. 'value' => '{int}',
  753. ],
  754. 'p' => [],
  755. 'pre' => [],
  756. 'table' => [],
  757. 'thead' => [],
  758. 'tbody' => [],
  759. 'tfoot' => [],
  760. 'tr' => [],
  761. 'td' => [],
  762. 'th' => [],
  763. 'a' => [
  764. 'href' => '{url}',
  765. 'target' => '*',
  766. ],
  767. 'abbr' => [],
  768. 'b' => [],
  769. 'br' => [],
  770. 'cite' => [],
  771. 'code' => [],
  772. 'data' => [
  773. 'value' => '*',
  774. ],
  775. 'dfn' => [],
  776. 'em' => [],
  777. 'i' => [],
  778. 'kbd' => [],
  779. 'mark' => [],
  780. 'q' => [
  781. 'cite' => '{url}',
  782. ],
  783. 's' => [],
  784. 'samp' => [],
  785. 'small' => [],
  786. 'span' => [],
  787. 'strong' => [],
  788. 'sub' => [],
  789. 'sup' => [],
  790. 'time' => [
  791. 'datetime' => '*',
  792. ],
  793. 'u' => [],
  794. 'var' => [],
  795. 'wbr' => [],
  796. 'img' => [
  797. 'alt' => '*',
  798. 'href' => '{url}',
  799. ],
  800. 'figure' => [],
  801. 'figcaption' => [],
  802. 'del' => [],
  803. 'ins' => [],
  804. 'details' => [],
  805. 'summary' => [],
  806. ];
  807. /**
  808. * Mapping of allowable lowercase global attributes to their permitted
  809. * values. Uses same value pattern syntax as described in `allowableTags`.
  810. * @type {object}
  811. */
  812. public array $allowableGlobalAttributes = [
  813. 'class' => '{classlist}',
  814. 'data-*' => '*',
  815. 'dir' => 'ltr|rtl|auto',
  816. 'id' => '*',
  817. 'lang' => '*',
  818. 'style' => '{style}',
  819. 'title' => '*',
  820. 'translate' => 'yes|no|{none}',
  821. ];
  822. /**
  823. * Mapping of allowable CSS style names to their allowable value patterns.
  824. * Multiple values can be delimited with `|` characters. Limited support
  825. * so far.
  826. *
  827. * Recognized special values:
  828. * - `{color}`: A hex or named color
  829. *
  830. * @type {object}
  831. */
  832. public array $allowableStyleKeys = [
  833. 'background-color' => '{color}',
  834. 'color' => '{color}',
  835. ];
  836. /**
  837. * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
  838. * itself has already been whitelisted.
  839. *
  840. * @param {MDHTMLTag} tag - HTML tag
  841. */
  842. public function scrubTag(MDHTMLTag $tag) {
  843. foreach ($tag->attributes as $name => $value) {
  844. if (!$this->isValidAttributeName($tag->tagName, $name)) {
  845. unset($tag->attributes[$name]);
  846. }
  847. if (!$this->isValidAttributeValue($tag->tagName, $name, $value)) {
  848. unset($tag->attributes[$name]);
  849. }
  850. }
  851. }
  852. /**
  853. * Scrubs all forbidden attributes from an HTML modifier.
  854. *
  855. * @param MDTagModifier $modifier
  856. * @param ?string $tagName HTML tag name, if known, otherwise only
  857. * global attributes will be permitted
  858. */
  859. public function scrubModifier(MDHTMLModifier $modifier, ?string $tagName) {
  860. if (sizeof($modifier->cssClasses) > 0) {
  861. $classList = implode(' ', $modifier->cssClasses);
  862. if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
  863. $modifier->cssClasses = [];
  864. }
  865. }
  866. if ($modifier->cssId !== null) {
  867. if (!$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
  868. $modifier->cssId = null;
  869. }
  870. }
  871. if (!$this->isValidAttributeName($tagName, 'style')) {
  872. $modifier->cssStyles = [];
  873. } else {
  874. foreach ($modifier->cssStyles as $key => $val) {
  875. if (!$this->isValidStyleValue($key, $val)) {
  876. unset($modifier->cssStyles[$key]);
  877. }
  878. }
  879. }
  880. foreach ($modifier->attributes as $key => $val) {
  881. if (!$this->isValidAttributeValue($tagName, $key, $val)) {
  882. unset($modifier->attributes[$key]);
  883. }
  884. }
  885. }
  886. /**
  887. * Tests if an HTML tag name is permitted.
  888. */
  889. public function isValidTagName(string $tagName): bool {
  890. return ($this->allowableTags[mb_strtolower($tagName)] ?? null) !== null;
  891. }
  892. /**
  893. * Tests if an HTML attribute name is permitted.
  894. */
  895. public function isValidAttributeName(?string $tagName, string $attributeName): bool {
  896. $lcAttributeName = mb_strtolower($attributeName);
  897. if (($this->allowableGlobalAttributes[$lcAttributeName] ?? null) !== null) {
  898. return true;
  899. }
  900. foreach ($this->allowableGlobalAttributes as $pattern => $valuePattern) {
  901. if (!str_ends_with($pattern, '*')) continue;
  902. $patternPrefix = mb_substr($pattern, 0, mb_strlen($pattern) - 1);
  903. if (str_starts_with($lcAttributeName, $patternPrefix)) {
  904. return true;
  905. }
  906. }
  907. if ($tagName === null) return false;
  908. $lcTagName = mb_strtolower($tagName);
  909. $tagAttributes = $this->allowableTags[$lcTagName];
  910. if ($tagAttributes !== null) {
  911. return ($tagAttributes[$lcAttributeName] ?? null) !== null;
  912. }
  913. return false;
  914. }
  915. /**
  916. * Tests if an attribute value is allowable.
  917. */
  918. public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
  919. $lcAttributeName = mb_strtolower($attributeName);
  920. $globalPattern = $this->allowableGlobalAttributes[$lcAttributeName] ?? null;
  921. if ($globalPattern !== null) {
  922. return $this->attributeValueMatchesPattern($attributeValue, $globalPattern);
  923. }
  924. foreach ($this->allowableGlobalAttributes as $namePattern => $valuePattern) {
  925. if (str_ends_with($namePattern, '*') && str_starts_with($lcAttributeName, mb_substr($namePattern, 0, mb_strlen($namePattern) - 1))) {
  926. return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
  927. }
  928. }
  929. if ($tagName === null) return false;
  930. $lcTagName = mb_strtolower($tagName);
  931. $tagAttributes = $this->allowableTags[$lcTagName] ?? null;
  932. if ($tagAttributes === null) return false;
  933. $valuePattern = $tagAttributes[$lcAttributeName] ?? null;
  934. if ($valuePattern === null) return false;
  935. return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
  936. }
  937. private static string $permissiveURLRegex = '^\\S+$';
  938. private static string $integerRegex = '^[\\-]?\\d+$';
  939. private static string $classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';
  940. private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
  941. $options = explode('|', $pattern);
  942. foreach ($options as $option) {
  943. switch ($option) {
  944. case '*':
  945. return true;
  946. case '{classlist}':
  947. if (mb_eregi(self::$classListRegex, $value)) return true;
  948. break;
  949. case '{int}':
  950. if (mb_eregi(self::$integerRegex, $value)) return true;
  951. break;
  952. case '{none}':
  953. if ($value === true) return true;
  954. break;
  955. case '{style}':
  956. if ($this->isValidStyleDeclaration($value)) return true;
  957. break;
  958. case '{url}':
  959. if (mb_eregi(self::$permissiveURLRegex, $value)) return true;
  960. break;
  961. default:
  962. if ($value === $option) return true;
  963. break;
  964. }
  965. }
  966. return false;
  967. }
  968. /**
  969. * Tests if a string of one or more style `key: value;` declarations is
  970. * fully allowable.
  971. */
  972. public function isValidStyleDeclaration(string $styles): bool {
  973. $settings = explode(';', $styles);
  974. foreach ($settings as $setting) {
  975. if (mb_strlen(trim($setting)) == 0) continue;
  976. $parts = explode(':', $setting);
  977. if (sizeof($parts) != 2) return false;
  978. $name = trim($parts[0]);
  979. if (!$this->isValidStyleKey($name)) return false;
  980. $value = trim($parts[1]);
  981. if (!$this->isValidStyleValue($name, $value)) return false;
  982. }
  983. return true;
  984. }
  985. /**
  986. * Tests if a CSS style key is allowable.
  987. */
  988. public function isValidStyleKey(string $key): bool {
  989. return ($this->allowableStyleKeys[$key] ?? null) !== null;
  990. }
  991. /**
  992. * Tests if a CSS style value is allowable.
  993. */
  994. public function isValidStyleValue(string $key, string $value): bool {
  995. $pattern = $this->allowableStyleKeys[$key] ?? null;
  996. if ($pattern === null) return false;
  997. $options = explode('|', $pattern);
  998. foreach ($options as $option) {
  999. switch ($option) {
  1000. case '{color}':
  1001. if ($this->isValidCSSColor($value)) return true;
  1002. default:
  1003. if ($value === $option) return true;
  1004. }
  1005. }
  1006. return false;
  1007. }
  1008. private static string $styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';
  1009. private function isValidCSSColor(string $value): bool {
  1010. return mb_eregi(self::$styleColorRegex, $value);
  1011. }
  1012. }
  /**
   * Represents a single HTML tag. Paired tags are represented separately.
   */
  class MDHTMLTag {
      // Parser states used by `fromLineStart()`
      private const EXPECT_OPEN_BRACKET = 0;
      private const EXPECT_CLOSER_OR_NAME = 1;
      private const EXPECT_NAME = 2;
      private const EXPECT_ATTRIBUTE_NAME_OR_END = 3;
      private const EXPECT_EQUALS_OR_ATTRIBUTE_OR_END = 4;
      private const EXPECT_ATTRIBUTE_VALUE = 5;
      private const EXPECT_CLOSE_BRACKET = 6;

      private static string $htmlTagNameFirstRegex = '[a-z]';
      private static string $htmlTagNameMedialRegex = '[a-z0-9]';
      private static string $htmlAttributeNameFirstRegex = '[a-z]';
      private static string $htmlAttributeNameMedialRegex = '[a-z0-9-]';
      private static string $whitespaceCharRegex = '\\s';

      /**
       * Verbatim string of the original parsed tag. Not modified. Should be
       * considered unsafe for inclusion in the final document. Use
       * `__toString()` instead.
       */
      public string $original;

      public string $tagName;

      public bool $isCloser;

      /**
       * Map of attribute names to value strings. An attribute given without
       * a value maps to `true`.
       */
      public array $attributes;

      /**
       * @param string $original
       * @param string $tagName
       * @param bool $isCloser
       * @param array $attributes
       */
      public function __construct(string $original, string $tagName, bool $isCloser,
          array $attributes) {
          $this->original = $original;
          $this->tagName = $tagName;
          $this->isCloser = $isCloser;
          $this->attributes = $attributes;
      }

      /**
       * Renders the tag as HTML. Attribute names are passed through
       * `MDUtils::scrubAttributeName()` and values are entity-escaped. A
       * closing tag is rendered without attributes.
       */
      public function __toString(): string {
          if ($this->isCloser) {
              return "</{$this->tagName}>";
          }
          $html = '<' . $this->tagName;
          foreach ($this->attributes as $key => $value) {
              $safeName = MDUtils::scrubAttributeName($key);
              if ($value === true) {
                  $html .= " {$safeName}";
              } else {
                  $escapedValue = htmlentities("{$value}", ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
                  $html .= " {$safeName}=\"{$escapedValue}\"";
              }
          }
          return $html . '>';
      }

      /**
       * Tests whether another value is an `MDHTMLTag` with the same tag name,
       * closer flag, and attributes. `original` is not compared.
       */
      public function equals($other): bool {
          if (!($other instanceof MDHTMLTag)) return false;
          if ($other->tagName != $this->tagName) return false;
          if ($other->isCloser != $this->isCloser) return false;
          return MDUtils::equal($other->attributes, $this->attributes);
      }

      /**
       * Checks the start of the given string for presence of an HTML tag.
       *
       * The string is scanned one character at a time through a small state
       * machine. Attribute values may be double-quoted, single-quoted, or
       * unquoted. Quoted values have HTML entities decoded. An unquoted value
       * ends at whitespace or at the closing `>`.
       *
       * @param string $line text that may begin with an HTML tag
       * @return ?MDHTMLTag the parsed tag, or `null` if the string does not
       *   begin with a complete tag
       */
      public static function fromLineStart(string $line): ?MDHTMLTag {
          $isCloser = false;
          $tagName = '';
          $attributeName = '';
          $attributeValue = '';
          // null = no value started, '' = unquoted value, otherwise the
          // quote character that opened the value
          $attributeQuote = null;
          $attributes = [];
          $fullTag = null;
          /**
           * Stores the attribute being accumulated, if any, and resets the
           * accumulators. A named attribute with no value is stored as `true`.
           */
          $endAttribute = static function(bool $unescape=false) use (&$attributes, &$attributeName, &$attributeValue, &$attributeQuote): void {
              if ($attributeName !== '') {
                  if ($attributeValue !== '' || $attributeQuote !== null) {
                      $attributes[$attributeName] = $unescape
                          ? html_entity_decode($attributeValue, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8')
                          : $attributeValue;
                  } else {
                      $attributes[$attributeName] = true;
                  }
              }
              $attributeName = '';
              $attributeValue = '';
              $attributeQuote = null;
          };
          $expect = self::EXPECT_OPEN_BRACKET;
          $length = mb_strlen($line);
          for ($p = 0; $p < $length && $fullTag === null; $p++) {
              $ch = mb_substr($line, $p, 1);
              $isWhitespace = mb_eregi(self::$whitespaceCharRegex, $ch);
              switch ($expect) {
                  case self::EXPECT_OPEN_BRACKET:
                      if ($ch !== '<') return null;
                      $expect = self::EXPECT_CLOSER_OR_NAME;
                      break;
                  case self::EXPECT_CLOSER_OR_NAME:
                      if ($ch === '/') {
                          $isCloser = true;
                      } else {
                          $p--; // re-read this character as part of the name
                      }
                      $expect = self::EXPECT_NAME;
                      break;
                  case self::EXPECT_NAME:
                      if ($tagName === '') {
                          if (!mb_eregi(self::$htmlTagNameFirstRegex, $ch)) return null;
                          $tagName .= $ch;
                      } elseif (mb_eregi(self::$htmlTagNameMedialRegex, $ch)) {
                          $tagName .= $ch;
                      } else {
                          $p--; // re-read this character in the next state
                          $expect = $isCloser ? self::EXPECT_CLOSE_BRACKET : self::EXPECT_ATTRIBUTE_NAME_OR_END;
                      }
                      break;
                  case self::EXPECT_ATTRIBUTE_NAME_OR_END:
                      if ($attributeName === '') {
                          if ($isWhitespace) {
                              // skip whitespace
                          } elseif ($ch === '/') {
                              $expect = self::EXPECT_CLOSE_BRACKET;
                          } elseif ($ch === '>') {
                              $fullTag = mb_substr($line, 0, $p + 1);
                          } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                              $attributeName .= $ch;
                          } else {
                              return null;
                          }
                      } elseif ($isWhitespace) {
                          $expect = self::EXPECT_EQUALS_OR_ATTRIBUTE_OR_END;
                      } elseif ($ch === '/') {
                          $endAttribute();
                          $expect = self::EXPECT_CLOSE_BRACKET;
                      } elseif ($ch === '>') {
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif ($ch === '=') {
                          $expect = self::EXPECT_ATTRIBUTE_VALUE;
                      } elseif (mb_eregi(self::$htmlAttributeNameMedialRegex, $ch)) {
                          $attributeName .= $ch;
                      } else {
                          return null;
                      }
                      break;
                  case self::EXPECT_EQUALS_OR_ATTRIBUTE_OR_END:
                      // A pending valueless attribute is stored either here,
                      // when another attribute starts, or by the final
                      // `$endAttribute()` call after the loop.
                      if ($ch === '=') {
                          $expect = self::EXPECT_ATTRIBUTE_VALUE;
                      } elseif ($isWhitespace) {
                          // skip whitespace
                      } elseif ($ch === '/') {
                          $expect = self::EXPECT_CLOSE_BRACKET;
                      } elseif ($ch === '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                          $endAttribute();
                          $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                          $p--; // re-read as first character of the next name
                      }
                      break;
                  case self::EXPECT_ATTRIBUTE_VALUE:
                      if ($attributeValue === '') {
                          if ($attributeQuote === null) {
                              if ($isWhitespace) {
                                  // skip whitespace
                              } elseif ($ch === '"' || $ch === "'") {
                                  $attributeQuote = $ch;
                              } else {
                                  $attributeQuote = ''; // explicitly unquoted
                                  $p--;
                              }
                          } elseif ($ch === $attributeQuote) {
                              // Empty quoted string
                              $endAttribute(true);
                              $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                          } elseif ($attributeQuote === '' && ($ch === '/' || $ch === '>')) {
                              // `=` with no value
                              return null;
                          } else {
                              $attributeValue .= $ch;
                          }
                      } elseif ($ch === $attributeQuote) {
                          // Closing quote. Can only match for quoted values
                          // since `$ch` is never an empty string.
                          $endAttribute(true);
                          $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                      } elseif ($attributeQuote === '' && $isWhitespace) {
                          $endAttribute();
                          $expect = self::EXPECT_ATTRIBUTE_NAME_OR_END;
                      } elseif ($attributeQuote === '' && $ch === '>') {
                          // An unquoted value cannot contain `>`; it ends
                          // both the value and the tag.
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } else {
                          $attributeValue .= $ch;
                      }
                      break;
                  case self::EXPECT_CLOSE_BRACKET:
                      // Anything other than `>` is ignored
                      if ($ch === '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      }
                      break;
              }
          }
          if ($fullTag === null) return null;
          $endAttribute();
          return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
      }
  }
  1227. /**
  1228. * Represents HTML modifications to a node, such as CSS classes to add or
  1229. * additional attributes. See `MDHTMLFilter.scrubModifier()` to remove disallowed
  1230. * values.
  1231. */
  1232. class MDTagModifier {
  1233. /**
  1234. * Verbatim markdown syntax. Unmodified by changes to other properties.
  1235. */
  1236. public string $original;
  1237. /** @var string[] */
  1238. public array $cssClasses = [];
  1239. public ?string $cssId = null;
  1240. public array $cssStyles = [];
  1241. public array $attributes = [];
  1242. private static $baseClassRegex = '\\.([a-z_\\-][a-z0-9_\\-]*?)';
  1243. private static $baseIdRegex = '#([a-z_\\-][a-z0-9_\\-]*?)';
  1244. private static $baseAttributeRegex = '([a-z0-9]+?)=([^\\s\\}]+?)';
  1245. private static $baseRegex = '\\{([^}]+?)}';
  1246. private static $leadingClassRegex = '^\\{([^}]+?)}';
  1247. private static $trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$';
  1248. private static $classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
  1249. private static $idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
  1250. private static $attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value
  1251. public function applyTo(MDNode $node) {
  1252. if ($node instanceof MDNode) {
  1253. foreach ($this->cssClasses as $cssClass) {
  1254. $node->addClass($cssClass);
  1255. }
  1256. if ($this->cssId) $node->cssId = $this->cssId;
  1257. foreach ($this->attributes as $name => $value) {
  1258. $node->attributes[$name] = $value;
  1259. }
  1260. foreach ($this->cssStyles as $name => $value) {
  1261. $node->cssStyles[$name] = $value;
  1262. }
  1263. }
  1264. }
  1265. /**
  1266. * Adds a CSS class. If already present it will not be duplicated.
  1267. */
  1268. public function addClass(string $cssClass): bool {
  1269. if (array_search($cssClass, $this->cssClasses) !== false) return false;
  1270. array_push($this->cssClasses, $cssClass);
  1271. return true;
  1272. }
  1273. /**
  1274. * Removes a CSS class.
  1275. */
  1276. public function removeClass(string $cssClass): bool {
  1277. $beforeLength = sizeof($this->cssClasses);
  1278. $this->cssClasses = array_diff($this->cssClasses, [ $cssClass ]);
  1279. return sizeof($this->cssClasses) != beforeLength;
  1280. }
  1281. public function equals($other): bool {
  1282. if (!($other instanceof MDTagModifier)) return false;
  1283. if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
  1284. if ($other->cssId !== $this->cssId) return false;
  1285. if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
  1286. return true;
  1287. }
  1288. public function __toString(): string {
  1289. return $this->original;
  1290. }
  1291. private static function styleToObject(string $styleValue): array {
  1292. $pairs = explode(';', $styleValue);
  1293. $styles = [];
  1294. foreach ($pairs as $pair) {
  1295. $keyAndValue = explode(':', $pair);
  1296. if (sizeof($keyAndValue) != 2) continue;
  1297. $styles[$keyAndValue[0]] = $keyAndValue[1];
  1298. }
  1299. return $styles;
  1300. }
  1301. private static function fromContents(string $contents): ?MDTagModifier {
  1302. $modifierTokens = mb_split('\\s+', $contents);
  1303. $mod = new MDTagModifier();
  1304. $mod->original = "{{$contents}}";
  1305. foreach ($modifierTokens as $token) {
  1306. if (trim($token) == '') continue;
  1307. if (mb_eregi(self::$classRegex, $token, $groups)) {
  1308. $mod->addClass($groups[1]);
  1309. } elseif (mb_eregi(self::$idRegex, $token, $groups)) {
  1310. $mod->cssId = $groups[1];
  1311. } elseif (mb_eregi(self::$attributeRegex, $token, $groups)) {
  1312. if ($groups[1] == 'style') {
  1313. $mod->cssStyles = self::styleToObject($groups[2]);
  1314. } else {
  1315. $mod->attributes[$groups[1]] = $groups[2];
  1316. }
  1317. } else {
  1318. return null;
  1319. }
  1320. }
  1321. return $mod;
  1322. }
  /**
   * Extracts block modifier from end of a line. Always returns a 2-element
   * tuple array:
   * - `0`: the line without the modifier
   * - `1`: an `MDTagModifier` if found or `null` if not
   *
   * When a `$state` is given and none of its root's block readers is an
   * `MDModifierReader`, the line is returned untouched with a `null`
   * modifier. If the trailing braces are matched but their contents do not
   * parse as a modifier, the original line is also returned untouched so the
   * brace text is not silently dropped from the document.
   *
   * @param string $line
   * @param ?MDState $state
   * @return array tuple with remaining line and `MDTagModifier` or `null`
   */
  public static function fromLine(string $line, ?MDState $state): array {
      if ($state) {
          $modifiersEnabled = false;
          foreach ($state->root()->readersByBlockPriority as $reader) {
              if ($reader instanceof MDModifierReader) {
                  $modifiersEnabled = true;
                  break;
              }
          }
          if (!$modifiersEnabled) return [ $line, null ];
      }
      if (!mb_eregi(self::$trailingClassRegex, $line, $groups)) return [ $line, null ];
      $mod = self::fromContents($groups[2]);
      // Not a valid modifier: keep the line exactly as written.
      if ($mod === null) return [ $line, null ];
      return [ $groups[1], $mod ];
  }
  /**
   * Attempts to parse a modifier located at the head of a string.
   *
   * @param string $line text to inspect
   * @return ?MDTagModifier the parsed modifier, or `null` when the leading
   * pattern does not match or its contents are not a valid modifier
   */
  public static function fromStart(string $line): ?MDTagModifier {
      $matched = mb_eregi(self::$leadingClassRegex, $line, $captures);
      if ($matched) {
          return self::fromContents($captures[1]);
      }
      return null;
  }
  /**
   * Removes a trailing modifier from a line and returns the rest. The line
   * is returned unchanged when the trailing-modifier pattern does not match.
   *
   * @param string $line
   * @return string
   */
  public static function strip(string $line): string {
      return mb_eregi(self::$trailingClassRegex, $line, $captures) ? $captures[1] : $line;
  }
  1363. }
  1364. // -- Readers ---------------------------------------------------------------
  /**
   * Base class for readers of various markdown syntax. A `Markdown` instance can
   * be created with any combination of subclasses of these to customize the
   * flavor of markdown parsed.
   *
   * @see {@link custom.md} for details on subclassing
   */
  class MDReader {
      /**
       * Called before processing begins. `state.lines` is populated and the
       * line pointer `state.p` will be at `0`.
       *
       * Default implementation does nothing.
       */
      public function preProcess(MDState $state) {}

      /**
       * Attempts to read an `MDBlockNode` subclass at the current line pointer
       * `state.p`. Only matches if the block pattern starts at the line pointer,
       * not elsewhere in the `state.lines` array. If a block is found, `state.p`
       * should be incremented to the next line _after_ the block structure and
       * a `MDBlockNode` subclass instance is returned. If no block is found,
       * returns `null`.
       *
       * Default implementation always returns `null`.
       */
      public function readBlock(MDState $state): ?MDBlockNode { return null; }

      /**
       * Attempts to read an inline token from the beginning of `line`. Only the
       * start of the given `line` is considered. If a matching token is found, an
       * `MDToken` is returned. Otherwise `null` is returned.
       *
       * Default implementation always returns `null`.
       */
      public function readToken(MDState $state, string $line): ?MDToken { return null; }

      /**
       * Attempts to find a pattern anywhere in `tokens` and perform a _single_
       * in-place substitution with one or more `MDNode` subclass instances.
       * If a substitution is performed, must return `true`, otherwise `false`.
       *
       * Default implementation always returns `false`.
       *
       * @param MDState $state
       * @param int $pass what substitution pass this is, starting with 1
       * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and `MDInlineNode` elements
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool { return false; }

      /**
       * Called after all parsing has completed. An array `blocks` is passed of
       * all the top-level `MDBlockNode` elements in the document which this
       * method can traverse or alter in-place (the array is passed by
       * reference) if necessary.
       *
       * `MDNode.visitChildren` is useful for recursively looking for certain
       * `MDNode` instances. `MDNode.replaceNodes` is useful for swapping in
       * replacements.
       *
       * Default implementation does nothing.
       *
       * @param MDState $state
       * @param MDBlockNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {}

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the block parsing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the tokenizing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareTokenizeOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the substitution phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Readers are sorted within each substitution pass. All pass 1 readers are
       * processed first, then all pass 2 readers, etc. The number of passes this
       * reader participates in is dictated by `substitutionPassCount`.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @param int $pass substitution pass, with numbering starting at `1`
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return 0;
      }

      /**
       * How many substitution passes this reader requires. Substitution allows
       * all pass 1 readers to process first, then all pass 2 readers, etc.
       */
      public function substitutionPassCount(): int { return 1; }

      /**
       * For sorting readers with ordering preferences. The `compare` methods
       * don't have the properties of normal sorting compares so need to sort
       * differently.
       *
       * Every ordered pair of elements is compared; a non-zero result adds a
       * "must come before" edge. Elements are then emitted as their in-degree
       * reaches zero. Elements whose in-degree never reaches zero are appended
       * at the end in their input order.
       *
       * @param array $arr array to sort; its keys are ignored
       * @param callable $compareFn comparison function, taking two array element
       * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
       * respectively
       * @param callable $idFn function for returning a unique hashable id for
       * the array element
       * @return array sorted array (re-indexed from 0)
       */
      private static function kahnTopologicalSort(array $arr, callable $compareFn, callable $idFn): array {
          // Re-index so the positional loops below work for any key layout,
          // e.g. the key-preserving output of `array_filter`.
          $arr = array_values($arr);
          $count = sizeof($arr);
          $ids = [];
          $graph = [];
          $inDegrees = [];
          $valuesById = [];
          // Build the graph and compute in-degrees
          foreach ($arr as $index => $elem) {
              $id = $idFn($elem);
              $ids[$index] = $id;
              $graph[$id] = [];
              $inDegrees[$id] = 0;
              $valuesById[$id] = $elem;
          }
          for ($i = 0; $i < $count; $i++) {
              $idA = $ids[$i];
              for ($j = 0; $j < $count; $j++) {
                  if ($i === $j) continue;
                  $idB = $ids[$j];
                  $comparisonResult = $compareFn($arr[$i], $arr[$j]);
                  if ($comparisonResult < 0) {
                      array_push($graph[$idA], $idB);
                      $inDegrees[$idB]++;
                  } elseif ($comparisonResult > 0) {
                      array_push($graph[$idB], $idA);
                      $inDegrees[$idA]++;
                  }
              }
          }
          // Initialize the queue with zero-inDegree nodes
          $queue = [];
          foreach ($inDegrees as $elemId => $degree) {
              if ($degree === 0) {
                  array_push($queue, $elemId);
              }
          }
          // Process the queue and build the topological order list
          $sorted = [];
          while (sizeof($queue) > 0) {
              $elemId = array_shift($queue);
              array_push($sorted, $valuesById[$elemId]);
              unset($valuesById[$elemId]);
              foreach ($graph[$elemId] as $neighbor) {
                  $inDegrees[$neighbor]--;
                  if ($inDegrees[$neighbor] === 0) {
                      array_push($queue, $neighbor);
                  }
              }
          }
          // Anything left over can go at the end.
          foreach ($valuesById as $value) {
              array_push($sorted, $value);
          }
          return $sorted;
      }

      /**
       * Returns a sorted array of readers by their block priority preferences.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReaderForBlocks(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
              fn($elem) => get_class($elem)
          );
      }

      /**
       * Returns a sorted array of readers by their tokenization priority preferences.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReadersForTokenizing(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b),
              fn($elem) => get_class($elem)
          );
      }

      /**
       * Returns a sorted array of tuples (arrays) containing the substitution
       * pass number and reader instance, sorted by their substitution priority
       * preferences.
       *
       * For readers with `substitutionPassCount` > `1`, the same reader will
       * appear multiple times in the resulting array, one per pass.
       *
       * @param MDReader[] $readers
       * @return array[] sorted array of `[ pass, reader ]` tuples: all pass 1
       * tuples first, then all pass 2 tuples, etc.
       */
      public static function sortReadersForSubstitution(array &$readers): array {
          $tuples = [];
          $maxPass = 1;
          foreach ($readers as $reader) {
              $passCount = $reader->substitutionPassCount();
              for ($pass = 1; $pass <= $passCount; $pass++) {
                  array_push($tuples, [ $pass, $reader ]);
              }
              // Use the reader's pass count, not the post-loop counter, which
              // is one past the last pass.
              $maxPass = max($maxPass, $passCount);
          }
          $result = [];
          for ($pass = 1; $pass <= $maxPass; $pass++) {
              // array_filter preserves keys; re-index for a clean list.
              $readersThisPass = array_values(array_filter($tuples, fn($tup) => $tup[0] == $pass));
              $passResult = self::kahnTopologicalSort(
                  $readersThisPass,
                  function(array $a, array $b) use ($pass): int {
                      return $a[1]->compareSubstituteOrdering($b[1], $pass);
                  },
                  fn($elem) => get_class($elem[1])
              );
              $result = array_merge($result, $passResult);
          }
          return $result;
      }
  }
  /**
   * Reads markdown blocks for headings denoted with the underline syntax: a
   * line of text followed by a line made only of `=` (level 1) or `-`
   * (level 2) characters.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDUnderlinedHeadingReader extends MDReader {
      /**
       * Reads a two-line underlined heading at the line pointer.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDHeadingNode`, or `null` when fewer than
       * two lines remain, the text line is empty, or the second line is not
       * an underline
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $start = $state->p;
          [$text, $modifier] = MDTagModifier::fromLine(trim($state->lines[$start]), $state);
          $underline = trim($state->lines[$start + 1]);
          if ($text == '') return null;
          if (mb_eregi('^=+$', $underline)) {
              $level = 1;
          } elseif (mb_eregi('^\-+$', $underline)) {
              $level = 2;
          } else {
              return null;
          }
          // Consume both lines before parsing the inline content.
          $state->p = $start + 2;
          $heading = new MDHeadingNode($level, $state->inlineMarkdownToNodes($text));
          if ($modifier) $modifier->applyTo($heading);
          return $heading;
      }
  }
  /**
   * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
   * to 6 are supported.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDHashHeadingReader extends MDReader {
      private static $hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content

      /**
       * Reads a single-line hash heading at the line pointer. The heading
       * level is the number of leading hash marks.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDHeadingNode`, or `null` if the line does
       * not match the heading pattern
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $next = $state->p + 1;
          [$text, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
          if (!mb_eregi(self::$hashHeadingRegex, $text, $parts)) {
              return null;
          }
          $state->p = $next;
          $heading = new MDHeadingNode(mb_strlen($parts[1]), $state->inlineMarkdownToNodes($parts[2]));
          if ($modifier) {
              $modifier->applyTo($heading);
          }
          return $heading;
      }
  }
  /**
   * Reads subtext blocks. Subtext is smaller, fainter text for things like
   * disclaimers or sources. A subtext line starts with `-#`.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDSubtextReader extends MDReader {
      private static $subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content

      /**
       * Reads a single-line subtext block at the line pointer.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDSubtextNode`, or `null` if the line does
       * not match the subtext pattern
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $next = $state->p + 1;
          [$text, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
          if (!mb_eregi(self::$subtextRegex, $text, $parts)) {
              return null;
          }
          $state->p = $next;
          $subtext = new MDSubtextNode($state->inlineMarkdownToNodes($parts[1]));
          if ($modifier) {
              $modifier->applyTo($subtext);
          }
          return $subtext;
      }

      /**
       * Orders this reader before `MDUnorderedListReader`; no preference
       * relative to any other reader.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return ($other instanceof MDUnorderedListReader) ? -1 : 0;
      }
  }
  /**
   * Reads markdown blocks for blockquoted text: a run of consecutive lines
   * that each start with `>`.
   */
  class MDBlockQuoteReader extends MDReader {
      /**
       * Reads a blockquote starting at the line pointer. The `>` marker plus
       * up to three spaces and an optional tab are removed from each line, and
       * the remaining text is parsed as blocks in a copy of the state.
       *
       * On success the line pointer is left on the first line that is _not_
       * part of the quote, so that line is still available to other readers.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDBlockquoteNode`, or `null` if the current
       * line does not start with `>`
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $lineCount = sizeof($state->lines);
          $blockquoteLines = [];
          // Only advance past lines that belong to the quote; the terminating
          // line must not be consumed.
          while ($p < $lineCount && str_starts_with($state->lines[$p], ">")) {
              array_push($blockquoteLines, $state->lines[$p]);
              $p++;
          }
          if (sizeof($blockquoteLines) == 0) return null;
          $contentLines = array_map(
              fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)),
              $blockquoteLines
          );
          $substate = $state->copy($contentLines);
          $quotedBlocks = $substate->readBlocks();
          $state->p = $p;
          return new MDBlockquoteNode($quotedBlocks);
      }
  }
  /**
   * Internal abstract base class for ordered and unordered lists.
   */
  class _MDListReader extends MDReader {
      /**
       * Collects the source lines that make up one list item, starting at the
       * line pointer (which is not modified).
       *
       * Reading stops at the next line that looks like a list item, or at a
       * non-indented line that follows a blank line. Trailing blank lines are
       * removed unless reading stopped at a following list item, in which case
       * they are kept.
       *
       * @param MDState $state
       * @param int $firstLineStartPos number of leading characters (the list
       * marker) to drop from the first line
       * @return string[] item lines passed through `MDUtils::stripIndent`
       */
      private static function readItemLines(MDState $state, int $firstLineStartPos): array {
          $p = $state->p;
          $lines = [];
          $seenBlankLine = false;
          $stripTrailingBlankLines = true;
          while ($state->hasLines(1, $p)) {
              $isFirstLine = ($p == $state->p);
              $line = $state->lines[$p++];
              if ($isFirstLine) {
                  $line = mb_substr($line, $firstLineStartPos);
              }
              if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
                  // Found next list item
                  $stripTrailingBlankLines = false; // because this signals extra spacing intended
                  break;
              }
              $isBlankLine = trim($line) == '';
              $isIndented = mb_eregi('^\\s+\\S', $line);
              if ($isBlankLine) {
                  $seenBlankLine = true;
              } elseif (!$isIndented && $seenBlankLine) {
                  // Post-list content
                  break;
              }
              array_push($lines, $line);
          }
          // Honor the flag: previously it was set but never consulted, so blank
          // lines between items were always stripped.
          if ($stripTrailingBlankLines) {
              $lines = MDUtils::withoutTrailingBlankLines($lines);
          }
          return MDUtils::stripIndent($lines);
      }

      /**
       * Reads the content of the list item at the line pointer and advances
       * the pointer past the lines that were consumed (always at least one).
       *
       * - A single-line item is parsed as inline markdown.
       * - An item containing blank lines is parsed as blocks.
       * - Otherwise, if a later line looks like a list item, the lines before
       *   it are parsed as inline markdown and the rest as blocks.
       * - Otherwise the whole item is parsed as blocks.
       *
       * NOTE(review): the declared return type is `MDBlockNode`, yet several
       * branches return `inlineMarkdownToNode()` results or plain `MDNode`
       * wrappers — confirm those are `MDBlockNode` instances.
       *
       * @param MDState $state
       * @param int $firstLineStartPos number of leading characters (the list
       * marker) to drop from the first line
       * @return MDBlockNode node holding the item's content
       */
      protected function readListItemContent(MDState $state, int $firstLineStartPos): MDBlockNode {
          $itemLines = self::readItemLines($state, $firstLineStartPos);
          $state->p += max(sizeof($itemLines), 1);
          if (sizeof($itemLines) == 1) {
              return $state->inlineMarkdownToNode($itemLines[0]);
          }
          $hasBlankLines = sizeof(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
          if ($hasBlankLines) {
              $substate = $state->copy($itemLines);
              $blocks = $substate->readBlocks();
              return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
          }
          // Multiline content with no blank lines. Search for new block
          // boundaries without the benefit of a blank line to demarcate it.
          for ($p = 1; $p < sizeof($itemLines); $p++) {
              $line = $itemLines[$p];
              if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
                  // Nested list found
                  $firstBlock = $state->inlineMarkdownToNode(implode("\n", array_slice($itemLines, 0, $p)));
                  $substate = $state->copy(array_slice($itemLines, $p));
                  $blocks = $substate->readBlocks();
                  // Wrap the flattened children in one node, as the other
                  // branches do, instead of returning a raw nested array.
                  return new MDNode(array_merge([ $firstBlock ], $blocks));
              }
          }
          // Ok, give up and just do a standard block read
          $substate = $state->copy($itemLines);
          $blocks = $substate->readBlocks();
          return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
      }

      /**
       * Must be overridden by subclasses.
       *
       * @throws Error always
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $className = get_class($this);
          throw new Error("Abstract readBlock must be overridden in {$className}");
      }
  }
  /**
   * Block reader for unordered (bulleted) lists. Items start with `*`, `+`,
   * or `-` followed by whitespace.
   */
  class MDUnorderedListReader extends _MDListReader {
      private static string $unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content

      /**
       * Reads one bulleted item at the line pointer.
       *
       * @param MDState $state
       * @return ?MDListItemNode the item, or `null` if no lines remain or the
       * current line is not a bulleted item
       */
      private function readUnorderedListItem(MDState $state): ?MDListItemNode {
          // A list that runs to the end of the input leaves the pointer past
          // the last line; stop instead of indexing out of range.
          if (!$state->hasLines(1)) return null;
          $line = $state->lines[$state->p];
          if (!mb_eregi(self::$unorderedListRegex, $line, $groups)) return null;
          $firstLineOffset = mb_strlen($groups[1]);
          return new MDListItemNode($this->readListItemContent($state, $firstLineOffset));
      }

      /**
       * Reads consecutive bulleted items into one list.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDUnorderedListNode`, or `null` if no item
       * starts at the line pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $items = [];
          while (($item = $this->readUnorderedListItem($state)) !== null) {
              array_push($items, $item);
          }
          if (sizeof($items) == 0) return null;
          return new MDUnorderedListNode($items);
      }
  }
  /**
   * Block reader for ordered (numbered) lists. The number of the first item is
   * used to begin counting. The subsequent items increase by 1, regardless of
   * their value.
   */
  class MDOrderedListReader extends _MDListReader {
      private static string $orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content

      /**
       * Reads one numbered item at the line pointer.
       *
       * @param MDState $state
       * @return ?MDListItemNode the item (carrying its written ordinal), or
       * `null` if no lines remain or the current line is not a numbered item
       */
      private function readOrderedListItem(MDState $state): ?MDListItemNode {
          // A list that runs to the end of the input leaves the pointer past
          // the last line; stop instead of indexing out of range.
          if (!$state->hasLines(1)) return null;
          $line = $state->lines[$state->p];
          if (!mb_eregi(self::$orderedListRegex, $line, $groups)) return null;
          $ordinal = intval($groups[1]);
          $firstLineOffset = mb_strlen($groups[1]) + mb_strlen($groups[2]);
          return new MDListItemNode($this->readListItemContent($state, $firstLineOffset), $ordinal);
      }

      /**
       * Reads consecutive numbered items into one list whose start number is
       * the first item's ordinal.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDOrderedListNode`, or `null` if no item
       * starts at the line pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $items = [];
          while (($item = $this->readOrderedListItem($state)) !== null) {
              array_push($items, $item);
          }
          if (sizeof($items) == 0) return null;
          return new MDOrderedListNode($items, $items[0]->ordinal);
      }
  }
  /**
   * Block reader for code blocks denoted by pairs of triple tickmarks. If
   * a programming language name, _xyz_, immediately follows the backticks, a
   * `language-xyz` CSS class will be added to the resulting `<code>`
   * element.
   *
   * Supports `MDTagModifier` suffix.
   */
  class MDFencedCodeBlockReader extends MDReader {
      /**
       * Reads a fenced code block starting at the line pointer.
       *
       * The opening fence must be the first non-whitespace text on its line
       * (optionally followed by a language name); the closing fence is a line
       * that is exactly three backticks once trimmed. Lines in between are
       * kept verbatim.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the current
       * line is not an opening fence or no closing fence is found
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $p = $state->p;
          $openFenceLine = $state->lines[$p++];
          [$openFenceLine, $modifier] = MDTagModifier::fromLine($openFenceLine, $state);
          // Anchored at the start so that prose ending in backticks (e.g.
          // "text ```") is not mistaken for a fence and its text discarded.
          if (!mb_eregi('^\s*```\s*([a-z0-9]*)\s*$', $openFenceLine, $groups)) return null;
          $language = mb_strlen($groups[1]) > 0 ? $groups[1] : null;
          $codeLines = [];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (trim($line) == '```') {
                  $state->p = $p;
                  $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
                  if ($modifier) $modifier->applyTo($block);
                  return $block;
              }
              array_push($codeLines, $line);
          }
          // Ran out of lines without a closing fence.
          return null;
      }
  }
  /**
   * Block reader for code blocks denoted by indenting text.
   */
  class MDIndentedCodeBlockReader extends MDReader {
      /**
       * Reads consecutive lines for which `MDUtils::countIndents($line, true)`
       * is at least 1 and joins them, each passed through
       * `MDUtils::stripIndent`, into one code block.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the current
       * line does not qualify
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $collected = [];
          $cursor = $state->p;
          for (; $state->hasLines(1, $cursor); $cursor++) {
              $current = $state->lines[$cursor];
              if (MDUtils::countIndents($current, true) < 1) {
                  break;
              }
              array_push($collected, MDUtils::stripIndent($current));
          }
          if (sizeof($collected) == 0) {
              return null;
          }
          $state->p = $cursor;
          return new MDCodeBlockNode(implode("\n", $collected));
      }
  }
  /**
   * Block reader for horizontal rules. Composed of three or more hyphens or
   * asterisks on a line by themselves, with or without intermediate whitespace.
   */
  class MDHorizontalRuleReader extends MDReader {
      private static string $horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

      /**
       * Reads a horizontal rule at the line pointer. A trailing
       * `MDTagModifier`, if present, is applied to the resulting node.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDHorizontalRuleNode`, or `null` if the line
       * does not match the rule pattern
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $next = $state->p + 1;
          [$text, $modifier] = MDTagModifier::fromLine($state->lines[$state->p], $state);
          if (!mb_eregi(self::$horizontalRuleRegex, $text)) {
              return null;
          }
          $state->p = $next;
          $rule = new MDHorizontalRuleNode();
          if ($modifier) {
              $modifier->applyTo($rule);
          }
          return $rule;
      }

      /**
       * Orders this reader before `MDUnorderedListReader`; no preference
       * relative to any other reader.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return ($other instanceof MDUnorderedListReader) ? -1 : 0;
      }
  }
  /**
   * Block reader for tables.
   *
   * Supports `MDTagModifier` suffix.
   */
  class MDTableReader extends MDReader {
      private static string $tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

      /**
       * Reads one table row at the line pointer and advances the pointer on
       * success. Any trailing modifier is stripped from the line, one leading
       * and one trailing `|` are dropped, and the rest is split on `|` into
       * cells whose trimmed text is parsed as inline markdown.
       *
       * @param MDState $state
       * @param bool $isHeader `true` to create header cells, `false` for
       * regular cells
       * @return ?MDTableRowNode the row, or `null` if no lines remain or the
       * line contains no `|`
       */
      private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
          if (!$state->hasLines(1)) return null;
          $p = $state->p;
          $line = MDTagModifier::strip(trim($state->lines[$p++]));
          if (!mb_eregi('.*\\|.*', $line)) return null;
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          $cellTokens = explode('|', $line);
          // `$state` must be captured explicitly; closures do not inherit the
          // enclosing scope's variables.
          $cells = array_map(function($token) use ($state, $isHeader) {
              $content = $state->inlineMarkdownToNode(trim($token));
              return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
          }, $cellTokens);
          $state->p = $p;
          return new MDTableRowNode($cells);
      }

      /**
       * Derives per-column alignment from the divider line: a leading colon
       * means `left`, a trailing colon `right`, both `center`, neither `null`.
       *
       * @param string $line the divider line
       * @return (string|null)[] one entry per column
       */
      private function parseColumnAlignments(string $line): array {
          $line = trim($line);
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          return array_map(function($token) {
              $startsWithColon = str_starts_with($token, ':');
              $endsWithColon = str_ends_with($token, ':');
              if ($startsWithColon && $endsWithColon) return 'center';
              if ($startsWithColon) return 'left';
              if ($endsWithColon) return 'right';
              return null;
          }, mb_split('\\s*\\|\\s*', $line));
      }

      /**
       * Reads a table: a header row, a divider line, then zero or more body
       * rows. The line pointer is restored if the header or divider is
       * missing. A modifier found at the end of the header line is applied to
       * the table.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDTableNode`, or `null` if no table starts at
       * the line pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $startP = $state->p;
          $firstLine = $state->lines[$startP];
          $modifier = MDTagModifier::fromLine($firstLine, $state)[1];
          $headerRow = $this->readTableRow($state, true);
          if ($headerRow === null) {
              $state->p = $startP;
              return null;
          }
          $dividerLine = $state->lines[$state->p++];
          if (!mb_eregi(self::$tableDividerRegex, $dividerLine)) {
              $state->p = $startP;
              return null;
          }
          $columnAlignments = $this->parseColumnAlignments($dividerLine);
          $bodyRows = [];
          while ($state->hasLines(1)) {
              $row = $this->readTableRow($state, false);
              if ($row === null) break;
              array_push($bodyRows, $row);
          }
          $table = new MDTableNode($headerRow, $bodyRows);
          $table->columnAlignments = $columnAlignments;
          if ($modifier) $modifier->applyTo($table);
          return $table;
      }
  }
  /**
   * Block reader for definition lists. Definitions go directly under terms starting
   * with a colon.
   */
  class MDDefinitionListReader extends MDReader {
      /**
       * Reads a definition list starting at the line pointer and ending at the
       * first blank line or the end of input.
       *
       * Lines beginning with `:` and whitespace are definitions, lines
       * beginning with whitespace continue the previous entry, and all other
       * lines are terms. At least one term and one definition are required.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDDefinitionListNode`, or `null` if the lines
       * do not form a definition list
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $termCount = 0;
          $definitionCount = 0;
          $defLines = [];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (trim($line) === '') {
                  break;
              }
              if (mb_eregi('^\\s+', $line)) {
                  // Continuation line: needs a preceding entry to attach to.
                  if (sizeof($defLines) == 0) return null;
                  $defLines[sizeof($defLines) - 1] .= "\n" . $line;
              } elseif (mb_eregi('^:\\s+', $line)) {
                  array_push($defLines, $line);
                  $definitionCount++;
              } else {
                  array_push($defLines, $line);
                  $termCount++;
              }
          }
          if ($termCount == 0 || $definitionCount == 0) return null;
          // `$state` is captured explicitly and the match groups are requested
          // from `mb_eregi`; previously both were undefined inside the closure.
          $blocks = array_map(function($line) use ($state) {
              if (mb_eregi('^:\\s+(.*?)$', $line, $groups)) {
                  return new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($groups[1]));
              }
              return new MDDefinitionListTermNode($state->inlineMarkdownToNodes($line));
          }, $defLines);
          $state->p = $p;
          return new MDDefinitionListNode($blocks);
      }
  }
  2012. /**
  2013. * Block reader for defining footnote contents. Footnotes can be defined anywhere
  2014. * in the document but will always be rendered at the end of a page or end of
  2015. * the document.
  2016. */
  2017. class MDFootnoteReader extends MDReader {
  2018. private static string $footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
  2019. private static string $footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol
  2020. /**
  2021. * @param MDState $state
  2022. * @param string $symbol
  2023. * @param MDNode[] $footnote
  2024. */
  2025. private function defineFootnote(MDState $state, string $symbol, array $footnote) {
  2026. $footnotes = $state->root()->userInfo['footnotes'] ?? [];
  2027. $footnotes[$symbol] = $footnote;
  2028. $state->root()->userInfo['footnotes'] = $footnotes;
  2029. }
  2030. private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
  2031. $footnoteInstances = $state->root()->userInfo['footnoteInstances'];
  2032. $instances = $footnoteInstances[$symbol] ?? [];
  2033. array_push($instances, $unique);
  2034. $footnoteInstances[$symbol] = $instances;
  2035. }
  2036. private function idForFootnoteSymbol(MDState $state, string $symbol): int {
  2037. $footnoteIds = $state->root()->userInfo['footnoteIds'];
  2038. $existing = $footnoteIds[$symbol];
  2039. if ($existing) return $existing;
  2040. $nextFootnoteId = $state->root()->userInfo['nextFootnoteId'];
  2041. $id = $nextFootnoteId++;
  2042. $footnoteIds[$symbol] = $id;
  2043. $state->root()->userInfo['nextFootnoteId'] = $nextFootnoteId;
  2044. return $id;
  2045. }
  2046. public function preProcess(MDState $state) {
  2047. $state->root()->userInfo['footnoteInstances'] = [];
  2048. $state->root()->userInfo['footnotes'] = [];
  2049. $state->root()->userInfo['footnoteIds'] = [];
  2050. $state->root()->userInfo['nextFootnoteId'] = 1;
  2051. }
  2052. public function readBlock(MDState $state): ?MDBlockNode {
  2053. $p = $state->p;
  2054. if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
  2055. $symbol = $groups[1];
  2056. $def = $groups[2];
  2057. while ($state->hasLines(1, $p)) {
  2058. $line = $state->lines[$p++];
  2059. if (mb_eregi('^\\s+', $line)) {
  2060. $def .= "\n" . $line;
  2061. } else {
  2062. $p--;
  2063. break;
  2064. }
  2065. }
  2066. $content = $state->inlineMarkdownToNodes($def);
  2067. $this->defineFootnote($state, $symbol, $content);
  2068. $state->p = $p;
  2069. return new MDNode(); // empty
  2070. }
  2071. public function readToken(MDState $state, string $line): ?MDToken {
  2072. $groups;
  2073. if (mb_eregi(self::$footnoteWithTitleRegex, $line, $groups)) {
  2074. return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
  2075. }
  2076. if (mb_eregi(self::$footnoteRegex, $line, $groups)) {
  2077. return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
  2078. }
  2079. return null;
  2080. }
  2081. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2082. if ($match = self::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
  2083. $symbol = $match->tokens[0]->content;
  2084. array_splice($tokens, $match->index, 1, new MDFootnoteNode($symbol));
  2085. return true;
  2086. }
  2087. return false;
  2088. }
  2089. /**
  2090. * @param {MDState} state
  2091. * @param {MDBlockNode[]} blocks
  2092. */
  2093. public function postProcess(MDState $state, array &$blocks) {
  2094. $nextOccurrenceId = 1;
  2095. foreach ($blocks as $block) {
  2096. $block->visitChildren(function($node) use (&$nextOccurrenceId) {
  2097. if (!($node instanceof MDFootnoteNode)) return;
  2098. $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
  2099. $node->occurrenceId = $nextOccurrenceId++;
  2100. $node->displaySymbol = strval($node->footnoteId);
  2101. $this->$registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
  2102. });
  2103. }
  2104. if (sizeof($state->userInfo['footnotes']) == 0) return;
  2105. array_push($blocks, new MDFootnoteListNode());
  2106. }
  2107. public function compareBlockOrdering(MDReader $other): int {
  2108. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2109. return -1;
  2110. }
  2111. return 0;
  2112. }
  /**
   * Tokenizing priority relative to another reader.
   *
   * @param MDReader $other reader to compare against
   * @return int `-1` when `$other` is an `MDLinkReader` or `MDImageReader`,
   * `0` for every other reader
   */
  public function compareTokenizeOrdering(MDReader $other): int {
      $isLinkOrImage = $other instanceof MDLinkReader || $other instanceof MDImageReader;
      return $isLinkOrImage ? -1 : 0;
  }
  /**
   * Substitution priority relative to another reader. The pass number does
   * not affect the result.
   *
   * @param MDReader $other reader to compare against
   * @param int $pass substitution pass number (ignored)
   * @return int `-1` when `$other` is an `MDLinkReader` or `MDImageReader`,
   * `0` for every other reader
   */
  public function compareSubstituteOrdering(MDReader $other, int $pass): int {
      $isLinkOrImage = $other instanceof MDLinkReader || $other instanceof MDImageReader;
      return $isLinkOrImage ? -1 : 0;
  }
  2125. }
  /**
   * Block reader for abbreviation definitions. Anywhere the abbreviation appears
   * in plain text will have its definition available when hovering over it.
   * Definitions can appear anywhere in the document. Their content should only
   * contain simple text, not markdown.
   */
  class MDAbbreviationReader extends MDReader {
      /**
       * Records an abbreviation in the root state's `userInfo`, which is where
       * `preProcess` initializes the tables and `postProcess` reads them.
       *
       * @param MDState $state
       * @param string $abbreviation abbreviation text as written in the document
       * @param string $definition plain-text expansion
       */
      private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
          $root = $state->root();
          $root->userInfo['abbreviations'][$abbreviation] = $definition;
          // Word-boundary pattern kept alongside the definition. `postProcess`
          // in this class matches by plain substring and does not use it.
          $regex = "\\b(" . preg_quote($abbreviation) . ")\\b";
          $root->userInfo['abbreviationRegexes'][$abbreviation] = $regex;
      }

      /**
       * Resets the abbreviation tables on the root state.
       */
      public function preProcess(MDState $state) {
          $state->root()->userInfo['abbreviations'] = [];
          $state->root()->userInfo['abbreviationRegexes'] = [];
      }

      /**
       * Reads a definition line of the form `*[ABBR]: definition`. On a match
       * the definition is recorded and the line is consumed.
       *
       * @param MDState $state
       * @return ?MDBlockNode an empty node on a match, `null` otherwise
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
          $abbrev = $groups[1];
          $def = $groups[2];
          $this->defineAbbreviation($state, $abbrev, $def);
          $state->p = $p;
          // NOTE(review): a plain MDNode is returned from a method declared
          // `?MDBlockNode`; confirm this satisfies the return type.
          return new MDNode(); // empty
      }

      /**
       * Replaces each occurrence of a defined abbreviation inside text nodes
       * with an `MDAbbreviationNode`. Matching is by case-sensitive substring.
       *
       * @param MDState $state
       * @param MDNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {
          $abbreviations = $state->root()->userInfo['abbreviations'] ?? [];
          if (sizeof($abbreviations) == 0) return;
          MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
              if (!($original instanceof MDTextNode)) return null;
              $changed = false;
              $elems = [ $original->text ]; // mix of strings and MDNodes
              for ($i = 0; $i < sizeof($elems); $i++) {
                  $text = $elems[$i];
                  if (!is_string($text)) continue;
                  foreach ($abbreviations as $abbreviation => $definition) {
                      // Numeric-looking array keys come back as ints.
                      $abbreviation = strval($abbreviation);
                      $index = strpos($text, $abbreviation);
                      // Not in this string: try the next abbreviation.
                      if ($index === false) continue;
                      $prefix = substr($text, 0, $index);
                      $suffix = substr($text, $index + strlen($abbreviation));
                      array_splice($elems, $i, 1, [ $prefix, new MDAbbreviationNode($abbreviation, $definition), $suffix ]);
                      $i = -1; // start over
                      $changed = true;
                      break;
                  }
              }
              if (!$changed) return null;
              $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
              return new MDNode($nodes);
          });
      }
  }
  /**
   * Fallback block reader that gathers consecutive non-blank lines into a
   * paragraph. A blank or whitespace-only line ends the paragraph. Because
   * paragraphs have no marker syntax of their own, this reader sorts after all
   * other block readers.
   */
  class MDParagraphReader extends MDReader {
      /**
       * Collects lines from the current position up to (and consuming) the
       * next blank line.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDParagraphNode`, or `null` when no lines were
       * collected or when reading started at line 0 and reached the end of
       * the input
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $cursor = $state->p;
          $collected = [];
          while ($state->hasLines(1, $cursor)) {
              $current = $state->lines[$cursor];
              $cursor++;
              if (trim($current) === '') {
                  break;
              }
              $collected[] = $current;
          }
          // If it's the entire document don't wrap it in a paragraph
          $spansWholeDocument = $state->p == 0 && $cursor >= count($state->lines);
          if ($spansWholeDocument || count($collected) == 0) {
              return null;
          }
          $state->p = $cursor;
          $markdown = implode("\n", $collected);
          return new MDParagraphNode($state->inlineMarkdownToNodes($markdown));
      }

      /**
       * @param MDReader $other
       * @return int always `1`, placing this reader after every other one
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 1; // always dead last
      }
  }
  /**
   * Abstract base class for readers that look for one or two delimiting tokens
   * on either side of some content. E.g. `**strong**`.
   */
  class MDSimplePairInlineReader extends MDReader {
      // Passes:
      // 1. Syntaxes with two delimiting tokens, interior tokens of the same
      //    kind must be even in number
      // 2. Syntaxes with one delimiting token, interior tokens of the same
      //    kind must be even in number
      // 3. Syntaxes with two delimiting tokens, any tokens inside
      // 4. Syntaxes with one delimiting token, any tokens inside
      public function substitutionPassCount(): int { return 4; }

      /**
       * Attempts a substitution of a matched pair of delimiting token types.
       * If successful, the substitution is performed on `$tokens` and `true` is
       * returned, otherwise `false` is returned and the array is untouched.
       *
       * Single-token delimiters (`$count == 1`) are only attempted on passes 2
       * and 4, repeated delimiters (`$count > 1`) only on passes 1 and 3.
       * Content is rejected when it starts or ends with whitespace or already
       * contains a node of class `$nodeClass`. On pass 1 content is also
       * rejected when it contains an odd number of `$delimiter` tokens.
       *
       * NOTE(review): the pass list above says pass 2 should apply the
       * even-count rule as well, but only pass 1 does. Behavior is left as
       * found; confirm which is intended.
       *
       * @param MDState $state
       * @param int $pass pass number, starting with `1`
       * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
       * @param string $nodeClass class of the node to return if matched
       * @param MDTokenType $delimiter delimiting token
       * @param int $count how many times the token is repeated to form the delimiter
       * @param bool $plaintext whether to invoke `nodeClass` with a verbatim
       * content string instead of parsed `MDNode`s
       * @return bool `true` if substitution was performed, `false` if not
       */
      public function attemptPair(MDState $state, int $pass, array &$tokens, string $nodeClass, MDTokenType $delimiter, int $count=1, bool $plaintext=false): bool {
          if ($count < 1) return false;
          if ($count == 1 && $pass != 2 && $pass != 4) return false;
          if ($count > 1 && $pass != 1 && $pass != 3) return false;
          // array_fill() returns the array; it does not fill in place.
          $delimiters = array_fill(0, $count, $delimiter);
          $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
          // Everything the validator reads from this scope must be captured.
          $isAcceptableContent = function($content) use ($nodeClass, $delimiter, $isFirstOfMultiplePasses) {
              // `??` keeps empty content from raising undefined-offset warnings.
              $first = $content[0] ?? null;
              $last = $content[sizeof($content) - 1] ?? null;
              $firstType = $first instanceof MDToken ? $first->type : null;
              $lastType = $last instanceof MDToken ? $last->type : null;
              if ($firstType === MDTokenType::Whitespace) return false;
              if ($lastType === MDTokenType::Whitespace) return false;
              foreach ($content as $token) {
                  // Don't allow nesting
                  if (is_object($token) && get_class($token) === $nodeClass) return false;
              }
              if ($isFirstOfMultiplePasses) {
                  $innerCount = 0;
                  foreach ($content as $token) {
                      if ($token instanceof MDToken && $token->type === $delimiter) $innerCount++;
                  }
                  if (($innerCount % 2) != 0) return false;
              }
              return true;
          };
          $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters, $isAcceptableContent);
          if ($match === null) return false;
          $content = ($plaintext)
              ? implode('', array_map(fn($token) => $token->original, $match->contentTokens))
              : $state->tokensToNodes($match->contentTokens);
          $node = new $nodeClass($content);
          array_splice($tokens, $match->startIndex, $match->totalLength, [ $node ]);
          return true;
      }
  }
  /**
   * Inline reader for emphasized text: a single underscore on each side of the
   * content (tried first) or a single asterisk on each side.
   */
  class MDEmphasisReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Underscore` or `Asterisk` token when `$line` begins with the
       * corresponding character.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $candidates = [ '_' => MDTokenType::Underscore, '*' => MDTokenType::Asterisk ];
          foreach ($candidates as $symbol => $type) {
              if (str_starts_with($line, $symbol)) return new MDToken($symbol, $type);
          }
          return null;
      }

      /**
       * Tries a single-underscore pair, then a single-asterisk pair, producing
       * an `MDEmphasisNode` on the first success.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ MDTokenType::Underscore, MDTokenType::Asterisk ] as $delimiter) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', $delimiter)) return true;
          }
          return false;
      }

      /**
       * @return int `1` when compared against an `MDStrongReader`, else `0`
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDStrongReader) ? 1 : 0;
      }
  }
  /**
   * Inline reader for strong text: two asterisks on each side of the content
   * (tried first) or two underscores on each side. When `MDUnderlineReader` is
   * also in use it takes over the double-underscore syntax.
   */
  class MDStrongReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Asterisk` or `Underscore` token when `$line` begins with the
       * corresponding character.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $candidates = [ '*' => MDTokenType::Asterisk, '_' => MDTokenType::Underscore ];
          foreach ($candidates as $symbol => $type) {
              if (str_starts_with($line, $symbol)) return new MDToken($symbol, $type);
          }
          return null;
      }

      /**
       * Tries a double-asterisk pair, then a double-underscore pair, producing
       * an `MDStrongNode` on the first success.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ MDTokenType::Asterisk, MDTokenType::Underscore ] as $delimiter) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', $delimiter, 2)) return true;
          }
          return false;
      }

      /**
       * @return int `-1` when compared against an `MDEmphasisReader`, else `0`
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDEmphasisReader) ? -1 : 0;
      }
  }
  /**
   * Reader for strikethrough syntax. Consists of two tildes on either side of
   * some text (preferred) or single tildes on either side. Note that if
   * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
   *
   * The number of recognized tildes can be configured.
   */
  class MDStrikethroughReader extends MDSimplePairInlineReader {
      /** @var bool whether `~text~` is recognized as strikethrough */
      public bool $singleTildeEnabled = true;
      /** @var bool whether `~~text~~` is recognized as strikethrough */
      public bool $doubleTildeEnabled = true;

      /**
       * Emits a `Tilde` token when `$line` begins with `~`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
          return null;
      }

      /**
       * Tries the double-tilde form first, then the single-tilde form, each
       * only when its flag is enabled. The flags previously guarded the
       * opposite delimiter counts.
       *
       * @return bool `true` if an `MDStrikethroughNode` was substituted
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($this->doubleTildeEnabled) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
          }
          if ($this->singleTildeEnabled) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde)) return true;
          }
          return false;
      }
  }
  /**
   * Inline reader for underlined text: two underscores on each side of the
   * content. `MDStrongReader` also looks for double underscores; when both are
   * in use this reader takes priority.
   */
  class MDUnderlineReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Underscore` token when `$line` begins with `_`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '_')
              ? new MDToken('_', MDTokenType::Underscore)
              : null;
      }

      /**
       * Attempts a double-underscore pair, producing an `MDUnderlineNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $repeat = 2;
          return $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, $repeat);
      }

      /**
       * @return int `-1` when compared against an `MDStrongReader`, else `0`
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDStrongReader) ? -1 : 0;
      }
  }
  /**
   * Inline reader for highlighted text: a pair of equal signs on each side of
   * the content.
   */
  class MDHighlightReader extends MDSimplePairInlineReader {
      /**
       * Emits an `Equal` token when `$line` begins with `=`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '=')
              ? new MDToken('=', MDTokenType::Equal)
              : null;
      }

      /**
       * Attempts a double-equal-sign pair, producing an `MDHighlightNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $repeat = 2;
          return $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, $repeat);
      }
  }
  /**
   * Reader for inline code syntax. Consists of one or two delimiting backticks
   * around text. The contents between the backticks will be rendered verbatim,
   * ignoring any inner markdown syntax. To include a backtick inside, escape it
   * with a backslash.
   */
  class MDCodeSpanReader extends MDSimplePairInlineReader {
      /**
       * Emits a `Backtick` token when `$line` begins with a backtick.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if (str_starts_with($line, '`')) return new MDToken('`', MDTokenType::Backtick);
          return null;
      }

      /**
       * Tries a double-backtick pair, then a single-backtick pair. The matched
       * content is passed to `MDCodeNode` as a verbatim string.
       *
       * @return bool `true` if an `MDCodeNode` was substituted, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 2, true)) return true;
          if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 1, true)) return true;
          // Falling off the end would return null and violate the bool type.
          return false;
      }
  }
  /**
   * Inline reader for subscript text: a single tilde on each side of the
   * content. When used together with `MDStrikethroughReader` this reader takes
   * precedence, leaving strikethrough to the double-tilde form.
   */
  class MDSubscriptReader extends MDSimplePairInlineReader {
      /**
       * Emits a `Tilde` token when `$line` begins with `~`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '~')
              ? new MDToken('~', MDTokenType::Tilde)
              : null;
      }

      /**
       * Attempts a single-tilde pair, producing an `MDSubscriptNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
          return $substituted;
      }

      /**
       * @return int `-1` when compared against an `MDStrikethroughReader`, else `0`
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDStrikethroughReader) ? -1 : 0;
      }
  }
  /**
   * Inline reader for superscript text: a single caret on each side of the
   * content.
   */
  class MDSuperscriptReader extends MDSimplePairInlineReader {
      /**
       * Emits a `Caret` token when `$line` begins with `^`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '^')
              ? new MDToken('^', MDTokenType::Caret)
              : null;
      }

      /**
       * Attempts a single-caret pair, producing an `MDSuperscriptNode`.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
          return $substituted;
      }
  }
  /**
   * Reads a hypertext link. Consists of link text between square brackets
   * followed immediately by a URL in parentheses. Also recognizes email
   * targets and bare `<url>` / `<email>` autolinks.
   */
  class MDLinkReader extends MDReader {
      /**
       * Tokenizes, in order of preference: a label, an email target, a URL
       * target, a bare `<email>`, and a bare `<url>`.
       *
       * @param MDState $state
       * @param string $line text to tokenize
       * @return ?MDToken the first matching token, or `null`
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if ($groups = MDToken::tokenizeLabel($line)) {
              return new MDToken($groups[0], MDTokenType::Label, $groups[1]);
          }
          if ($groups = MDToken::tokenizeEmail($line)) {
              return new MDToken($groups[0], MDTokenType::Email, $groups[1], $groups[2]);
          }
          if ($groups = MDToken::tokenizeURL($line)) {
              return new MDToken($groups[0], MDTokenType::URL, $groups[1], $groups[2]);
          }
          $simpleEmailRegex = "^<(" . MDUtils::$baseEmailRegex . ")>";
          if (mb_eregi($simpleEmailRegex, $line, $groups)) {
              return new MDToken($groups[0], MDTokenType::SimpleEmail, $groups[1]);
          }
          $simpleURLRegex = "^<(" . MDUtils::$baseURLRegex . ")>";
          if (mb_eregi($simpleURLRegex, $line, $groups)) {
              return new MDToken($groups[0], MDTokenType::SimpleLink, $groups[1]);
          }
          return null;
      }

      /**
       * Replaces the first recognized link token sequence with an
       * `MDLinkNode`. Tried in order: label + URL, label + email, bare email,
       * bare URL.
       *
       * Every replacement passed to `array_splice()` is wrapped in an array,
       * because a bare object would be cast to an array of its properties.
       *
       * @param MDState $state
       * @param int $pass substitution pass number (not consulted here)
       * @param (MDToken|MDNode)[] $tokens token list, modified in place
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
              $text = $match->tokens[0]->content;
              $target = $match->tokens[sizeof($match->tokens) - 1];
              $url = $target->content;
              $title = $target->extra;
              // NOTE(review): this branch calls inlineMarkdownToNode (singular)
              // while the email branch calls inlineMarkdownToNodes; confirm
              // both methods exist on MDState.
              $node = new MDLinkNode($url, $state->inlineMarkdownToNode($text), $title);
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Email ])) {
              $text = $match->tokens[0]->content;
              $target = $match->tokens[sizeof($match->tokens) - 1];
              $email = $target->content;
              $url = "mailto:{$email}";
              $title = $target->extra;
              $node = new MDLinkNode($url, $state->inlineMarkdownToNodes($text), $title);
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ])) {
              $token = $match->tokens[0];
              $link = "mailto:{$token->content}";
              $node = new MDLinkNode($link, new MDObfuscatedTextNode($token->content));
              array_splice($tokens, $match->index, 1, [ $node ]);
              return true;
          }
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ])) {
              $token = $match->tokens[0];
              $link = $token->content;
              $node = new MDLinkNode($link, new MDTextNode($link));
              array_splice($tokens, $match->index, 1, [ $node ]);
              return true;
          }
          return false;
      }
  }
  /**
   * Reader for referential URL definitions. Consists of link text between square
   * brackets followed immediately by a reference symbol also in square brackets.
   * The URL can be defined elsewhere on a line by itself with the symbol in square
   * brackets, colon, and the URL (and optional title in quotes).
   */
  class MDReferencedLinkReader extends MDLinkReader {
      /**
       * Reads a definition line of the form `[symbol]: url` or
       * `[symbol]: url "title"` and registers it with `$state->defineURL`.
       *
       * @param MDState $state
       * @return ?MDBlockNode an empty node when a definition was consumed,
       * `null` when the line is not a definition
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          // Stays null for definitions without a quoted title; previously the
          // variable was undefined on that path.
          $title = null;
          if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $line, $groups)) {
              $symbol = $groups[1];
              $url = $groups[2];
              $title = $groups[3];
          } elseif (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $line, $groups)) {
              $symbol = $groups[1];
              $url = $groups[2];
          } else {
              return null;
          }
          $state->defineURL($symbol, $url, $title);
          $state->p = $p;
          // NOTE(review): a plain MDNode is returned from a method declared
          // `?MDBlockNode`; confirm this satisfies the return type.
          return new MDNode([]); // empty
      }

      /**
       * Replaces the first `[text][ref]` token sequence with an
       * `MDReferencedLinkNode`.
       *
       * @param MDState $state
       * @param int $pass substitution pass number (not consulted here)
       * @param (MDToken|MDNode)[] $tokens token list, modified in place
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
              $text = $match->tokens[0]->content;
              $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
              $node = new MDReferencedLinkNode($ref, $state->inlineMarkdownToNodes($text));
              // Wrapped in an array so array_splice() inserts the object itself.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          return false;
      }
  }
  /**
   * Reader for images. Consists of an exclamation, alt text in square brackets,
   * and image URL in parentheses.
   */
  class MDImageReader extends MDLinkReader {
      /**
       * Produces any token the parent link reader recognizes, otherwise a
       * `Bang` token when `$line` begins with `!`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $s = parent::readToken($state, $line);
          if ($s) return $s;
          if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
          return null;
      }

      /**
       * Replaces the first `![alt](url)` token sequence with an `MDImageNode`.
       * A non-null title on the URL token becomes the node's `title` attribute.
       *
       * @param MDState $state
       * @param int $pass substitution pass number (not consulted here)
       * @param (MDToken|MDNode)[] $tokens token list, modified in place
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
              $alt = $match->tokens[1]->content;
              $target = $match->tokens[sizeof($match->tokens) - 1];
              $url = $target->content;
              $title = $target->extra;
              $node = new MDImageNode($url, $alt);
              if ($title !== null) {
                  $node->attributes['title'] = $title;
              }
              // Wrapped in an array so array_splice() inserts the object itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          return false;
      }

      /**
       * @return int `-1` when `$other` is exactly an `MDLinkReader` or
       * `MDReferencedLinkReader` (subclasses excluded), else `0`
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
              return -1;
          }
          return 0;
      }
  }
  /**
   * Reader for images with referential URL definitions. Consists of an
   * exclamation, alt text in square brackets, and link symbol in square brackets.
   * URL is defined the same as for `MDReferencedLinkReader`.
   */
  class MDReferencedImageReader extends MDReferencedLinkReader {
      /**
       * Produces any token the parent reader recognizes, otherwise a `Bang`
       * token when `$line` begins with `!`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $s = parent::readToken($state, $line);
          if ($s) return $s;
          if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
          return null;
      }

      /**
       * Replaces the first `![alt][ref]` token sequence with an
       * `MDReferencedImageNode`.
       *
       * @param MDState $state
       * @param int $pass substitution pass number (not consulted here)
       * @param (MDToken|MDNode)[] $tokens token list, modified in place
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
              $alt = $match->tokens[1]->content;
              $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
              // Wrapped in an array so array_splice() inserts the object itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ new MDReferencedImageNode($ref, $alt) ]);
              return true;
          }
          return false;
      }

      /**
       * @return int `-1` when `$other` is exactly an `MDLinkReader` or
       * `MDReferencedLinkReader` (subclasses excluded), else `0`
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
              return -1;
          }
          return 0;
      }
  }
  /**
   * Turns newlines inside text into explicit line-break nodes. Not part of
   * any default reader set, since most flavors ignore line breaks within
   * blocks.
   */
  class MDLineBreakReader extends MDReader {
      /**
       * Replaces every `MDTextNode` whose text contains a newline with an
       * `MDNode` holding one text node per line, separated by
       * `MDLineBreakNode`s. Other nodes are left unchanged.
       *
       * @param MDState $state
       * @param MDNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {
          $splitOnNewlines = function(MDNode $original) {
              if (!($original instanceof MDTextNode)) return null;
              $segments = explode("\n", $original->text);
              if (count($segments) == 1) return null;
              // First segment stands alone; each later one follows a break.
              $pieces = [ new MDTextNode(array_shift($segments)) ];
              foreach ($segments as $segment) {
                  $pieces[] = new MDLineBreakNode();
                  $pieces[] = new MDTextNode($segment);
              }
              return new MDNode($pieces);
          };
          MDNode::replaceNodes($state, $blocks, $splitOnNewlines);
      }
  }
  /**
   * Reads a verbatim HTML tag, and if it passes validation by `MDState.tagFilter`,
   * will be rendered in the final HTML document. Disallowed tags will be rendered
   * as plain text in the resulting document.
   */
  class MDHTMLTagReader extends MDReader {
      /**
       * Parses an HTML tag at the start of `$line`. Tags whose name the root
       * state's tag filter rejects produce no token. Accepted tags are
       * scrubbed by the filter before being wrapped in an `HTMLTag` token.
       *
       * @param MDState $state
       * @param string $line text to tokenize
       * @return ?MDToken an `HTMLTag` token, or `null`
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $tag = MDHTMLTag::fromLineStart($line, $state);
          if ($tag === null) return null;
          if (!$state->root()->tagFilter->isValidTagName($tag->tagName)) return null;
          $state->root()->tagFilter->scrubTag($tag);
          return new MDToken($tag->original, MDTokenType::HTMLTag, $tag);
      }

      /**
       * Replaces the first `HTMLTag` token with an `MDHTMLTagNode`.
       *
       * @param MDState $state
       * @param int $pass substitution pass number (not consulted here)
       * @param (MDToken|MDNode)[] $tokens token list, modified in place
       * @return bool `true` if a substitution was made
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ])) {
              $tag = $match->tokens[0]->tag;
              // Wrapped in an array so array_splice() inserts the object itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ new MDHTMLTagNode($tag) ]);
              return true;
          }
          return false;
      }
  }
  /**
   * Tokenizes tag modifiers: curly braces holding one or more CSS classes,
   * IDs, or custom attributes, separated by spaces, that apply to the
   * preceding node. Modifiers are validated and only acceptable values are
   * applied.
   */
  class MDModifierReader extends MDReader {
      /**
       * Emits a `Modifier` token when `MDTagModifier::fromStart` recognizes a
       * modifier at the start of `$line`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $parsed = MDTagModifier::fromStart($line);
          return $parsed
              ? new MDToken($parsed->original, MDTokenType::Modifier, $parsed)
              : null;
      }

      /**
       * Never substitutes anything.
       *
       * @return bool always `false`
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          // Modifiers are applied elsewhere, and if they're not it's fine if they're
          // rendered as the original syntax.
          return false;
      }
  }
  2654. // -- Nodes -----------------------------------------------------------------
  2655. /**
  2656. * Base class for nodes in the assembled document tree.
  2657. */
  2658. class MDNode {
  2659. /**
  2660. * Array of CSS classes to add to the node when rendered as HTML.
  2661. * @var string[]
  2662. */
  2663. public array $cssClasses = [];
  2664. public ?string $cssId = null;
  2665. /**
  2666. * Mapping of CSS attributes to values.
  2667. * @var array
  2668. */
  2669. public array $cssStyles = [];
  2670. /**
  2671. * Mapping of arbitrary attributes and values to add to this node's top-level
  2672. * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
  2673. * `cssClasses`, `cssId`, and `cssStyles` instead.
  2674. * @var array
  2675. */
  2676. public array $attributes = [];
  2677. /**
  2678. * All child nodes in this node.
  2679. * @var MDNode[]
  2680. */
  2681. public array $children = [];
  /**
   * Stores the given child or children. A single node is wrapped in a
   * one-element array.
   *
   * @param MDNode[]|MDNode $children
   * @throws Error if an array element is not an `MDNode`
   */
  public function __construct(array|MDNode $children=[]) {
      if ($children instanceof MDNode) {
          $this->children = [ $children ];
          return;
      }
      // The parameter type guarantees `$children` is an array from here on.
      foreach ($children as $elem) {
          if ($elem instanceof MDNode) continue;
          $thisClassName = get_class($this);
          // get_debug_type() also handles scalars and null; get_class() would
          // raise a TypeError for them before this message could be built.
          $elemClassName = get_debug_type($elem);
          throw new Error("{$thisClassName} expects children of type MDNode[] or MDNode, got array with {$elemClassName} element");
      }
      $this->children = $children;
  }
  /**
   * Adds a CSS class. If already present it will not be duplicated.
   *
   * @param string $cssClass
   * @return bool `true` if the class was added, `false` if it was already present
   */
  public function addClass(string $cssClass): bool {
      // Strict comparison: a loose search treats numeric-looking strings such
      // as "1e3" and "1000" as equal.
      if (in_array($cssClass, $this->cssClasses, true)) return false;
      array_push($this->cssClasses, $cssClass);
      return true;
  }
  /**
   * Removes every occurrence of a CSS class from `cssClasses`.
   *
   * @param string $cssClass
   * @return bool whether the class was present and removed
   */
  public function removeClass(string $cssClass): bool {
      $countBefore = count($this->cssClasses);
      $remaining = array_diff($this->cssClasses, [ $cssClass ]);
      $this->cssClasses = $remaining;
      return count($remaining) != $countBefore;
  }
  /**
   * Produces the HTML for this node. The base implementation has no markup of
   * its own and returns the concatenated HTML of its children. A node without
   * content should yield an empty string.
   */
  public function toHTML(MDState $state): string {
      $html = self::arrayToHTML($this->children, $state);
      return $html;
  }
  /**
   * Produces the plain text for this node. The base implementation returns
   * the concatenated plain text of its children. Implementations should emit
   * ordinary text only, without markdown-like decoration (a list item, for
   * example, contributes its content but no leading asterisk). A node with
   * nothing to render should yield an empty string.
   */
  public function toPlaintext(MDState $state): string {
      $text = self::arrayToPlaintext($this->children, $state);
      return $text;
  }
  /**
   * Protected helper method that renders an HTML fragment of the attributes
   * to apply to the root HTML tag representation of this node.
   *
   * Example result with a couple `cssClasses`, a `cssId`, and a custom
   * `attributes` key-value pair:
   *
   * ```
   * class="foo bar" id="baz" lang="en"
   * ```
   *
   * The value includes a leading space if it's non-empty so that it can be
   * concatenated directly after the tag name and before the closing `>`.
   *
   * `class`, `id` and `style` keys in `attributes` are ignored, as are keys
   * that `MDUtils::scrubAttributeName` reduces to an empty string. All
   * attribute values are HTML-escaped.
   *
   * @return string attribute fragment, or an empty string
   */
  protected function htmlAttributes(): string {
      $html = '';
      if (sizeof($this->cssClasses) > 0) {
          // Previously assigned to `$classlist` but interpolated `$classList`,
          // which left the class attribute empty.
          $classList = htmlspecialchars(implode(' ', $this->cssClasses));
          $html .= " class=\"{$classList}\"";
      }
      if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
          $escapedId = htmlspecialchars($this->cssId);
          $html .= " id=\"{$escapedId}\"";
      }
      $styles = [];
      foreach ($this->cssStyles as $key => $value) {
          array_push($styles, "{$key}: {$value};");
      }
      if (sizeof($styles) > 0) {
          $escaped = htmlspecialchars(implode(' ', $styles));
          $html .= " style=\"{$escaped}\"";
      }
      foreach ($this->attributes as $key => $value) {
          if ($key === 'class' || $key === 'id' || $key === 'style') continue;
          $cleanKey = MDUtils::scrubAttributeName($key);
          if (mb_strlen($cleanKey) == 0) continue;
          $cleanValue = htmlspecialchars($value);
          $html .= " {$cleanKey}=\"{$cleanValue}\"";
      }
      return $html;
  }
  /**
   * Protected helper returning the concatenated HTML of every child of this
   * node. Intended mainly for subclasses to call from their own `toHTML`.
   */
  protected function childHTML(MDState $state): string {
      $html = self::arrayToHTML($this->children, $state);
      return $html;
  }
  /**
   * Protected helper returning the concatenated plain text of every child of
   * this node.
   */
  protected function childPlaintext(MDState $state): string {
      $text = self::arrayToPlaintext($this->children, $state);
      return $text;
  }
  /**
   * Protected helper for rendering nodes represented by simple paired HTML
   * tags. Custom CSS classes and attributes will be included in the result,
   * and child content will be rendered between the tags.
   *
   * A newline follows the opening tag when the first child is a block node,
   * precedes the closing tag when the last child is a block node, and
   * follows the closing tag when this node itself is a block node.
   *
   * @param MDState $state
   * @param string $tagName tag name to use for the opening and closing tags
   * @return string rendered HTML
   */
  protected function simplePairedTagHTML(MDState $state, string $tagName): string {
      // `??` keeps a childless node from reading an undefined offset.
      $firstChild = $this->children[0] ?? null;
      $lastChild = $this->children[sizeof($this->children) - 1] ?? null;
      $openTagSuffix = $firstChild instanceof MDBlockNode ? "\n" : "";
      $closeTagPrefix = $lastChild instanceof MDBlockNode ? "\n" : '';
      $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
      $attr = $this->htmlAttributes();
      $childHTML = $this->childHTML($state);
      return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
  }
  /**
   * Invokes `$fn` on every descendant of this node. Each child is passed to
   * the callback before its own children are visited (depth-first). This
   * node itself is not passed to the callback.
   *
   * @param callable $fn receives one node per call
   */
  public function visitChildren(callable $fn) {
      foreach ($this->children as $descendant) {
          $fn($descendant);
          $descendant->visitChildren($fn);
      }
  }
  2817. /**
  2818. * Helper for rendering and concatenating HTML from an array of `MDNode`s.
  2819. *
  2820. * @param MDNode[] $nodes
  2821. * @param MDState $state
  2822. * @return string HTML string
  2823. */
  2824. public static function arrayToHTML(array $nodes, MDState $state): string {
  2825. return implode('', array_map(function($node) use ($state) {
  2826. return $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : '');
  2827. }, $nodes));
  2828. }
  2829. /**
  2830. * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
  2831. *
  2832. * @param MDNode[] $nodes
  2833. * @param MDState $state
  2834. * @return string plaintext
  2835. */
  2836. public static function arrayToPlaintext(array $nodes, MDState $state): string {
  2837. return implode('', array_map(fn($node) => $node->toPlaintext($state), $nodes));
  2838. }
  2839. /**
  2840. * Recursively searches and replaces nodes in a tree. The given `replacer`
  2841. * is passed every node in the tree. If `replacer` returns a new `MDNode`
  2842. * the original will be replaced with it. If the function returns `null` no
  2843. * change will be made to that node. Traversal is depth-first.
  2844. *
  2845. * @param {MDState} state
  2846. * @param {MDNode[]} nodes
  2847. * @param {function} replacer - takes a node as an argument, returns either
  2848. * a new node or `null` to leave it unchanged
  2849. */
  2850. public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
  2851. for ($i = 0; $i < sizeof($nodes); $i++) {
  2852. $originalNode = $nodes[$i];
  2853. $replacement = $replacer($originalNode);
  2854. if ($replacement instanceof MDNode) {
  2855. array_splice($nodes, $i, 1, [$replacement]);
  2856. } else {
  2857. self::replaceNodes($state, $originalNode->children, $replacer);
  2858. }
  2859. }
  2860. }
  2861. }
  2862. /**
  2863. * Marker subclass that indicates a node represents block syntax.
  2864. */
  2865. class MDBlockNode extends MDNode {}
  2866. /**
  2867. * Paragraph block.
  2868. */
  2869. class MDParagraphNode extends MDBlockNode {
  2870. public function toHTML(MDState $state): string {
  2871. return $this->simplePairedTagHTML($state, 'p');
  2872. }
  2873. }
  2874. /**
  2875. * A heading block with a level from 1 to 6.
  2876. */
  2877. class MDHeadingNode extends MDBlockNode {
  2878. public int $level;
  2879. public function __construct(int $level, array $children) {
  2880. parent::__construct($children);
  2881. if (!is_int($level) || ($level < 1 || $level > 6)) {
  2882. $thisClassName = get_class($this);
  2883. throw new Error("{$thisClassName} requires heading level 1 to 6");
  2884. }
  2885. $this->level = $level;
  2886. }
  2887. public function toHTML(MDState $state): string {
  2888. return $this->simplePairedTagHTML($state, "h{$this->level}");
  2889. }
  2890. }
  2891. /**
  2892. * A sub-text block with smaller, less prominent text.
  2893. */
  2894. class MDSubtextNode extends MDBlockNode {
  2895. public function toHTML(MDState $state): string {
  2896. $this->addClass('subtext');
  2897. return $this->simplePairedTagHTML($state, 'div');
  2898. }
  2899. }
  2900. /**
  2901. * Node for a horizontal dividing line.
  2902. */
  2903. class MDHorizontalRuleNode extends MDBlockNode {
  2904. public function toHTML(MDState $state): string {
  2905. return "<hr" . $this->htmlAttributes() . ">";
  2906. }
  2907. }
  2908. /**
  2909. * A block quote, usually rendered indented from other text.
  2910. */
  2911. class MDBlockquoteNode extends MDBlockNode {
  2912. public function toHTML(MDState $state): string {
  2913. return $this->simplePairedTagHTML($state, 'blockquote');
  2914. }
  2915. }
  2916. /**
  2917. * A bulleted list. Contains `MDListItemNode` children.
  2918. */
  2919. class MDUnorderedListNode extends MDBlockNode {
  2920. /** @var MDListItemNode[] $children */
  2921. public function toHTML(MDState $state): string {
  2922. return $this->simplePairedTagHTML($state, 'ul');
  2923. }
  2924. }
  2925. /**
  2926. * A numbered list. Contains `MDListItemNode` children.
  2927. */
  2928. class MDOrderedListNode extends MDBlockNode {
  2929. /** @var MDListItemNode[] $children */
  2930. public int $startOrdinal;
  2931. /**
  2932. * @param MDListItemNode[] $children
  2933. * @param ?int $startOrdinal
  2934. */
  2935. public function __construct(array $children, ?int $startOrdinal=null) {
  2936. parent::__construct($children);
  2937. $this->startOrdinal = $startOrdinal;
  2938. }
  2939. public function toHTML(MDState $state): string {
  2940. if ($this->startOrdinal !== null && $this->startOrdinal != 1) {
  2941. $this->attributes['start'] = strval($this->startOrdinal);
  2942. }
  2943. return $this->simplePairedTagHTML($state, 'ol');
  2944. }
  2945. }
  2946. /**
  2947. * An item in a bulleted or numbered list.
  2948. */
  2949. class MDListItemNode extends MDBlockNode {
  2950. public int $ordinal;
  2951. /**
  2952. * @param MDNode|MDNode[] $children
  2953. * @param ?int $ordinal
  2954. */
  2955. public function __construct(array $children, ?int $ordinal=null) {
  2956. parent::__construct($children);
  2957. $this->ordinal = $ordinal;
  2958. }
  2959. public function toHTML(MDState $state): string {
  2960. return $this->simplePairedTagHTML($state, 'li');
  2961. }
  2962. }
  2963. /**
  2964. * A block of preformatted computer code. Inner markdown is ignored.
  2965. */
  2966. class MDCodeBlockNode extends MDBlockNode {
  2967. public string $text;
  2968. /**
  2969. * The programming language of the content.
  2970. */
  2971. public ?string $language;
  2972. public function __construct(string $text, ?string $language=null) {
  2973. super([]);
  2974. $this->text = $text;
  2975. $this->language = $language;
  2976. }
  2977. public function toHTML(MDState $state): string {
  2978. $languageModifier = ($this->language !== null) ? " class=\"language-{$this->language}\"" : '';
  2979. return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
  2980. htmlentities($this->text) . "</code></pre>\n";
  2981. }
  2982. }
  2983. /**
  2984. * A table node with a single header row and any number of body rows.
  2985. *
  2986. * If modifying the rows, use the `headerRow` and `bodyRows` accessors,
  2987. * otherwise `children` may get out of sync.
  2988. */
  2989. class MDTableNode extends MDBlockNode {
  2990. /** @var MDTableRowNode[] $children */
  2991. public function headerRow(): ?MDTableRowNode { return $this->children[0] ?? null; }
  2992. public function bodyRows(): array { return array_slice($this->children, 1); }
  2993. /**
  2994. * How to align each column. Columns beyond the length of the array or with
  2995. * corresponding `null` elements will have no alignment set. Values should
  2996. * be valid CSS `text-align` values.
  2997. *
  2998. * @var string[]
  2999. */
  3000. public array $columnAlignments = [];
  3001. /**
  3002. * @param MDTableRowNode $headerRow
  3003. * @param MDTableRowNode[] $bodyRows
  3004. */
  3005. public function __construct(MDTableRow $headerRow, array $bodyRows) {
  3006. parent::__construct(array_merge([ $headerRow ], $bodyRows));
  3007. }
  3008. public function applyAlignments() {
  3009. foreach ($this->children as $child) {
  3010. $this->applyAlignmentsToRow($child);
  3011. }
  3012. }
  3013. private function applyAlignmentsToRow(MDTableRowNode $row) {
  3014. foreach ($row->children as $columnIndex => $cell) {
  3015. $alignment = $this->columnAlignments[$columnIndex] ?? null;
  3016. $this->applyAlignmentToCell($cell, $alignment);
  3017. }
  3018. }
  3019. public function applyAlignmentToCell(MDTableCellNode $cell, ?string $alignment) {
  3020. if ($alignment) {
  3021. $cell->cssStyles['text-align'] = $alignment;
  3022. } else {
  3023. unset($cell->cssStyles['text-align']);
  3024. }
  3025. }
  3026. public function toHTML(MDState $state): string {
  3027. $this->applyAlignments();
  3028. $html = '';
  3029. $html .= "<table" . $this->htmlAttributes() . ">\n";
  3030. $html .= '<thead>\n';
  3031. $html .= $this->headerRow->toHTML($state) . "\n";
  3032. $html .= "</thead>\n";
  3033. $html .= "<tbody>\n";
  3034. $html .= MDNode::toHTML($this->bodyRows, $state) . "\n";
  3035. $html .= "</tbody>\n";
  3036. $html .= "</table>\n";
  3037. return $html;
  3038. }
  3039. }
  3040. /**
  3041. * Node for one row (header or body) in a table.
  3042. */
  3043. class MDTableRowNode extends MDBlockNode {
  3044. /** @var MDTableCellNode[] $children */
  3045. public function toHTML(MDState $state): string {
  3046. return $this->simplePairedTagHTML($state, 'tr');
  3047. }
  3048. }
  3049. /**
  3050. * Node for one cell in a table row.
  3051. */
  3052. class MDTableCellNode extends MDBlockNode {
  3053. public function toHTML(MDState $state): string {
  3054. return $this->simplePairedTagHTML($state, 'td');
  3055. }
  3056. }
  3057. /**
  3058. * Node for a header cell in a header table row.
  3059. */
  3060. class MDTableHeaderCellNode extends MDBlockNode {
  3061. public function toHTML(MDState $state): string {
  3062. return $this->simplePairedTagHTML($state, 'th');
  3063. }
  3064. }
  3065. /**
  3066. * Definition list with `MDDefinitionListTermNode` and
  3067. * `MDDefinitionListDefinitionNode` children.
  3068. */
  3069. class MDDefinitionListNode extends MDBlockNode {
  3070. public function toHTML(MDState $state): string {
  3071. return $this->simplePairedTagHTML($state, 'dl');
  3072. }
  3073. }
  3074. /**
  3075. * A word or term in a definition list.
  3076. */
  3077. class MDDefinitionListTermNode extends MDBlockNode {
  3078. public function toHTML(MDState $state): string {
  3079. return $this->simplePairedTagHTML($state, 'dt');
  3080. }
  3081. }
  3082. /**
  3083. * The definition of a word or term in a definition list. Should follow a
  3084. * definition term, or another definition to serve as an alternate.
  3085. */
  3086. class MDDefinitionListDefinitionNode extends MDBlockNode {
  3087. public function toHTML(MDState $state): string {
  3088. return $this->simplePairedTagHTML($state, 'dd');
  3089. }
  3090. }
  3091. /**
  3092. * Block at the bottom of a document listing all the footnotes with their
  3093. * content.
  3094. */
  3095. class MDFootnoteListNode extends MDBlockNode {
  3096. private function footnoteId(MDState $state, string $symbol): int {
  3097. $lookup = $state->root()->userInfo['footnoteIds'];
  3098. if (!$lookup) return null;
  3099. return $lookup[$symbol] ?? null;
  3100. }
  3101. public function toHTML(MDState $state): string {
  3102. $footnotes = $state->userInfo['footnotes'];
  3103. $symbolOrder = array_keys($footnotes);
  3104. if (sizeof($footnotes) == 0) return '';
  3105. $footnoteUniques = $state->root()->footnoteInstances;
  3106. $html = '';
  3107. $html .= '<div class="footnotes">';
  3108. $html .= '<ol>';
  3109. foreach ($symbolOrder as $symbol) {
  3110. $content = $footnotes[$symbol];
  3111. if (!$content) continue;
  3112. $footnoteId = $this->footnoteId($state, $symbol);
  3113. $contentHTML = MDNode::toHTML($content, $state);
  3114. $html .= "<li value=\"{$footnoteId}\" id=\"{$state->root()->elementIdPrefix}footnote_{$footnoteId}\">{$contentHTML}";
  3115. $uniques = $footnoteUniques[$symbol];
  3116. if ($uniques) {
  3117. foreach ($uniques as $unique) {
  3118. $html .= " <a href=\"#{$state->root()->elementIdPrefix}footnoteref_{$unique}\" class=\"footnote-backref\">↩︎</a>";
  3119. }
  3120. }
  3121. $html .= "</li>\n";
  3122. }
  3123. $html .= '</ol>';
  3124. $html .= '</div>';
  3125. return html;
  3126. }
  3127. public function toPlaintext(MDState $state): string {
  3128. $footnotes = $state->userInfo['footnotes'];
  3129. $symbolOrder = array_keys($footnotes);
  3130. if (sizeof($footnotes) == 0) return '';
  3131. $text = '';
  3132. foreach ($symbolOrder as $symbol) {
  3133. $content = $footnotes[$symbol];
  3134. if (!$content) continue;
  3135. $text .= "{$symbol}. " . $this->childPlaintext(state) . "\n";
  3136. }
  3137. return trim($text);
  3138. }
  3139. }
  3140. /**
  3141. * Marker subclass that indicates a node represents inline syntax.
  3142. */
  3143. class MDInlineNode extends MDNode {}
  3144. /**
  3145. * Contains plain text. Special HTML characters are escaped when rendered.
  3146. */
  3147. class MDTextNode extends MDInlineNode {
  3148. public string $text;
  3149. public function __construct(string $text) {
  3150. parent::__construct([]);
  3151. if (!is_string($text) || mb_strlen($text) == 0) throw new Error("Meh!");
  3152. $this->text = $text;
  3153. }
  3154. public function toHTML(MDState $state): string {
  3155. return htmlentities($this->text);
  3156. }
  3157. public function toPlaintext(MDState $state): string {
  3158. return $this->text;
  3159. }
  3160. }
  3161. /**
  3162. * Contains plain text which is rendered with HTML entities when rendered to
  3163. * be marginally more difficult for web scapers to decipher. Used for
  3164. * semi-sensitive info like email addresses.
  3165. */
  3166. class MDObfuscatedTextNode extends MDTextNode {
  3167. public function toHTML(MDState $state): string {
  3168. return MDUtils::escapeObfuscated($this->text);
  3169. }
  3170. }
  3171. /**
  3172. * Emphasized (italicized) content.
  3173. */
  3174. class MDEmphasisNode extends MDInlineNode {
  3175. public function toHTML(MDState $state): string {
  3176. return $this->simplePairedTagHTML($state, 'em');
  3177. }
  3178. }
  3179. /**
  3180. * Strong (bold) content.
  3181. */
  3182. class MDStrongNode extends MDInlineNode {
  3183. public function toHTML(MDState $state): string {
  3184. return $this->simplePairedTagHTML($state, 'strong');
  3185. }
  3186. }
  3187. /**
  3188. * Content rendered with a line through it.
  3189. */
  3190. class MDStrikethroughNode extends MDInlineNode {
  3191. public function toHTML(MDState $state): string {
  3192. return $this->simplePairedTagHTML($state, 's');
  3193. }
  3194. }
  3195. /**
  3196. * Underlined content.
  3197. */
  3198. class MDUnderlineNode extends MDInlineNode {
  3199. public function toHTML(MDState $state): string {
  3200. return $this->simplePairedTagHTML($state, 'u');
  3201. }
  3202. }
  3203. /**
  3204. * Highlighted content. Usually rendered with a bright colored background.
  3205. */
  3206. class MDHighlightNode extends MDInlineNode {
  3207. public function toHTML(MDState $state): string {
  3208. return $this->simplePairedTagHTML($state, 'mark');
  3209. }
  3210. }
  3211. /**
  3212. * Superscripted content.
  3213. */
  3214. class MDSuperscriptNode extends MDInlineNode {
  3215. public function toHTML(MDState $state): string {
  3216. return $this->simplePairedTagHTML($state, 'sup');
  3217. }
  3218. }
  3219. /**
  3220. * Subscripted content.
  3221. */
  3222. class MDSubscriptNode extends MDInlineNode {
  3223. public function toHTML(MDState $state): string {
  3224. return $this->simplePairedTagHTML($state, 'sub');
  3225. }
  3226. }
  3227. /**
  3228. * Inline plaintext indicating computer code.
  3229. */
  3230. class MDCodeNode extends MDInlineNode {
  3231. public string $text;
  3232. public function __construct(string $text) {
  3233. parent::__construct([]);
  3234. $this->text = $text;
  3235. }
  3236. public function toHTML(MDState $state): string {
  3237. return "<code" . $this->htmlAttributes() . ">" . MDUtils::escapeHTML($this->text) . "</code>";
  3238. }
  3239. }
  3240. /**
  3241. * A footnote symbol in a document. Denoted as a superscripted number that can
  3242. * be clicked to go to its content at the bottom of the document.
  3243. */
  3244. class MDFootnoteNode extends MDInlineNode {
  3245. /**
  3246. * Symbol the author used to match up the footnote to its content definition.
  3247. */
  3248. public string $symbol;
  3249. /**
  3250. * The superscript symbol rendered in HTML. May be the same or different
  3251. * than `symbol`.
  3252. */
  3253. public ?string $displaySymbol = null;
  3254. /**
  3255. * Unique ID for the footnote definition.
  3256. */
  3257. public ?int $footnoteId = null;
  3258. /**
  3259. * Unique number for backlinking to a footnote occurrence. Populated by
  3260. * `MDFootnoteReader.postProcess`.
  3261. */
  3262. public ?int $occurrenceId = null;
  3263. public function __construct(string $symbol, ?string $title=null) {
  3264. parent::__construct([]);
  3265. $this->symbol = $symbol;
  3266. if ($title) $this->attributes['title'] = $title;
  3267. }
  3268. public function toHTML(MDState $state): string {
  3269. if ($this->differentiator !== null) {
  3270. return "<sup class=\"footnote\" id=\"{$state->root()->elementIdPrefix}footnoteref_{$this->occurrenceId}\"" . $this->htmlAttributes() . ">" .
  3271. "<a href=\"#{$state->root()->elementIdPrefix}footnote_{$this->footnoteId}\">" . htmlentities($this->displaySymbol ?? $this->symbol) . "</a></sup>";
  3272. }
  3273. return "<!--FNREF:{{$this->symbol}}-->";
  3274. }
  3275. }
  3276. /**
  3277. * A clickable hypertext link.
  3278. */
  3279. class MDLinkNode extends MDInlineNode {
  3280. public string $href;
  3281. /**
  3282. * @param string $href
  3283. * @param MDNode[]|MDNode $children
  3284. */
  3285. public function __construct(string $href, array|MDNode $children, ?string $title=null) {
  3286. parent::__construct($children);
  3287. $this->href = $href;
  3288. if ($title !== null) $this->attributes['title'] = $title;
  3289. }
  3290. public function toHTML(MDState $state): string {
  3291. if (str_starts_with($this->href, 'mailto:')) {
  3292. $escapedLink = MDUtils::escapeObfuscated($this->href);
  3293. } else {
  3294. $escapedLink = htmlentities($this->href);
  3295. }
  3296. return "<a href=\"{$escapedLink}\"" . $this->htmlAttributes() . ">" . $this->childHTML($state) . "</a>";
  3297. }
  3298. }
  3299. /**
  3300. * A clickable hypertext link where the URL is defined elsewhere by reference.
  3301. */
  3302. class MDReferencedLinkNode extends MDLinkNode {
  3303. public string $reference;
  3304. public function __construct(string $reference, array|MDNode $children) {
  3305. parent::__construct('', $children);
  3306. $this->reference = $reference;
  3307. }
  3308. public function toHTML(MDState $state): string {
  3309. if ($this->href === '') {
  3310. $url = $state->urlForReference($this->reference);
  3311. if ($url) $this->href = $url;
  3312. $title = $state->urlTitleForReference($this->reference);
  3313. if ($title) $this->attributes['title'] = $title;
  3314. }
  3315. return $super->toHTML($state);
  3316. }
  3317. }
  3318. /**
  3319. * An inline image.
  3320. */
  3321. class MDImageNode extends MDInlineNode {
  3322. public string $src;
  3323. public ?string $alt;
  3324. public function __construct(string $src, ?string $alt) {
  3325. super([]);
  3326. $this->src = $src;
  3327. $this->alt = $alt;
  3328. }
  3329. public function toHTML(MDState $state): string {
  3330. $html = "<img src=\"" . htmlentities($this->src) . "\"";
  3331. if ($this->alt) $html .= " alt=\"" . htmlentities($this->alt) . "\"";
  3332. $html .= $this->htmlAttributes() . ">";
  3333. return $html;
  3334. }
  3335. }
  3336. /**
  3337. * An inline image where the URL is defined elsewhere by reference.
  3338. */
  3339. class MDReferencedImageNode extends MDImageNode {
  3340. public string $reference;
  3341. public function __construct(string $reference, ?string $alt=null) {
  3342. parent::__construct('', $alt, []);
  3343. $this->reference = $reference;
  3344. }
  3345. public function toHTML(MDState $state): string {
  3346. if ($this->src === '') {
  3347. $url = $state->urlForReference($this->reference);
  3348. if ($url !== null) $this->src = $url;
  3349. $title = $state->urlTitleForReference($this->reference);
  3350. if ($title !== null) $this->attributes['title'] = $title;
  3351. }
  3352. return super.toHTML(state);
  3353. }
  3354. }
  3355. /**
  3356. * An abbreviation that can be hovered over to see its full expansion.
  3357. */
  3358. class MDAbbreviationNode extends MDInlineNode {
  3359. /** @type {string} */
  3360. public string $abbreviation;
  3361. /**
  3362. * @param {string} abbreviation
  3363. * @param {string} definition
  3364. */
  3365. public function __construct(string $abbreviation, string $definition) {
  3366. super([]);
  3367. $this->abbreviation = $abbreviation;
  3368. $this->attributes['title'] = $definition;
  3369. }
  3370. public function toHTML(MDState $state): string {
  3371. return "<abbr" . $this->htmlAttributes() . ">" . htmlentities($this->abbreviation) . "</abbr>";
  3372. }
  3373. }
  3374. /**
  3375. * A line break that is preserved when rendered to HTML.
  3376. */
  3377. class MDLineBreakNode extends MDInlineNode {
  3378. public function toHTML(MDState $state): string {
  3379. return '<br>';
  3380. }
  3381. public function toPlaintext(MDState $state): string {
  3382. return "\n";
  3383. }
  3384. }
  3385. /**
  3386. * A verbatim HTML tag. May be altered to strip out disallowed attributes or
  3387. * CSS values.
  3388. */
  3389. class MDHTMLTagNode extends MDInlineNode {
  3390. public MDHTMLTag $tag;
  3391. public function __construct(MDHTMLTag $tag) {
  3392. parent::__construct([]);
  3393. $this->tag = $tag;
  3394. }
  3395. public function toHTML(MDState $state): string {
  3396. return "{$this->tag}";
  3397. }
  3398. }
  3399. // -- Main class ------------------------------------------------------------
  3400. /**
  3401. * Markdown parser.
  3402. */
  3403. class Markdown {
  3404. /**
  3405. * Set of standard readers to handle common syntax.
  3406. * @type {MDReader[]}
  3407. */
  3408. public static function standardReaders(): array {
  3409. if (self::$sharedStandardReaders === null) {
  3410. self::$sharedStandardReaders = [
  3411. new MDUnderlinedHeadingReader(),
  3412. new MDHashHeadingReader(),
  3413. new MDBlockQuoteReader(),
  3414. new MDHorizontalRuleReader(),
  3415. new MDUnorderedListReader(),
  3416. new MDOrderedListReader(),
  3417. new MDFencedCodeBlockReader(),
  3418. new MDIndentedCodeBlockReader(),
  3419. new MDParagraphReader(),
  3420. new MDStrongReader(),
  3421. new MDEmphasisReader(),
  3422. new MDCodeSpanReader(),
  3423. new MDImageReader(),
  3424. new MDLinkReader(),
  3425. new MDHTMLTagReader(),
  3426. ];
  3427. }
  3428. return self::$sharedStandardReaders;
  3429. }
  3430. private static ?array $sharedStandardReaders = null;
  3431. /**
  3432. * All supported readers except `MDLineBreakReader`.
  3433. * @type {MDReader[]}
  3434. */
  3435. public static function allReaders(): array {
  3436. if (self::$sharedAllReaders === null) {
  3437. $sharedAllReaders = array_merge(self::standardReaders(), [
  3438. new MDSubtextReader(),
  3439. new MDTableReader(),
  3440. new MDDefinitionListReader(),
  3441. new MDFootnoteReader(),
  3442. new MDAbbreviationReader(),
  3443. new MDUnderlineReader(),
  3444. new MDSubscriptReader(),
  3445. new MDStrikethroughReader(),
  3446. new MDHighlightReader(),
  3447. new MDSuperscriptReader(),
  3448. new MDReferencedImageReader(),
  3449. new MDReferencedLinkReader(),
  3450. new MDModifierReader(),
  3451. ]);
  3452. }
  3453. return $sharedAllReaders;
  3454. }
  3455. private static ?array $sharedAllReaders = null;
  3456. /**
  3457. * Shared instance of a parser with standard syntax.
  3458. */
  3459. public static function standardParser(): Markdown {
  3460. if (self::$sharedStandardMarkdown === null) {
  3461. self::$sharedStandardMarkdown = new Markdown(self::standardReaders());
  3462. }
  3463. return self::$sharedStandardMarkdown;
  3464. }
  3465. private static ?Markdown $sharedStandardMarkdown = null;
  3466. /**
  3467. * Shared instance of a parser with all supported syntax.
  3468. */
  3469. public static function completeParser(): Markdown {
  3470. if (self::$sharedCompleteParser === null) {
  3471. self::$sharedCompleteParser = new Markdown(self::allReaders());
  3472. }
  3473. return self::$sharedCompleteParser;
  3474. }
  3475. public static ?Markdown $sharedCompleteParser = null;
  3476. /**
  3477. * Filter for what non-markdown HTML is permitted. HTML generated as a
  3478. * result of markdown is unaffected.
  3479. */
  3480. public MDHTMLFilter $tagFilter;
  3481. /** @var MDReader[] */
  3482. private array $readers;
  3483. /** @var MDReader[] */
  3484. private array $readersByBlockPriority;
  3485. /** @var MDReader[] */
  3486. private array $readersByTokenPriority;
  3487. private array $readersBySubstitutePriority;
  3488. /**
  3489. * Creates a Markdown parser with the given syntax readers.
  3490. *
  3491. * @param MDReader[] $readers
  3492. */
  3493. public function __construct(?array $readers=null) {
  3494. $this->readers = $readers ?? self::allReaders();
  3495. $this->readersByBlockPriority = MDReader::sortReaderForBlocks($this->readers);
  3496. $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($this->readers);
  3497. $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($this->readers);
  3498. $this->tagFilter = new MDHTMLFilter();
  3499. }
  3500. /**
  3501. * Converts a markdown string to an HTML string.
  3502. *
  3503. * @param string $markdown
  3504. * @param string $elementIdPrefix Optional prefix for generated element
  3505. * `id`s and links to them. For differentiating multiple markdown docs in
  3506. * the same HTML page.
  3507. * @return string HTML
  3508. */
  3509. public function toHTML(string $markdown, string $elementIdPrefix='') {
  3510. $lines = mb_split('(?:\\n|\\r|\\r\\n)', $markdown);
  3511. try {
  3512. return $this->parse($lines, $elementIdPrefix);
  3513. } catch (Error $e) {
  3514. $this->investigateException($lines, $elementIdPrefix);
  3515. throw $e;
  3516. }
  3517. }
  3518. /**
  3519. * @param string[] $lines
  3520. * @param string $elementIdPrefix
  3521. */
  3522. private function parse(array $lines, string $elementIdPrefix) {
  3523. $state = new MDState($lines);
  3524. $state->readersByBlockPriority = $this->readersByBlockPriority;
  3525. $state->readersByTokenPriority = $this->readersByTokenPriority;
  3526. $state->readersBySubstitutePriority = $this->readersBySubstitutePriority;
  3527. $state->tagFilter = $this->tagFilter;
  3528. $state->elementIdPrefix = $elementIdPrefix;
  3529. foreach ($this->readers as $reader) {
  3530. $reader->preProcess($state);
  3531. }
  3532. $nodes = $state->readBlocks();
  3533. foreach ($this->readers as $reader) {
  3534. $reader->postProcess($state, $nodes);
  3535. }
  3536. return MDNode::arrayToHTML($nodes, $state);
  3537. }
  3538. /**
  3539. * Keeps removing first and last lines of markdown to locate the source of
  3540. * an exception and prints the minimal snippet.
  3541. *
  3542. * @param string[] $lines
  3543. * @param string $elementIdPrefix
  3544. */
  3545. private function investigateException(array $lines, string $elementIdPrefix) {
  3546. $startIndex = 0;
  3547. $endIndex = sizeof($lines);
  3548. // Keep stripping away first line until an exception stops being thrown
  3549. for ($i = 0; $i < sizeof($lines); $i++) {
  3550. try {
  3551. $this->parse(array_slice($lines, $i, $endIndex), $elementIdPrefix);
  3552. break;
  3553. } catch (Error $e0) {
  3554. $startIndex = $i;
  3555. }
  3556. }
  3557. // Keep stripping away last line until an exception stops being thrown
  3558. for ($i = sizeof($lines); $i > $startIndex; $i--) {
  3559. try {
  3560. $this->parse(array_slice($lines, $startIndex, $i), $elementIdPrefix);
  3561. break;
  3562. } catch (Error $e0) {
  3563. $endIndex = $i;
  3564. }
  3565. }
  3566. $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex));
  3567. print("This portion of markdown caused an unexpected exception: {$problematicMarkdown}");
  3568. }
  3569. }
  3570. ?>