PHP and Javascript implementations of a simple markdown parser
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

markdown.php 119KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889
  1. <?php
  2. declare(strict_types=1);
  /**
   * Static utilities shared by the markdown parser: base regular expressions,
   * HTML obfuscation, indent handling, and small array helpers.
   */
  class MDUtils {
      // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
      public static $baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';

      // Modified from https://emailregex.com/ to remove capture groups.
      public static $baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

      /**
       * Encodes every character as an HTML decimal numeric entity (`&#NN;`) to
       * make it marginally more difficult for web scrapers to grab sensitive
       * info. If `text` starts with `mailto:` that prefix is kept verbatim and
       * only the remainder is obfuscated.
       *
       * @param string $text text to obfuscate
       * @return string the obfuscated HTML
       */
      public static function escapeObfuscated(string $text): string {
          if (str_starts_with($text, 'mailto:')) {
              // 'mailto:' is 7 characters long
              return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
          }
          $html = '';
          $l = mb_strlen($text);
          for ($p = 0; $p < $l; $p++) {
              $cp = mb_ord(mb_substr($text, $p, 1));
              // A numeric character reference is "&#" + code point + ";".
              $html .= "&#{$cp};";
          }
          return $html;
      }

      /**
       * Removes illegal characters (tab, newline, form feed, space, `/`, `>`,
       * `"`, `'`, `=`) from an HTML attribute name.
       *
       * @param string $name raw attribute name
       * @return string the name with those characters removed
       */
      public static function scrubAttributeName(string $name): string {
          return mb_ereg_replace('[\\t\\n\\f \\/>"\'=]+', '', $name);
      }

      /**
       * Strips one or more leading indents from a line or lines of markdown. An
       * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
       * spaces) are treated like one indent level.
       *
       * @param string|string[] $line line or array of lines (not modified)
       * @param int $levels number of indent levels to remove
       * @return string|string[] same shape as the input, with indents removed
       */
      public static function stripIndent(string|array &$line, int $levels=1): string|array {
          // Builds e.g. `^(?: {1,4}|\t){2}` for two levels.
          $regex = '^(?: {1,4}|\\t){' . $levels . '}';
          if (is_array($line)) {
              return array_map(
                  fn(string $l): string => mb_ereg_replace($regex, '', $l),
                  $line
              );
          }
          return mb_ereg_replace($regex, '', $line);
      }

      /**
       * Counts the number of indent levels in a line of text. Partial indents
       * (1 to 3 spaces) are counted as one indent level unless `fullIndentsOnly`
       * is `true`.
       *
       * @param string $line line to inspect (not modified)
       * @param bool $fullIndentsOnly whether only 4 spaces or a tab count
       * @return int number of leading indent levels
       */
      public static function countIndents(string &$line, bool $fullIndentsOnly=false): int {
          // normalize indents to tabs
          $indentPattern = $fullIndentsOnly ? "(?: {4}|\\t)" : "(?: {1,4}|\\t)";
          $t = mb_ereg_replace($indentPattern, "\t", $line);
          // remove content after indent, leaving only the leading tabs
          $t = mb_ereg_replace("^(\\t*)(.*?)$", "\\1", $t);
          // count tabs
          return mb_strlen($t);
      }

      /**
       * Returns a copy of an array without any whitespace-only lines at the end.
       *
       * @param string[] $lines (not modified)
       * @return string[]
       */
      public static function withoutTrailingBlankLines(array &$lines): array {
          $stripped = $lines;
          while (sizeof($stripped) > 0 && mb_strlen(trim($stripped[sizeof($stripped) - 1])) == 0) {
              array_pop($stripped);
          }
          return $stripped;
      }

      /**
       * Tests if an array of lines contains at least one blank. A blank line
       * can contain whitespace.
       *
       * @param string[] $lines
       * @return bool `true` if any line is empty after trimming
       */
      public static function containsBlankLine(array &$lines): bool {
          foreach ($lines as $line) {
              if (mb_strlen(trim($line)) == 0) return true;
          }
          return false;
      }

      /**
       * Tests whether two associative arrays hold the same key/value pairs.
       * Values are compared by `array_diff_assoc`, i.e. by their string
       * representation.
       *
       * @param array $a
       * @param array $b
       * @return bool `true` if both arrays have identical keys and values
       */
      public static function equalAssocArrays(array &$a, array &$b): bool {
          // array_diff_assoc() only reports entries of $a missing from $b, so
          // the sizes must match too or extra keys in $b would go unnoticed.
          return count($a) === count($b) && empty(array_diff_assoc($a, $b));
      }
  }
  /**
   * Enumerates the kinds of token an `MDToken` can represent.
   */
  enum MDTokenType {
      // Plain text
      case Text;

      /**
       * Only used for the leading and trailing whitespace around a run of text,
       * not every single whitespace character.
       */
      case Whitespace;

      // Single punctuation tokens
      case Underscore;
      case Asterisk;
      case Slash;
      case Tilde;
      case Bang;
      case Backtick;
      case Equal;
      case Caret;

      // Tokens carrying data on the `MDToken`
      case Label;       // content=label
      case URL;         // content=URL, extra=title
      case Email;       // content=email address, extra=title
      case SimpleLink;  // content=URL
      case SimpleEmail; // content=email address
      case Footnote;    // content=symbol
      case Modifier;    // modifier=MDTagModifier
      case HTMLTag;     // tag=MDHTMLTag

      /** Wildcard for `MDToken::findFirstTokens` */
      case META_AnyNonWhitespace;

      /** Wildcard for `MDToken::findFirstTokens` */
      case META_OptionalWhitespace;
  }
  /**
   * Result object produced by `MDToken::findFirstTokens`: the tokens that
   * matched the pattern and the index at which the match begins.
   */
  class MDTokenMatch {
      /**
       * @param MDToken[] $tokens matched tokens
       * @param int $index index in the searched array where the match starts
       */
      public function __construct(
          public array $tokens,
          public int $index,
      ) {}
  }
  /**
   * Result object produced by `MDToken::findPairedTokens`, describing an
   * opening run of tokens, a closing run, and the content tokens in between.
   */
  class MDPairedTokenMatch {
      /**
       * @param MDToken[] $startTokens tokens matching the start pattern
       * @param MDToken[] $contentTokens tokens between the start and end matches
       * @param MDToken[] $endTokens tokens matching the end pattern
       * @param int $startIndex index of the first start token
       * @param int $contentIndex index of the first content token
       * @param int $endIndex index of the first end token
       * @param int $totalLength token count from the first start token
       * through the last end token
       */
      public function __construct(
          public array $startTokens,
          public array $contentTokens,
          public array $endTokens,
          public int $startIndex,
          public int $contentIndex,
          public int $endIndex,
          public int $totalLength,
      ) {}
  }
  /**
   * One lexical unit in inline markdown syntax parsing.
   */
  class MDToken {
      /**
       * The original verbatim token string. Required as a plaintext fallback if
       * the token remains unresolved.
       */
      public string $original;
      public MDTokenType $type;
      public ?string $content = null;
      public ?string $extra = null;
      public ?MDHTMLTag $tag = null;
      public ?MDTagModifier $modifier = null;

      /**
       * Creates a token. The `content` argument is stored in `modifier` when
       * it is an `MDTagModifier`, in `tag` when it is an `MDHTMLTag`, and in
       * `content` otherwise.
       *
       * @param string $original verbatim token string
       * @param MDTokenType $type token type
       * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
       * the token
       * @param string|null $extra additional content
       */
      public function __construct(string $original, MDTokenType $type,
          string|MDTagModifier|MDHTMLTag|null $content=null,
          ?string $extra=null) {
          $this->original = $original;
          $this->type = $type;
          if ($content instanceof MDTagModifier) {
              $this->modifier = $content;
          } elseif ($content instanceof MDHTMLTag) {
              $this->tag = $content;
          } else {
              $this->content = $content;
          }
          $this->extra = $extra;
      }

      /**
       * Debug representation showing the class name, token type, and content.
       */
      public function __toString(): string {
          $classname = get_class($this);
          // Enums cannot be cast to string; use the case name explicitly.
          return "({$classname} type={$this->type->name} content={$this->content})";
      }

      /**
       * Attempts to parse a label token from the beginning of `line`. A label is
       * of the form `[content]`. Nested bracket pairs are allowed inside the
       * content, a backslash skips the character after it, and an unbalanced
       * `)` aborts the parse. If found, returns an array:
       * - `0`: the entire label including brackets
       * - `1`: the content of the label
       *
       * @param string $line
       * @return ?string[] match groups or null if not found
       */
      public static function tokenizeLabel(string $line): ?array {
          if (!str_starts_with($line, '[')) return null;
          $parenCount = 0;
          $bracketCount = 0;
          $l = mb_strlen($line);
          for ($p = 1; $p < $l; $p++) {
              $ch = mb_substr($line, $p, 1);
              if ($ch == '\\') {
                  // skip the escaped character
                  $p++;
              } elseif ($ch == '(') {
                  $parenCount++;
              } elseif ($ch == ')') {
                  $parenCount--;
                  if ($parenCount < 0) return null;
              } elseif ($ch == '[') {
                  $bracketCount++;
              } elseif ($ch == ']') {
                  if ($bracketCount > 0) {
                      $bracketCount--;
                  } else {
                      // closing bracket of the outermost label
                      return [ mb_substr($line, 0, $p + 1), mb_substr($line, 1, $p - 1) ];
                  }
              }
          }
          return null;
      }

      private static $urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)'; // 1=URL, 2=title
      private static $urlRegex = '^\\((\\S+?)\\)'; // 1=URL

      /**
       * Attempts to parse a URL token from the beginning of `line`. A URL token
       * is of the form `(url)` or `(url "title")`. Returns `null` if the text
       * also parses as an email token. If found, returns an array:
       * - `0`: the entire URL token including parentheses
       * - `1`: the URL
       * - `2`: the optional title, or `null`
       *
       * @param string $line
       * @return ?array token tuple
       */
      public static function tokenizeURL(string $line): ?array {
          $groups = [];
          if (mb_eregi(self::$urlWithTitleRegex, $line, $groups)) {
              if (self::tokenizeEmail($line)) return null; // make sure it's not better described as an email address
              return $groups;
          }
          if (mb_eregi(self::$urlRegex, $line, $groups)) {
              if (self::tokenizeEmail($line)) return null;
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Attempts to parse an email address from the beginning of `line`. An
       * email address is of the form `(user@example.com)` or
       * `(user@example.com "link title")`. If found, returns an array:
       * - `0`: the entire token including parentheses
       * - `1`: the email address
       * - `2`: the optional link title, or `null`
       *
       * @param string $line
       * @return ?string[] token tuple
       */
      public static function tokenizeEmail(string $line): ?array {
          $groups = [];
          if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s+\"(.*?)\"\\s*\\)",
              $line, $groups)) {
              return $groups;
          }
          if (mb_eregi("^\\(\\s*(" . MDUtils::$baseEmailRegex . ")\\s*\\)", $line, $groups)) {
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
       * If found, returns a `MDTokenMatch`, otherwise `null`.
       *
       * `META_AnyNonWhitespace` and `META_OptionalWhitespace` are supported as
       * wildcard pattern elements. Note that `META_OptionalWhitespace` may give
       * a result with a variable number of tokens. The search returns `null`
       * as soon as a pattern element would have to be read past the end of the
       * array.
       *
       * @param (MDToken|MDNode)[] $tokensToSearch - mixed array of `MDToken` and
       * `MDNode` elements
       * @param MDTokenType[] $pattern - contiguous run of token types to find
       * @param int $startIndex - token index to begin searching (defaults to 0)
       * @return ?MDTokenMatch match object, or `null` if not found
       */
      public static function findFirstTokens(array $tokensToSearch, array $pattern, int $startIndex=0): ?MDTokenMatch {
          $tokenCount = sizeof($tokensToSearch);
          // $pattern is an array, so its length is its element count
          $patternLength = sizeof($pattern);
          for ($t = $startIndex; $t < $tokenCount; $t++) {
              $matchedAll = true;
              $matched = [];
              // Becomes negative for each optional whitespace that was absent,
              // so later pattern elements line up with the right token.
              $patternOffset = 0;
              for ($p = 0; $p < $patternLength; $p++) {
                  $t0 = $t + $p + $patternOffset;
                  if ($t0 >= $tokenCount) return null;
                  $token = $tokensToSearch[$t0];
                  $elem = $pattern[$p];
                  if ($elem == MDTokenType::META_OptionalWhitespace) {
                      if ($token instanceof MDToken && $token->type == MDTokenType::Whitespace) {
                          array_push($matched, $token);
                      } else {
                          $patternOffset--;
                      }
                  } elseif ($elem == MDTokenType::META_AnyNonWhitespace) {
                      if ($token instanceof MDToken && $token->type == MDTokenType::Whitespace) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  } else {
                      if (!($token instanceof MDToken) || $token->type != $elem) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  }
              }
              if ($matchedAll) {
                  return new MDTokenMatch($matched, $t);
              }
          }
          return null;
      }

      /**
       * Searches an array of MDToken for a given starting pattern and ending
       * pattern and returns match info about both and the tokens in between.
       * A pair with no tokens between start and end is never accepted.
       *
       * If `contentValidator` is specified, it will be called with the content
       * tokens of a potential match. If the validator returns `true`, the result
       * will be accepted and returned by this method. If the validator returns
       * `false`, this method will keep looking for another matching pair. If no
       * validator is given the first match will be returned regardless of content.
       *
       * If a match is found, a `MDPairedTokenMatch` is returned with details
       * of the opening tokens, closing tokens, and content tokens between. Otherwise
       * `null` is returned.
       *
       * @param MDToken[] $tokensToSearch - array of `MDToken` to search in
       * @param MDTokenType[] $startPattern - array of `MDTokenType` to find first
       * @param MDTokenType[] $endPattern - array of `MDTokenType` to find positioned after `startPattern`
       * @param ?callable $contentValidator - optional validator function. If provided, will be passed an array of inner `MDToken`, and the function can return `true` to accept the contents or `false` to keep searching
       * @param int $startIndex - token index where searching should begin
       * @return ?MDPairedTokenMatch match, or `null`
       */
      public static function findPairedTokens(array $tokensToSearch,
          array $startPattern, array $endPattern, ?callable $contentValidator=null,
          int $startIndex=0): ?MDPairedTokenMatch {
          $tokenCount = sizeof($tokensToSearch);
          for ($s = $startIndex; $s < $tokenCount; $s++) {
              $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
              if ($startMatch === null) return null;
              $contentStart = $startMatch->index + sizeof($startMatch->tokens);
              $endStart = $contentStart;
              while ($endStart < $tokenCount) {
                  $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                  if ($endMatch === null) break;
                  $contentLength = $endMatch->index - $contentStart;
                  $contents = array_slice($tokensToSearch, $contentStart, $contentLength);
                  if (sizeof($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                      return new MDPairedTokenMatch($startMatch->tokens,
                          $contents,
                          $endMatch->tokens,
                          $startMatch->index,
                          $contentStart,
                          $endMatch->index,
                          $endMatch->index + sizeof($endMatch->tokens) - $startMatch->index);
                  } else {
                      // Contents rejected. Try next end match.
                      $endStart = $endMatch->index + 1;
                  }
              }
              // No end matches. Increment start match.
              $s = $startMatch->index;
          }
          return null;
      }

      /**
       * Tests whether `other` is an `MDToken` with the same original text,
       * type, content, extra, tag (same instance), and modifier (loosely
       * compared).
       *
       * @param mixed $other value to compare against
       * @return bool `true` if equivalent
       */
      public function equals($other): bool {
          if (!($other instanceof MDToken)) return false;
          if ($other->original !== $this->original) return false;
          if ($other->type != $this->type) return false;
          if ($other->content !== $this->content) return false;
          if ($other->extra !== $this->extra) return false;
          if ($other->tag !== $this->tag) return false;
          if ($other->modifier != $this->modifier) return false;
          return true;
      }
  }
  398. /**
  399. * Parsing and rendering state. Passed around throughout the parsing process.
  400. *
  401. * States are hierarchical. A sub-state can be created by calling `.copy()` with
  402. * a new array of lines. The sub-state points back to its parent state. This
  403. * is done to parse inner content of a syntax as its own standalone document.
  404. *
  405. * If a custom `MDReader` implementation wants to store data in this object,
  406. * always do so on `state.root` to ensure it's stored on the original state,
  407. * not a child state. Otherwise data may be lost when the sub-state is discarded.
  408. */
  409. class MDState {
  /**
   * Walks up the chain of parent states and returns the topmost `MDState`.
   * This should be used when referencing most stored fields except `lines`
   * and `p`.
   */
  public function root(): MDState {
      // A state without a parent is itself the root.
      return $this->parent?->root() ?? $this;
  }
  417. /**
  418. * Lines of the markdown document. The current line index is pointed to by `p`.
  419. *
  420. * @var string[]
  421. */
  422. public array $lines;
  /**
   * Returns the line of `lines` that `p` points at, or `null` once `p` has
   * reached or passed the end of the array.
   */
  public function currentLine(): ?string {
      if ($this->p >= sizeof($this->lines)) {
          return null;
      }
      return $this->lines[$this->p];
  }
  429. /**
  430. * Current line pointer into array `lines`.
  431. */
  432. public int $p = 0;
  433. private ?MDState $parent = null;
  434. /**
  435. * Array of `MDReader`s sorted by block reading priority.
  436. * @var MDReader[]
  437. */
  438. public array $readersByBlockPriority = [];
  439. /**
  440. * Array of `MDReader`s sorted by tokenization priority.
  441. * @var MDReader[]
  442. */
  443. public array $readersByTokenPriority = [];
  444. /**
  445. * Array of tuples of `pass:number` and `MDReader` sorted by substitution
  446. * priority.
  447. * @var array[]
  448. */
  449. public array $readersBySubstitutePriority = [];
  450. /**
  451. * Prefix to include in any generated `id` attributes on HTML elements.
  452. * Useful for keeping elements unique in multiple parsed documents in the
  453. * same HTML page.
  454. */
  455. public string $elementIdPrefix = '';
  456. /**
  457. * Filter for removing unapproved HTML tags, attributes, and values.
  458. */
  459. public MDHTMLFilter $tagFilter;
  460. private static string $textWhitespaceRegex = '^(\\s*)(?:(\\S|\\S.*\\S)(\\s*?))?$'; // 1=leading WS, 2=text, 3=trailing WS
  461. /**
  462. * @param string[] $lines - lines of markdown text
  463. */
  464. public function __construct(array $lines) {
  465. $this->lines = $lines;
  466. }
  467. /**
  468. * Creates a copy of this state with new lines. Useful for parsing nested
  469. * content.
  470. *
  471. * @param string[] $lines
  472. * @return MDState copied sub-state
  473. */
  474. public function copy(array $lines) {
  475. $cp = new MDState($lines);
  476. $cp->parent = $this;
  477. return $cp;
  478. }
  479. /**
  480. * Tests if there are at least `minCount` lines available to read. If `p`
  481. * is not provided it will be relative to `this.p`.
  482. */
  483. public function hasLines(int $minCount, ?int $p=null): bool {
  484. $relativeTo = ($p === null) ? $this->p : $p;
  485. return $relativeTo + $minCount <= sizeof($this->lines);
  486. }
  487. /**
  488. * Reads and returns an array of blocks from the current line pointer.
  489. *
  490. * @return MDBlockNode[] parsed blocks
  491. */
  492. public function readBlocks(): array {
  493. $blocks = [];
  494. while ($this->hasLines(1)) {
  495. $block = $this->readNextBlock();
  496. if ($block) {
  497. array_push($blocks, $block);
  498. } else {
  499. break;
  500. }
  501. }
  502. return $blocks;
  503. }
  504. /**
  505. * Creates a simple `MDBlockNode` if no other registered blocks match.
  506. */
  507. private function readFallbackBlock(): ?MDBlockNode {
  508. if ($this->p >= sizeof($this->lines)) return null;
  509. $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
  510. if (sizeof($lines) == 0) return null;
  511. $this->p = sizeof($this->lines);
  512. return $this->inlineMarkdownToNode(implode("\n", $lines));
  513. }
  514. /**
  515. * Attempts to read one block from the current line pointer. The pointer
  516. * will be positioned just after the end of the block.
  517. */
  518. private function readNextBlock(): ?MDBlockNode {
  519. while ($this->hasLines(1) && mb_strlen(trim($this->lines[$this->p])) == 0) {
  520. $this->p++;
  521. }
  522. if (!$this->hasLines(1)) return null;
  523. foreach ($this->root()->readersByBlockPriority as $reader) {
  524. $startP = $this->p;
  525. $block = $reader->readBlock($this);
  526. if ($block) {
  527. if ($this->p == $startP) {
  528. $readerClassName = get_class($reader);
  529. $blockClassName = get_class($block);
  530. throw new Error("{$readerClassName} returned an " +
  531. "{$blockClassName} without incrementing MDState.p. " +
  532. "This could lead to an infinite loop.");
  533. }
  534. return $block;
  535. }
  536. }
  537. $fallback = $this->readFallbackBlock();
  538. return $fallback;
  539. }
  540. /**
  541. * @param string $line
  542. * @return MDToken[]
  543. */
  544. private function inlineMarkdownToTokens(string $line): array {
  545. if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);
  546. $tokens = [];
  547. $text = '';
  548. $expectLiteral = false;
  549. /**
  550. * Flushes accumulated content in `text` to `tokens`.
  551. */
  552. function endText() {
  553. if (mb_strlen($text) == 0) return;
  554. $textGroups = null;
  555. if (mb_eregi(MDState::$textWhitespaceRegex, $text, $textGroups)) {
  556. if (mb_strlen($textGroups[1]) > 0) {
  557. array_push($tokens, new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]));
  558. }
  559. if ($textGroups[2] && mb_strlen($textGroups[2]) > 0) {
  560. $tokens.push(new MDToken($textGroups[2], MDTokenType::Text, $textGroups[2]));
  561. }
  562. if ($textGroups[3] && mb_strlen($textGroups[3]) > 0) {
  563. $tokens.push(new MDToken($textGroups[3], MDTokenType::Whitespace, $textGroups[3]));
  564. }
  565. } else {
  566. array_push($tokens, new MDToken($text, MDTokenType::Text, $text));
  567. }
  568. $text = '';
  569. }
  570. for ($p = 0; $p < mb_strlen(line); $p++) {
  571. $ch = mb_substr($line, p, 1);
  572. $remainder = mb_substr($line, $p);
  573. if ($expectLiteral) {
  574. $text .= $ch;
  575. $expectLiteral = false;
  576. continue;
  577. }
  578. if ($ch == '\\') {
  579. $expectLiteral = true;
  580. continue;
  581. }
  582. $found = false;
  583. foreach ($this->root()->readersByTokenPriority as $reader) {
  584. $token = $reader->readToken($this, $remainder);
  585. if ($token === null) continue;
  586. endText();
  587. array_push($tokens, $token);
  588. if ($token->original == null || mb_strlen($token->original) == 0) {
  589. $readerClassName = get_class($reader);
  590. throw new Error(`{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.`);
  591. }
  592. $p += mb_strlen($token->original) - 1;
  593. $found = true;
  594. break;
  595. }
  596. if (!$found) {
  597. $text += $ch;
  598. }
  599. }
  600. endText();
  601. return $tokens;
  602. }
  603. /**
  604. * Converts a line of markdown to an `MDInlineNode`.
  605. *
  606. * @param string|string[] $line
  607. * @return MDInlineNode
  608. */
  609. public function inlineMarkdownToNode(string|array $line): MDInlineNode {
  610. $nodes = $this->inlineMarkdownToNodes($line);
  611. return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
  612. }
  613. /**
  614. * Converts a line of markdown to an array of `MDInlineNode`s.
  615. *
  616. * @param string|string[] $line
  617. * @return MDInlineNode[]
  618. */
  619. public function inlineMarkdownToNodes(string|array $line): array {
  620. $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
  621. return $this->tokensToNodes($tokens);
  622. }
  623. /**
  624. * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
  625. * of only `MDInlineNode` via repeated `MDReader` substition.
  626. *
  627. * @param (MDToken|MDInlineNode)[] $tokens
  628. * @return MDInlineNode[]
  629. */
  630. public function tokensToNodes(array $tokens): array {
  631. $nodes = $tokens;
  632. // Perform repeated substitutions, converting sequences of tokens into
  633. // nodes, until no more substitutions can be made.
  634. $anyChanges = false;
  635. do {
  636. $anyChanges = false;
  637. foreach ($this->root->readersBySubstitutePriority as $readerTuple) {
  638. /** @var int */
  639. $pass = $readerTuple[0];
  640. /** @var MDReader */
  641. $reader = $readerTuple[1];
  642. $changed = $reader->substituteTokens($this, $pass, $nodes);
  643. if (!$changed) continue;
  644. $anyChanges = true;
  645. break;
  646. }
  647. } while ($anyChanges);
  648. // Convert any remaining tokens to text nodes. Also apply any inline
  649. // CSS modifiers.
  650. $lastNode = null;
  651. $me = $this;
  652. $nodes = array_map(function($node) use ($lastNode, $me) {
  653. if ($node instanceof MDToken) {
  654. /** @var MDToken */
  655. $token = $node;
  656. if ($token->type == MDTokenType::Modifier && $lastNode) {
  657. $me->root()->tagFilter->scrubModifier($token->modifier);
  658. $token->modifier->applyTo($lastNode);
  659. $lastNode = null;
  660. return new MDTextNode('');
  661. }
  662. $lastNode = null;
  663. return new MDTextNode($token->original);
  664. } elseif ($node instanceof MDNode) {
  665. $lastNode = ($node instanceof MDTextNode) ? null : $node;
  666. return $node;
  667. } else {
  668. $nodeClassName = get_class($node);
  669. throw new Error("Unexpected node type {$nodeClassName}");
  670. }
  671. }, $nodes);
  672. return $nodes;
  673. }
  674. /**
  675. * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
  676. * and `MDReferencedImageReader`.
  677. * @var array symbol -> URL
  678. */
  679. private array $referenceToURL = [];
  680. /**
  681. * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
  682. * and `MDReferencedImageReader`.
  683. * @var array symbol -> title string
  684. */
  685. private array $referenceToTitle = [];
  686. /**
  687. * Defines a URL by reference symbol.
  688. */
  689. public function defineURL(string $reference, string $url, ?string $title=null) {
  690. $this->root->referenceToURL[mb_strtolower($reference)] = $url;
  691. if ($title !== null) $this->root()->referenceToTitle[mb_strtolower($reference)] = $title;
  692. }
  693. /**
  694. * Returns the URL associated with a reference symbol.
  695. */
  696. public function urlForReference(string $reference): ?string {
  697. return $this->root()->referenceToURL[mb_strtolower($reference)] ?? null;
  698. }
  699. /**
  700. * Returns the link title associated with a reference symbol.
  701. */
  702. public function urlTitleForReference(string $reference): ?string {
  703. return $this->root()->referenceToTitle[mb_strtolower($reference)] ?? null;
  704. }
  705. }
  /**
   * Defines a set of allowable HTML tags, attributes, and CSS.
   */
  class MDHTMLFilter {
      /**
       * Mapping of permitted lowercase tag names to arrays containing allowable
       * attributes for those tags. Does not need to include those attributes
       * defined in `allowableGlobalAttributes`.
       *
       * Values are arrays with allowable lowercase attribute names mapped to
       * allowable value patterns. A `*` means any value is acceptable. Multiple
       * allowable values can be joined together with `|`. These special symbols
       * represent certain kinds of values and can be used in combination or in
       * place of literal values.
       *
       * - `{classlist}`: A list of legal CSS classnames, separated by spaces
       * - `{int}`: An integer
       * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
       * - `{style}`: One or more CSS declarations, separated by semicolons (simple
       *   `key: value;` syntax only)
       * - `{url}`: A URL
       *
       * NOTE(review): `img` permits `href` but not `src`; confirm whether `src`
       * was intended before widening the whitelist.
       *
       * @var array
       */
      public array $allowableTags = [
          'address' => [
              'cite' => '{url}',
          ],
          'h1' => [],
          'h2' => [],
          'h3' => [],
          'h4' => [],
          'h5' => [],
          'h6' => [],
          'blockquote' => [],
          'dl' => [],
          'dt' => [],
          'dd' => [],
          'div' => [],
          'hr' => [],
          'ul' => [],
          'ol' => [
              'start' => '{int}',
              'type' => 'a|A|i|I|1',
          ],
          'li' => [
              'value' => '{int}',
          ],
          'p' => [],
          'pre' => [],
          'table' => [],
          'thead' => [],
          'tbody' => [],
          'tfoot' => [],
          'tr' => [],
          'td' => [],
          'th' => [],
          'a' => [
              'href' => '{url}',
              'target' => '*',
          ],
          'abbr' => [],
          'b' => [],
          'br' => [],
          'cite' => [],
          'code' => [],
          'data' => [
              'value' => '*',
          ],
          'dfn' => [],
          'em' => [],
          'i' => [],
          'kbd' => [],
          'mark' => [],
          'q' => [
              'cite' => '{url}',
          ],
          's' => [],
          'samp' => [],
          'small' => [],
          'span' => [],
          'strong' => [],
          'sub' => [],
          'sup' => [],
          'time' => [
              'datetime' => '*',
          ],
          'u' => [],
          'var' => [],
          'wbr' => [],
          'img' => [
              'alt' => '*',
              'href' => '{url}',
          ],
          'figure' => [],
          'figcaption' => [],
          'del' => [],
          'ins' => [],
          'details' => [],
          'summary' => [],
      ];

      /**
       * Mapping of allowable lowercase global attributes to their permitted
       * values. Uses same value pattern syntax as described in `allowableTags`.
       * A name ending in `*` matches any attribute name with that prefix.
       * @var array
       */
      public array $allowableGlobalAttributes = [
          'class' => '{classlist}',
          'data-*' => '*',
          'dir' => 'ltr|rtl|auto',
          'id' => '*',
          'lang' => '*',
          'style' => '{style}',
          'title' => '*',
          'translate' => 'yes|no|{none}',
      ];

      /**
       * Mapping of allowable CSS style names to their allowable value patterns.
       * Multiple values can be delimited with `|` characters. Limited support
       * so far.
       *
       * Recognized special values:
       * - `{color}`: A hex or named color
       *
       * @var array
       */
      public array $allowableStyleKeys = [
          'background-color' => '{color}',
          'color' => '{color}',
      ];

      // Permissive: only rejects whitespace; does not restrict URL schemes.
      private static string $permissiveURLRegex = '^\\S+$';
      private static string $integerRegex = '^[\\-]?\\d+$';
      private static string $classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';
      private static string $styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';

      /**
       * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
       * itself has already been whitelisted. Modifies `$tag->attributes` in place.
       *
       * @param MDHTMLTag $tag - HTML tag
       */
      public function scrubTag(MDHTMLTag $tag) {
          foreach ($tag->attributes as $name => $value) {
              $attributeName = (string)$name;
              $allowed = $this->isValidAttributeName($tag->tagName, $attributeName)
                  && $this->isValidAttributeValue($tag->tagName, $attributeName, $value);
              if (!$allowed) {
                  unset($tag->attributes[$name]);
              }
          }
      }

      /**
       * Scrubs all forbidden attributes from an HTML modifier. Modifies the
       * modifier in place: the whole class list is dropped if it is invalid,
       * while ids, styles, and attributes are removed individually.
       *
       * @param MDTagModifier $modifier
       * @param ?string $tagName HTML tag name, if known, otherwise only
       *   global attributes will be permitted
       */
      public function scrubModifier(MDTagModifier $modifier, ?string $tagName = null) {
          if (sizeof($modifier->cssClasses) > 0) {
              $classList = implode(' ', $modifier->cssClasses);
              if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
                  $modifier->cssClasses = [];
              }
          }
          if ($modifier->cssId !== null) {
              if (!$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
                  $modifier->cssId = null;
              }
          }
          if (!$this->isValidAttributeName($tagName, 'style')) {
              $modifier->cssStyles = [];
          } else {
              foreach ($modifier->cssStyles as $key => $val) {
                  if (!$this->isValidStyleValue($key, $val)) {
                      unset($modifier->cssStyles[$key]);
                  }
              }
          }
          foreach ($modifier->attributes as $key => $val) {
              if (!$this->isValidAttributeValue($tagName, $key, $val)) {
                  unset($modifier->attributes[$key]);
              }
          }
      }

      /**
       * Tests if an HTML tag name is permitted. Case-insensitive.
       */
      public function isValidTagName(string $tagName): bool {
          return ($this->allowableTags[mb_strtolower($tagName)] ?? null) !== null;
      }

      /**
       * Returns the value pattern of the global attribute rule matching a
       * lowercase attribute name, trying an exact name first and then `prefix*`
       * wildcard names. Returns `null` if no global rule matches.
       */
      private function globalAttributePattern(string $lcAttributeName): ?string {
          $exact = $this->allowableGlobalAttributes[$lcAttributeName] ?? null;
          if ($exact !== null) return $exact;
          foreach ($this->allowableGlobalAttributes as $namePattern => $valuePattern) {
              $namePattern = (string)$namePattern;
              if (!str_ends_with($namePattern, '*')) continue;
              $prefix = mb_substr($namePattern, 0, mb_strlen($namePattern) - 1);
              if (str_starts_with($lcAttributeName, $prefix)) return $valuePattern;
          }
          return null;
      }

      /**
       * Tests if an HTML attribute name is permitted, either globally or for
       * the given tag. With a `null` or unknown tag name only global attributes
       * are permitted.
       */
      public function isValidAttributeName(?string $tagName, string $attributeName): bool {
          $lcAttributeName = mb_strtolower($attributeName);
          if ($this->globalAttributePattern($lcAttributeName) !== null) return true;
          if ($tagName === null) return false;
          // `??` avoids an undefined-key warning for tags not in the whitelist
          $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
          if ($tagAttributes === null) return false;
          return ($tagAttributes[$lcAttributeName] ?? null) !== null;
      }

      /**
       * Tests if an attribute value is allowable. A global attribute rule takes
       * precedence over a tag-specific one. Returns `false` if the attribute
       * name itself is not permitted.
       *
       * @param ?string $tagName
       * @param string $attributeName
       * @param string|bool $attributeValue value string, or `true` for an
       *   attribute with no value
       */
      public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
          $lcAttributeName = mb_strtolower($attributeName);
          $globalPattern = $this->globalAttributePattern($lcAttributeName);
          if ($globalPattern !== null) {
              return $this->attributeValueMatchesPattern($attributeValue, $globalPattern);
          }
          if ($tagName === null) return false;
          $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
          if ($tagAttributes === null) return false;
          $valuePattern = $tagAttributes[$lcAttributeName] ?? null;
          if ($valuePattern === null) return false;
          return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
      }

      /**
       * Tests a value against a `|`-delimited value pattern. Returns `true` as
       * soon as any option accepts the value.
       */
      private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
          // Valueless attributes arrive as booleans. They must not be coerced
          // to "1" and matched as text, so text-based options require a string.
          $isText = is_string($value);
          foreach (explode('|', $pattern) as $option) {
              switch ($option) {
                  case '*':
                      return true;
                  case '{classlist}':
                      if ($isText && mb_eregi(self::$classListRegex, $value)) return true;
                      break;
                  case '{int}':
                      if ($isText && mb_eregi(self::$integerRegex, $value)) return true;
                      break;
                  case '{none}':
                      if ($value === true) return true;
                      break;
                  case '{style}':
                      if ($isText && $this->isValidStyleDeclaration($value)) return true;
                      break;
                  case '{url}':
                      if ($isText && mb_eregi(self::$permissiveURLRegex, $value)) return true;
                      break;
                  default:
                      if ($value === $option) return true;
                      break;
              }
          }
          return false;
      }

      /**
       * Tests if a string of one or more style `key: value;` declarations is
       * fully allowable. Empty declarations are ignored; any declaration that
       * does not split into exactly one key and one value is rejected.
       */
      public function isValidStyleDeclaration(string $styles): bool {
          foreach (explode(';', $styles) as $setting) {
              if (mb_strlen(trim($setting)) == 0) continue;
              $parts = explode(':', $setting);
              if (sizeof($parts) != 2) return false;
              $name = trim($parts[0]);
              if (!$this->isValidStyleKey($name)) return false;
              if (!$this->isValidStyleValue($name, trim($parts[1]))) return false;
          }
          return true;
      }

      /**
       * Tests if a CSS style key is allowable.
       */
      public function isValidStyleKey(string $key): bool {
          return ($this->allowableStyleKeys[$key] ?? null) !== null;
      }

      /**
       * Tests if a CSS style value is allowable for the given style key.
       */
      public function isValidStyleValue(string $key, string $value): bool {
          $pattern = $this->allowableStyleKeys[$key] ?? null;
          if ($pattern === null) return false;
          foreach (explode('|', $pattern) as $option) {
              switch ($option) {
                  case '{color}':
                      if ($this->isValidCSSColor($value)) return true;
                      // Without this break the literal text "{color}" would be
                      // accepted as a value by the default case.
                      break;
                  default:
                      if ($value === $option) return true;
                      break;
              }
          }
          return false;
      }

      /**
       * Tests if a value is a 3- or 6-digit hex color or a purely alphabetic
       * word (presumably a named color; the name itself is not checked).
       */
      private function isValidCSSColor(string $value): bool {
          return mb_eregi(self::$styleColorRegex, $value);
      }
  }
  /**
   * Represents a single HTML tag. Paired tags are represented separately.
   */
  class MDHTMLTag {
      /**
       * Verbatim string of the original parsed tag. Not modified. Should be
       * considered unsafe for inclusion in the final document. Use `__toString()`
       * instead.
       */
      public string $original;
      public string $tagName;
      public bool $isCloser;
      /**
       * Map of attribute names to value strings, or to `true` for attributes
       * that had no value.
       */
      public array $attributes;

      private static string $htmlTagNameFirstRegex = '[a-z]';
      private static string $htmlTagNameMedialRegex = '[a-z0-9]';
      private static string $htmlAttributeNameFirstRegex = '[a-z]';
      private static string $htmlAttributeNameMedialRegex = '[a-z0-9-]';
      private static string $whitespaceCharRegex = '\\s';

      /**
       * @param string $original
       * @param string $tagName
       * @param bool $isCloser
       * @param array $attributes
       */
      public function __construct(string $original, string $tagName, bool $isCloser,
          array $attributes) {
          $this->original = $original;
          $this->tagName = $tagName;
          $this->isCloser = $isCloser;
          $this->attributes = $attributes;
      }

      /**
       * Renders the tag as HTML from its parsed parts (not from `original`).
       * Closing tags are rendered without attributes.
       */
      public function __toString(): string {
          if ($this->isCloser) {
              return "</{$this->tagName}>";
          }
          $html = '<' . $this->tagName;
          foreach ($this->attributes as $key => $value) {
              $safeName = MDUtils::scrubAttributeName($key);
              if ($value === true) {
                  $html .= " {$safeName}";
              } else {
                  $escapedValue = MDUtils::escapeHTML("{$value}");
                  $html .= " {$safeName}=\"{$escapedValue}\"";
              }
          }
          return $html . '>';
      }

      /**
       * Tests equality of tag name, closer flag, and attributes. `original` is
       * not compared.
       */
      public function equals($other): bool {
          if (!($other instanceof MDHTMLTag)) return false;
          if ($other->tagName !== $this->tagName) return false;
          if ($other->isCloser !== $this->isCloser) return false;
          return MDUtils::equal($other->attributes, $this->attributes);
      }

      /**
       * Checks the start of the given string for presence of an HTML tag.
       * The line is scanned one character at a time through a small state
       * machine. Returns `null` if the line does not begin with a complete tag.
       */
      public static function fromLineStart(string $line): ?MDHTMLTag {
          // Parser states
          $expectOpenBracket = 0;
          $expectCloserOrName = 1;
          $expectName = 2;
          $expectAttributeNameOrEnd = 3;
          $expectEqualsOrAttributeOrEnd = 4;
          $expectAttributeValue = 5;
          $expectCloseBracket = 6;

          $isCloser = false;
          $tagName = '';
          $attributeName = '';
          $attributeValue = '';
          // null = no value started, '' = unquoted value, otherwise the quote char
          $attributeQuote = null;
          $attributes = [];
          $fullTag = null;
          // Stores the attribute being accumulated (if any) and resets the
          // accumulators. Attributes with no value are stored as `true`.
          $endAttribute = function(bool $unescape=false) use (&$attributes, &$attributeName, &$attributeValue, &$attributeQuote) {
              if (mb_strlen($attributeName) > 0) {
                  if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) {
                      $attributes[$attributeName] = $unescape ? html_entity_decode($attributeValue, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') : $attributeValue;
                  } else {
                      $attributes[$attributeName] = true;
                  }
              }
              $attributeName = '';
              $attributeValue = '';
              $attributeQuote = null;
          };
          $expect = $expectOpenBracket;
          $length = mb_strlen($line);
          for ($p = 0; $p < $length && $fullTag === null; $p++) {
              $ch = mb_substr($line, $p, 1);
              $isWhitespace = mb_eregi(self::$whitespaceCharRegex, $ch);
              switch ($expect) {
                  case $expectOpenBracket:
                      if ($ch != '<') return null;
                      $expect = $expectCloserOrName;
                      break;
                  case $expectCloserOrName:
                      if ($ch == '/') {
                          $isCloser = true;
                      } else {
                          $p--; // re-read this character as part of the name
                      }
                      $expect = $expectName;
                      break;
                  case $expectName:
                      if (mb_strlen($tagName) == 0) {
                          if (!mb_eregi(self::$htmlTagNameFirstRegex, $ch)) return null;
                          $tagName .= $ch;
                      } elseif (mb_eregi(self::$htmlTagNameMedialRegex, $ch)) {
                          $tagName .= $ch;
                      } else {
                          $p--; // re-read this character in the next state
                          $expect = ($isCloser) ? $expectCloseBracket : $expectAttributeNameOrEnd;
                      }
                      break;
                  case $expectAttributeNameOrEnd:
                      if (mb_strlen($attributeName) == 0) {
                          if ($isWhitespace) {
                              // skip whitespace
                          } elseif ($ch == '/') {
                              $expect = $expectCloseBracket;
                          } elseif ($ch == '>') {
                              $fullTag = mb_substr($line, 0, $p + 1);
                          } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                              $attributeName .= $ch;
                          } else {
                              return null;
                          }
                      } elseif ($isWhitespace) {
                          $expect = $expectEqualsOrAttributeOrEnd;
                      } elseif ($ch == '/') {
                          $endAttribute();
                          $expect = $expectCloseBracket;
                      } elseif ($ch == '>') {
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif ($ch == '=') {
                          $expect = $expectAttributeValue;
                      } elseif (mb_eregi(self::$htmlAttributeNameMedialRegex, $ch)) {
                          $attributeName .= $ch;
                      } else {
                          return null;
                      }
                      break;
                  case $expectEqualsOrAttributeOrEnd:
                      if ($ch == '=') {
                          $expect = $expectAttributeValue;
                      } elseif ($isWhitespace) {
                          // skip whitespace
                      } elseif ($ch == '/') {
                          $expect = $expectCloseBracket;
                      } elseif ($ch == '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif (mb_eregi(self::$htmlAttributeNameFirstRegex, $ch)) {
                          // Previous attribute had no value; start a new one
                          $endAttribute();
                          $expect = $expectAttributeNameOrEnd;
                          $p--;
                      }
                      break;
                  case $expectAttributeValue:
                      if (mb_strlen($attributeValue) == 0) {
                          if ($attributeQuote === null) {
                              if ($isWhitespace) {
                                  // skip whitespace
                              } elseif ($ch == '"' || $ch == "'") {
                                  $attributeQuote = $ch;
                              } else {
                                  $attributeQuote = ''; // explicitly unquoted
                                  $p--;
                              }
                          } elseif ($ch === $attributeQuote) {
                              // Empty string
                              $endAttribute($attributeQuote !== '');
                              $expect = $expectAttributeNameOrEnd;
                          } elseif ($attributeQuote === '' && ($ch == '/' || $ch == '>')) {
                              return null;
                          } else {
                              $attributeValue .= $ch;
                          }
                      } elseif ($ch === $attributeQuote) {
                          $endAttribute($attributeQuote !== '');
                          $expect = $expectAttributeNameOrEnd;
                      } elseif ($attributeQuote === '' && $isWhitespace) {
                          $endAttribute();
                          $expect = $expectAttributeNameOrEnd;
                      } elseif ($attributeQuote === '' && $ch == '>') {
                          // An unquoted value also ends at the closing bracket,
                          // e.g. `<a href=foo>`.
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } else {
                          $attributeValue .= $ch;
                      }
                      break;
                  case $expectCloseBracket:
                      // Anything other than `>` is skipped here
                      if ($ch == '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      }
                      break;
              }
          }
          if ($fullTag === null) return null;
          $endAttribute();
          return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
      }
  }
  1226. /**
  1227. * Represents HTML modifications to a node, such as CSS classes to add or
  1228. * additional attributes. See `MDHTMLFilter.scrubModifier()` to remove disallowed
  1229. * values.
  1230. */
  1231. class MDTagModifier {
  1232. /**
  1233. * Verbatim markdown syntax. Unmodified by changes to other properties.
  1234. */
  1235. public string $original;
  1236. /** @var string[] */
  1237. public array $cssClasses = [];
  1238. public ?string $cssId = null;
  1239. public array $cssStyles = [];
  1240. public array $attributes = [];
  1241. private static $baseClassRegex = '\\.([a-z_\\-][a-z0-9_\\-]*?)';
  1242. private static $baseIdRegex = '#([a-z_\\-][a-z0-9_\\-]*?)';
  1243. private static $baseAttributeRegex = '([a-z0-9]+?)=([^\\s\\}]+?)';
  1244. private static $baseRegex = '\\{([^}]+?)}';
  1245. private static $leadingClassRegex = '^\\{([^}]+?)}';
  1246. private static $trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$';
  1247. private static $classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
  1248. private static $idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
  1249. private static $attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value
  1250. public function applyTo(MDNode $node) {
  1251. if ($node instanceof MDNode) {
  1252. foreach ($this->cssClasses as $cssClass) {
  1253. $node->addClass($cssClass);
  1254. }
  1255. if ($this->cssId) $node->cssId = $this->cssId;
  1256. foreach ($this->attributes as $name => $value) {
  1257. $node->attributes[$name] = $value;
  1258. }
  1259. foreach ($this->cssStyles as $name => $value) {
  1260. $node->cssStyles[$name] = $value;
  1261. }
  1262. }
  1263. }
  1264. /**
  1265. * Adds a CSS class. If already present it will not be duplicated.
  1266. */
  1267. public function addClass(string $cssClass): bool {
  1268. if (array_search($cssClass, $this->cssClasses) !== false) return false;
  1269. array_push($this->cssClasses, $cssClass);
  1270. return true;
  1271. }
  1272. /**
  1273. * Removes a CSS class.
  1274. */
  1275. public function removeClass(string $cssClass): bool {
  1276. $beforeLength = sizeof($this->cssClasses);
  1277. $this->cssClasses = array_diff($this->cssClasses, [ $cssClass ]);
  1278. return sizeof($this->cssClasses) != beforeLength;
  1279. }
  1280. public function equals($other): bool {
  1281. if (!($other instanceof MDTagModifier)) return false;
  1282. if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
  1283. if ($other->cssId !== $this->cssId) return false;
  1284. if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
  1285. return true;
  1286. }
  1287. public function __toString(): string {
  1288. return $this->original;
  1289. }
  1290. private static function styleToObject(string $styleValue): array {
  1291. $pairs = explode(';', $styleValue);
  1292. $styles = [];
  1293. foreach ($pairs as $pair) {
  1294. $keyAndValue = explode(':', $pair);
  1295. if (sizeof($keyAndValue) != 2) continue;
  1296. $styles[$keyAndValue[0]] = $keyAndValue[1];
  1297. }
  1298. return $styles;
  1299. }
  1300. private static function fromContents(string $contents): ?MDTagModifier {
  1301. $modifierTokens = mb_split('\\s+', $contents);
  1302. $mod = new MDTagModifier();
  1303. $mod->original = "{{$contents}}";
  1304. foreach ($modifierTokens as $token) {
  1305. if (trim($token) == '') continue;
  1306. if (mb_eregi(self::$classRegex, $token, $groups)) {
  1307. $mod->addClass($groups[1]);
  1308. } elseif (mb_eregi(self::$idRegex, $token, $groups)) {
  1309. $mod->cssId = $groups[1];
  1310. } elseif (mb_eregi(self::$attributeRegex, $token, $groups)) {
  1311. if ($groups[1] == 'style') {
  1312. $mod->cssStyles = self::styleToObject($groups[2]);
  1313. } else {
  1314. $mod->attributes[$groups[1]] = $groups[2];
  1315. }
  1316. } else {
  1317. return null;
  1318. }
  1319. }
  1320. return $mod;
  1321. }
  1322. /**
  1323. * Extracts block modifier from end of a line. Always returns a 2-element
  1324. * tuple array:
  1325. * - `0`: the line without the modifier
  1326. * - `1`: an `MDTagModifier` if found or `null` if not
  1327. *
  1328. * @param string $line
  1329. * @param ?MDState $state
  1330. * @return array tuple with remaining line and `MDTagModifier` or `null`
  1331. */
  public static function fromLine(string $line, ?MDState $state): array {
      // When a parse state is supplied, modifiers are only honored if an
      // `MDModifierReader` is among the root state's block readers.
      if ($state) {
          $modifiersEnabled = false;
          foreach ($state->root()->readersByBlockPriority as $reader) {
              if ($reader instanceof MDModifierReader) {
                  $modifiersEnabled = true;
                  break;
              }
          }
          if (!$modifiersEnabled) return [ $line, null ];
      }
      if (!mb_eregi(self::$trailingClassRegex, $line, $groups)) return [ $line, null ];
      $mod = self::fromContents($groups[2]);
      // Trailing brace text that does not parse as a modifier is ordinary
      // content, so the line is returned untouched rather than truncated.
      if ($mod === null) return [ $line, null ];
      return [ $groups[1], $mod ];
  }
  1348. /**
  1349. * Attempts to extract modifier from head of string.
  1350. */
  public static function fromStart(string $line): ?MDTagModifier {
      // Group 1 of the leading pattern holds the text between the braces.
      $hasLeadingModifier = mb_eregi(self::$leadingClassRegex, $line, $captures);
      return $hasLeadingModifier ? self::fromContents($captures[1]) : null;
  }
  1355. /**
  1356. * Discards any modifiers from a line and returns what remains.
  1357. */
  public static function strip(string $line): string {
      // Group 1 of the trailing pattern is the line content before the modifier.
      $hasTrailingModifier = mb_eregi(self::$trailingClassRegex, $line, $captures);
      return $hasTrailingModifier ? $captures[1] : $line;
  }
  1362. }
  1363. // -- Readers ---------------------------------------------------------------
  1364. /**
  1365. * Base class for readers of various markdown syntax. A `Markdown` instance can
  1366. * be created with any combination of subclasses of these to customize the
  1367. * flavor of markdown parsed.
  1368. *
  1369. * @see {@link custom.md} for details on subclassing
  1370. */
  class MDReader {
      /**
       * Called before processing begins. `$state->lines` is populated and the
       * line pointer `$state->p` will be at `0`.
       *
       * Default implementation does nothing.
       */
      public function preProcess(MDState $state) {}

      /**
       * Attempts to read an `MDBlockNode` subclass at the current line pointer
       * `$state->p`. Only matches if the block pattern starts at the line
       * pointer, not elsewhere in the `$state->lines` array. If a block is
       * found, `$state->p` should be incremented to the next line _after_ the
       * block structure and an `MDBlockNode` subclass instance is returned. If
       * no block is found, returns `null`.
       *
       * Default implementation always returns `null`.
       */
      public function readBlock(MDState $state): ?MDBlockNode { return null; }

      /**
       * Attempts to read an inline token from the beginning of `$line`. Only
       * the start of the given `$line` is considered. If a matching token is
       * found, an `MDToken` is returned. Otherwise `null` is returned.
       *
       * Default implementation always returns `null`.
       */
      public function readToken(MDState $state, string $line): ?MDToken { return null; }

      /**
       * Attempts to find a pattern anywhere in `$tokens` and perform a _single_
       * in-place substitution with one or more `MDNode` subclass instances.
       * If a substitution is performed, must return `true`, otherwise `false`.
       *
       * Default implementation always returns `false`.
       *
       * @param MDState $state
       * @param int $pass what substitution pass this is, starting with 1
       * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and `MDInlineNode` elements
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool { return false; }

      /**
       * Called after all parsing has completed. An array `$blocks` is passed of
       * all the top-level `MDBlockNode` elements in the document which this
       * method can traverse or alter in-place (e.g. via `array_splice`) if
       * necessary.
       *
       * `MDNode::visitChildren` is useful for recursively looking for certain
       * `MDNode` instances. `MDNode::replaceNodes` is useful for swapping in
       * replacements.
       *
       * Default implementation does nothing.
       *
       * @param MDState $state
       * @param MDBlockNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {}

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the block parsing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the tokenizing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareTokenizeOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the substitution phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Readers are sorted within each substitution pass. All pass 1 readers are
       * processed first, then all pass 2 readers, etc. The number of passes this
       * reader participates in is dictated by `substitutionPassCount`.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @param int $pass substitution pass, with numbering starting at `1`
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `other`, respectively
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return 0;
      }

      /**
       * How many substitution passes this reader requires. Substitution allows
       * all pass 1 readers to process first, then all pass 2 readers, etc.
       */
      public function substitutionPassCount(): int { return 1; }

      /**
       * For sorting readers with ordering preferences. The `compare` methods
       * don't have the properties of normal sorting compares so need to sort
       * differently: every ordered pair is compared, each non-zero result
       * becomes a "must come before" edge, and the elements are emitted in
       * topological order (Kahn's algorithm).
       *
       * Elements are keyed by `$idFn`; elements sharing an id overwrite each
       * other, so ids must be unique within `$arr`.
       *
       * @param array $arr array to sort (keys are ignored)
       * @param callable $compareFn comparison function, taking two array element
       * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
       * respectively
       * @param callable $idFn function for returning a unique hashable id for
       * the array element
       * @return array sorted array
       */
      private static function kahnTopologicalSort(array $arr, callable $compareFn, callable $idFn): array {
          // Callers may pass arrays with gaps in their keys (e.g. the result
          // of array_filter), so normalize to a list before indexing.
          $arr = array_values($arr);
          $count = sizeof($arr);
          $graph = [];
          $inDegrees = [];
          $valuesById = [];
          $ids = [];
          // Build the graph and compute in-degrees
          foreach ($arr as $index => $elem) {
              $id = $idFn($elem);
              $ids[$index] = $id;
              $graph[$id] = [];
              $inDegrees[$id] = 0;
              $valuesById[$id] = $elem;
          }
          for ($i = 0; $i < $count; $i++) {
              $idA = $ids[$i];
              for ($j = 0; $j < $count; $j++) {
                  if ($i === $j) continue;
                  $idB = $ids[$j];
                  $comparisonResult = $compareFn($arr[$i], $arr[$j]);
                  if ($comparisonResult < 0) {
                      // A must come before B
                      array_push($graph[$idA], $idB);
                      $inDegrees[$idB]++;
                  } elseif ($comparisonResult > 0) {
                      // B must come before A
                      array_push($graph[$idB], $idA);
                      $inDegrees[$idA]++;
                  }
              }
          }
          // Initialize the queue with zero-inDegree nodes
          $queue = [];
          foreach ($inDegrees as $elemId => $degree) {
              if ($degree === 0) {
                  array_push($queue, $elemId);
              }
          }
          // Process the queue and build the topological order list
          $sorted = [];
          while (sizeof($queue) > 0) {
              $elemId = array_shift($queue);
              array_push($sorted, $valuesById[$elemId]);
              unset($valuesById[$elemId]);
              foreach ($graph[$elemId] as $neighbor) {
                  $inDegrees[$neighbor]--;
                  if ($inDegrees[$neighbor] === 0) {
                      array_push($queue, $neighbor);
                  }
              }
          }
          // Anything left over (elements caught in an ordering cycle) goes at
          // the end in its original relative order.
          foreach ($valuesById as $value) {
              array_push($sorted, $value);
          }
          return $sorted;
      }

      /**
       * Returns a sorted array of readers by their block priority preferences.
       * The passed array itself is not modified.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReaderForBlocks(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
              fn($elem) => get_class($elem)
          );
      }

      /**
       * Returns a sorted array of readers by their tokenization priority preferences.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReadersForTokenizing(array $readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b),
              fn($elem) => get_class($elem)
          );
      }

      /**
       * Returns a sorted array of tuples (arrays) containing the substitution
       * pass number and reader instance, sorted by their substitution priority
       * preferences.
       *
       * For readers with `substitutionPassCount` > `1`, the same reader will
       * appear multiple times in the resulting array, one per pass.
       *
       * @param MDReader[] $readers
       * @return array[] sorted array of `[ int $pass, MDReader $reader ]` tuples
       */
      public static function sortReadersForSubstitution(array $readers): array {
          $tuples = [];
          $maxPass = 1;
          foreach ($readers as $reader) {
              $passCount = $reader->substitutionPassCount();
              for ($pass = 1; $pass <= $passCount; $pass++) {
                  array_push($tuples, [ $pass, $reader ]);
              }
              $maxPass = max($maxPass, $passCount);
          }
          $result = [];
          for ($pass = 1; $pass <= $maxPass; $pass++) {
              $tuplesThisPass = array_values(array_filter($tuples, fn($tup) => $tup[0] == $pass));
              // Elements here are [pass, reader] tuples, so compare and key
              // on the reader stored at index 1.
              $passResult = self::kahnTopologicalSort(
                  $tuplesThisPass,
                  fn(array $a, array $b): int => $a[1]->compareSubstituteOrdering($b[1], $pass),
                  fn($elem) => get_class($elem[1])
              );
              $result = array_merge($result, $passResult);
          }
          return $result;
      }
  }
  1608. /**
  1609. * Reads markdown blocks for headings denoted with the underline syntax.
  1610. *
  1611. * Supports `MDTagModifier` suffixes.
  1612. */
  class MDUnderlinedHeadingReader extends MDReader {
      /**
       * Reads a two-line heading: a content line followed by a line made up
       * entirely of `=` (level 1) or `-` (level 2). Any trailing modifier on
       * the content line is extracted and applied to the heading.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDHeadingNode`, or `null` if the two lines at
       * the pointer are not an underlined heading
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $p = $state->p;
          $contentLine = trim($state->lines[$p++]);
          [$contentLine, $modifier] = MDTagModifier::fromLine($contentLine, $state);
          $underLine = trim($state->lines[$p++]);
          if ($contentLine == '') return null;
          if (mb_eregi('^=+$', $underLine)) {
              $level = 1;
          } elseif (mb_eregi('^\-+$', $underLine)) {
              $level = 2;
          } else {
              return null;
          }
          // Only commit the pointer once the heading is confirmed.
          $state->p = $p;
          $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($contentLine));
          if ($modifier) $modifier->applyTo($block);
          return $block;
      }
  }
  1637. /**
  1638. * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
  1639. * to 6 are supported.
  1640. *
  1641. * Supports `MDTagModifier` suffixes.
  1642. */
  class MDHashHeadingReader extends MDReader {
      private static string $hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content

      /**
       * Reads a single-line heading that starts with 1 to 6 hash marks. The
       * number of hashes is the heading level. Any trailing modifier is
       * extracted before matching and applied to the heading.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDHeadingNode`, or `null` if the line at the
       * pointer is not a hash heading
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          [$line, $modifier] = MDTagModifier::fromLine($line, $state);
          if (!mb_eregi(self::$hashHeadingRegex, $line, $groups)) return null;
          $state->p = $p;
          $level = mb_strlen($groups[1]);
          $content = $groups[2];
          $block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($content));
          if ($modifier) $modifier->applyTo($block);
          return $block;
      }
  }
  1659. /**
  1660. * Reads subtext blocks. Subtext is smaller, fainter text for things like
  1661. * disclaimers or sources.
  1662. *
  1663. * Supports `MDTagModifier` suffixes.
  1664. */
  class MDSubtextReader extends MDReader {
      private static string $subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content

      /**
       * Reads a single line beginning with `-#` as a subtext block. Any
       * trailing modifier is extracted before matching and applied to the
       * resulting node.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDSubtextNode`, or `null` if the line at the
       * pointer is not subtext
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          [$line, $modifier] = MDTagModifier::fromLine($line, $state);
          if (!mb_eregi(self::$subtextRegex, $line, $groups)) return null;
          $state->p = $p;
          $content = $groups[1];
          $block = new MDSubtextNode($state->inlineMarkdownToNodes($content));
          if ($modifier) $modifier->applyTo($block);
          return $block;
      }

      /**
       * Asks to be ordered before `MDUnorderedListReader`; both syntaxes
       * begin with a hyphen.
       */
      public function compareBlockOrdering(MDReader $other): int {
          if ($other instanceof MDUnorderedListReader) {
              return -1;
          }
          return 0;
      }
  }
  1686. /**
  1687. * Reads markdown blocks for blockquoted text.
  1688. */
  class MDBlockQuoteReader extends MDReader {
      /**
       * Reads a run of consecutive lines that start with `>` as a blockquote.
       * The `>` marker and up to three spaces and one tab after it are
       * removed from each line, and the remaining text is parsed as nested
       * blocks in a copy of the state.
       *
       * The first line that does not start with `>` is left unconsumed so it
       * can be read as part of the following block.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDBlockquoteNode`, or `null` if the line at
       * the pointer does not start with `>`
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $blockquoteLines = [];
          $p = $state->p;
          $lineCount = sizeof($state->lines);
          // Advance only past lines that belong to the quote.
          while ($p < $lineCount && str_starts_with($state->lines[$p], ">")) {
              array_push($blockquoteLines, $state->lines[$p]);
              $p++;
          }
          if (sizeof($blockquoteLines) == 0) return null;
          $contentLines = array_map(
              fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)),
              $blockquoteLines
          );
          $substate = $state->copy($contentLines);
          $quotedBlocks = $substate->readBlocks();
          $state->p = $p;
          return new MDBlockquoteNode($quotedBlocks);
      }
  }
  1709. /**
  1710. * Internal abstract base class for ordered and unordered lists.
  1711. */
  class _MDListReader extends MDReader {
      /**
       * Collects the lines belonging to the list item that starts at
       * `$state->p`, without moving `$state->p`.
       *
       * Collection stops at the next list item marker, or at a non-indented
       * line that follows a blank line. Trailing blank lines are stripped
       * unless collection stopped at another list item, in which case they
       * are kept because they signal that extra spacing is intended.
       *
       * @param MDState $state
       * @param int $firstLineStartPos number of leading characters (the list
       * marker) to drop from the first line
       * @return string[] item lines with common indentation removed
       */
      private static function readItemLines(MDState $state, int $firstLineStartPos): array {
          $p = $state->p;
          $lines = [];
          $seenBlankLine = false;
          $stripTrailingBlankLines = true;
          while ($state->hasLines(1, $p)) {
              $isFirstLine = ($p == $state->p);
              $line = $state->lines[$p++];
              if ($isFirstLine) {
                  $line = mb_substr($line, $firstLineStartPos);
              }
              if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
                  // Found next list item
                  $stripTrailingBlankLines = false; // because this signals extra spacing intended
                  break;
              }
              $isBlankLine = trim($line) == '';
              $isIndented = mb_eregi('^\\s+\\S', $line);
              if ($isBlankLine) {
                  $seenBlankLine = true;
              } elseif (!$isIndented && $seenBlankLine) {
                  // Post-list content
                  break;
              }
              array_push($lines, $line);
          }
          if ($stripTrailingBlankLines) {
              $lines = MDUtils::withoutTrailingBlankLines($lines);
          }
          return MDUtils::stripIndent($lines);
      }

      /**
       * Reads the content of the list item at `$state->p` and advances
       * `$state->p` past the lines consumed (always at least one).
       *
       * A single-line item is parsed as inline markdown. An item containing
       * blank lines is parsed as nested blocks. A multi-line item without
       * blank lines is searched for a nested list marker; if one is found the
       * lines before it become inline content and the rest are parsed as
       * blocks. Otherwise the lines are parsed as blocks.
       *
       * NOTE(review): several branches return `MDNode` or the result of
       * `inlineMarkdownToNode` under an `MDBlockNode` return type; confirm
       * against the node class hierarchy.
       *
       * @param MDState $state
       * @param int $firstLineStartPos number of leading characters (the list
       * marker) to drop from the first line
       * @return MDBlockNode content node for the list item
       */
      protected function readListItemContent(MDState $state, int $firstLineStartPos): MDBlockNode {
          $itemLines = self::readItemLines($state, $firstLineStartPos);
          $state->p += max(sizeof($itemLines), 1);
          if (sizeof($itemLines) == 1) {
              return $state->inlineMarkdownToNode($itemLines[0]);
          }
          $hasBlankLines = sizeof(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
          if ($hasBlankLines) {
              $substate = $state->copy($itemLines);
              $blocks = $substate->readBlocks();
              return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
          }
          // Multiline content with no blank lines. Search for new block
          // boundaries without the benefit of a blank line to demarcate it.
          for ($p = 1; $p < sizeof($itemLines); $p++) {
              $line = $itemLines[$p];
              if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
                  // Nested list found
                  $firstBlock = $state->inlineMarkdownToNode(implode("\n", array_slice($itemLines, 0, $p)));
                  $substate = $state->copy(array_slice($itemLines, $p));
                  $blocks = $substate->readBlocks();
                  // Wrap the leading inline content and the nested blocks
                  // in a single container node.
                  return new MDNode(array_merge([ $firstBlock ], $blocks));
              }
          }
          // Ok, give up and just do a standard block read
          $substate = $state->copy($itemLines);
          $blocks = $substate->readBlocks();
          return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
      }

      /**
       * Abstract; concrete list readers must override this.
       *
       * @throws Error always
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          throw new Error('Abstract readBlock must be overridden in ' . static::class);
      }
  }
  1777. /**
  1778. * Block reader for unordered (bulleted) lists.
  1779. */
  class MDUnorderedListReader extends _MDListReader {
      private static string $unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content

      /**
       * Reads one bulleted item at `$state->p`, advancing the pointer past it.
       *
       * @param MDState $state
       * @return ?MDListItemNode the item, or `null` if there is no line left
       * or the line does not start with a bullet
       */
      private function readUnorderedListItem(MDState $state): ?MDListItemNode {
          // The previous item may have consumed the last line of input.
          if (!$state->hasLines(1)) return null;
          $line = $state->lines[$state->p];
          if (!mb_eregi(self::$unorderedListRegex, $line, $groups)) return null;
          $firstLineOffset = mb_strlen($groups[1]);
          return new MDListItemNode($this->readListItemContent($state, $firstLineOffset));
      }

      /**
       * Reads consecutive bulleted items into one list.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDUnorderedListNode`, or `null` if no item
       * starts at the pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $items = [];
          while (($item = $this->readUnorderedListItem($state)) !== null) {
              array_push($items, $item);
          }
          if (sizeof($items) == 0) return null;
          return new MDUnorderedListNode($items);
      }
  }
  1800. /**
  1801. * Block reader for ordered (numbered) lists. The number of the first item is
  1802. * used to begin counting. The subsequent items increase by 1, regardless of
  1803. * their value.
  1804. */
  class MDOrderedListReader extends _MDListReader {
      private static string $orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content

      /**
       * Reads one numbered item at `$state->p`, advancing the pointer past it.
       *
       * @param MDState $state
       * @return ?MDListItemNode the item carrying its written ordinal, or
       * `null` if there is no line left or the line is not a numbered item
       */
      private function readOrderedListItem(MDState $state): ?MDListItemNode {
          // The previous item may have consumed the last line of input.
          if (!$state->hasLines(1)) return null;
          $line = $state->lines[$state->p];
          if (!mb_eregi(self::$orderedListRegex, $line, $groups)) return null;
          $ordinal = intval($groups[1]);
          $firstLineOffset = mb_strlen($groups[1]) + mb_strlen($groups[2]);
          return new MDListItemNode($this->readListItemContent($state, $firstLineOffset), $ordinal);
      }

      /**
       * Reads consecutive numbered items into one list. The list's starting
       * number is taken from the first item.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDOrderedListNode`, or `null` if no item
       * starts at the pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $items = [];
          while (($item = $this->readOrderedListItem($state)) !== null) {
              array_push($items, $item);
          }
          if (sizeof($items) == 0) return null;
          return new MDOrderedListNode($items, $items[0]->ordinal);
      }
  }
  1826. /**
  1827. * Block reader for code blocks denoted by pairs of triple tickmarks. If
  1828. * a programming language name, _xyz_, immediately follows the backticks, a
  1829. * `language-xyz` CSS class will be added to the resulting `<code>`
  1830. * element.
  1831. *
  1832. * Supports `MDTagModifier` suffix.
  1833. */
  class MDFencedCodeBlockReader extends MDReader {
      /**
       * Reads a code block opened by a line ending in three backticks
       * (optionally followed by a language name) and closed by a line
       * containing only three backticks. Any trailing modifier on the
       * opening line is extracted and applied to the block.
       *
       * The pointer is only advanced when a closing fence is found.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if there is no
       * opening fence at the pointer or no closing fence after it
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $p = $state->p;
          $openFenceLine = $state->lines[$p++];
          [$openFenceLine, $modifier] = MDTagModifier::fromLine($openFenceLine, $state);
          if (!mb_eregi('```\s*([a-z0-9]*)\s*$', $openFenceLine, $groups)) return null;
          $language = mb_strlen($groups[1]) > 0 ? $groups[1] : null;
          $codeLines = [];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (trim($line) == '```') {
                  $state->p = $p;
                  $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
                  if ($modifier) $modifier->applyTo($block);
                  return $block;
              }
              array_push($codeLines, $line);
          }
          // Ran out of lines without a closing fence.
          return null;
      }
  }
  1856. /**
  1857. * Block reader for code blocks denoted by indenting text.
  1858. */
  class MDIndentedCodeBlockReader extends MDReader {
      /**
       * Gathers consecutive lines for which `MDUtils::countIndents` reports
       * at least one indent, strips their indentation, and returns them as
       * a code block. The first line that is not indented is left
       * unconsumed.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDCodeBlockNode`, or `null` if the line at
       * the pointer is not indented
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $collected = [];
          for ($cursor = $state->p; $state->hasLines(1, $cursor); $cursor++) {
              $candidate = $state->lines[$cursor];
              if (MDUtils::countIndents($candidate, true) < 1) {
                  break;
              }
              $collected[] = MDUtils::stripIndent($candidate);
          }
          if (sizeof($collected) == 0) {
              return null;
          }
          $state->p = $cursor;
          return new MDCodeBlockNode(implode("\n", $collected));
      }
  }
  1876. /**
  1877. * Block reader for horizontal rules. Composed of three or more hyphens or
  1878. * asterisks on a line by themselves, with or without intermediate whitespace.
  1879. */
  class MDHorizontalRuleReader extends MDReader {
      private static string $horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

      /**
       * Reads a single line matching the horizontal rule pattern. Any
       * trailing modifier is extracted before matching and applied to the
       * rule.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDHorizontalRuleNode`, or `null` if the line
       * at the pointer is not a rule
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          [$line, $modifier] = MDTagModifier::fromLine($line, $state);
          if (!mb_eregi(self::$horizontalRuleRegex, $line)) return null;
          $state->p = $p;
          $block = new MDHorizontalRuleNode();
          if ($modifier) $modifier->applyTo($block);
          return $block;
      }

      /**
       * Asks to be ordered before `MDUnorderedListReader`; both syntaxes can
       * begin with a hyphen or asterisk.
       */
      public function compareBlockOrdering(MDReader $other): int {
          if ($other instanceof MDUnorderedListReader) {
              return -1;
          }
          return 0;
      }
  }
  1901. /**
  1902. * Block reader for tables.
  1903. *
  1904. * Supports `MDTagModifier` suffix.
  1905. */
  class MDTableReader extends MDReader {
      private static string $tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

      /**
       * Reads one table row at `$state->p`. The line must contain at least
       * one pipe. A trailing modifier and one optional leading and trailing
       * pipe are removed before the line is split into cells. The pointer is
       * advanced only when a row is read.
       *
       * @param MDState $state
       * @param bool $isHeader whether to build header cells instead of body cells
       * @return ?MDTableRowNode the row, or `null` if no row is at the pointer
       */
      private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
          if (!$state->hasLines(1)) return null;
          $p = $state->p;
          $line = MDTagModifier::strip(trim($state->lines[$p++]));
          if (!mb_eregi('.*\\|.*', $line)) return null;
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          $cellTokens = explode('|', $line);
          $cells = array_map(function($token) use ($state, $isHeader) {
              $content = $state->inlineMarkdownToNode(trim($token));
              return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
          }, $cellTokens);
          $state->p = $p;
          return new MDTableRowNode($cells);
      }

      /**
       * Derives per-column alignment from a divider line. A column token
       * with colons on both ends is `'center'`, a leading colon is `'left'`,
       * a trailing colon is `'right'`, and no colon is `null`.
       *
       * @param string $line the divider line
       * @return (?string)[] one alignment entry per column
       */
      private function parseColumnAlignments(string $line): array {
          $line = trim($line);
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          // Removing the outer pipes can expose padding on the first and last
          // tokens, which would hide their colons from the checks below.
          $line = trim($line);
          return array_map(function($token) {
              $startsWithColon = str_starts_with($token, ':');
              $endsWithColon = str_ends_with($token, ':');
              if ($startsWithColon && $endsWithColon) return 'center';
              if ($startsWithColon) return 'left';
              if ($endsWithColon) return 'right';
              return null;
          }, mb_split('\\s*\\|\\s*', $line));
      }

      /**
       * Reads a table: a header row, a divider line, then zero or more body
       * rows. A modifier on the header line is applied to the table. The
       * pointer is restored if the header or divider is missing.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDTableNode`, or `null` if no table starts
       * at the pointer
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $startP = $state->p;
          $firstLine = $state->lines[$startP];
          $modifier = MDTagModifier::fromLine($firstLine, $state)[1];
          $headerRow = $this->readTableRow($state, true);
          if ($headerRow === null) {
              $state->p = $startP;
              return null;
          }
          $dividerLine = $state->lines[$state->p++];
          if (!mb_eregi(self::$tableDividerRegex, $dividerLine)) {
              $state->p = $startP;
              return null;
          }
          $columnAlignments = $this->parseColumnAlignments($dividerLine);
          $bodyRows = [];
          while ($state->hasLines(1)) {
              $row = $this->readTableRow($state, false);
              if ($row === null) break;
              array_push($bodyRows, $row);
          }
          $table = new MDTableNode($headerRow, $bodyRows);
          $table->columnAlignments = $columnAlignments;
          if ($modifier) $modifier->applyTo($table);
          return $table;
      }
  }
  1971. /**
  1972. * Block reader for definition lists. Definitions go directly under terms starting
  1973. * with a colon.
  1974. */
  class MDDefinitionListReader extends MDReader {
      /**
       * Reads lines up to the next blank line as a definition list. Lines
       * starting with a colon and whitespace are definitions, lines starting
       * with whitespace continue the previous entry, and all other lines are
       * terms. At least one term and one definition are required.
       *
       * @param MDState $state
       * @return ?MDBlockNode an `MDDefinitionListNode`, or `null` if the lines
       * at the pointer do not form a definition list
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $termCount = 0;
          $definitionCount = 0;
          $defLines = [];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (trim($line) === '') {
                  break;
              }
              if (mb_eregi('^\\s+', $line)) {
                  // Continuation line; there must be an entry to continue.
                  if (sizeof($defLines) == 0) return null;
                  $defLines[sizeof($defLines) - 1] .= "\n" . $line;
              } elseif (mb_eregi('^:\\s+', $line)) {
                  array_push($defLines, $line);
                  $definitionCount++;
              } else {
                  array_push($defLines, $line);
                  $termCount++;
              }
          }
          if ($termCount == 0 || $definitionCount == 0) return null;
          $blocks = array_map(function($line) use ($state) {
              if (mb_eregi('^:\\s+(.*?)$', $line, $groups)) {
                  return new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($groups[1]));
              }
              return new MDDefinitionListTermNode($state->inlineMarkdownToNodes($line));
          }, $defLines);
          $state->p = $p;
          return new MDDefinitionListNode($blocks);
      }
  }
  2010. /**
  2011. * Block reader for defining footnote contents. Footnotes can be defined anywhere
  2012. * in the document but will always be rendered at the end of a page or end of
  2013. * the document.
  2014. */
  class MDFootnoteReader extends MDReader {
      private static string $footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
      private static string $footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol

      /**
       * Stores the content of a footnote on the root state under its symbol.
       *
       * @param MDState $state
       * @param string $symbol
       * @param MDNode[] $footnote
       */
      private function defineFootnote(MDState $state, string $symbol, array $footnote) {
          $footnotes = $state->root()['footnotes'] ?? [];
          $footnotes[$symbol] = $footnote;
          $state->root()['footnotes'] = $footnotes;
      }

      /**
       * Records one occurrence id of a footnote reference on the root state.
       *
       * @param MDState $state
       * @param string $symbol footnote symbol being referenced
       * @param int $unique occurrence id of this reference
       */
      private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
          $footnoteInstances = $state->root()['footnoteInstances'] ?? [];
          $instances = $footnoteInstances[$symbol] ?? [];
          array_push($instances, $unique);
          $footnoteInstances[$symbol] = $instances;
          // Arrays are copied on read, so store the updated map back.
          $state->root()['footnoteInstances'] = $footnoteInstances;
      }

      /**
       * Returns the numeric id for a footnote symbol, assigning the next
       * unused id the first time a symbol is seen.
       *
       * @param MDState $state
       * @param string $symbol
       * @return int id for the symbol
       */
      private function idForFootnoteSymbol(MDState $state, string $symbol): int {
          $footnoteIds = $state->root()['footnoteIds'] ?? [];
          $existing = $footnoteIds[$symbol] ?? null;
          if ($existing) return $existing;
          $id = $state->root()['nextFootnoteId'];
          $footnoteIds[$symbol] = $id;
          // Arrays are copied on read, so store the updated map back.
          $state->root()['footnoteIds'] = $footnoteIds;
          $state->root()['nextFootnoteId'] = $id + 1;
          return $id;
      }

      /**
       * Resets all footnote bookkeeping on the root state.
       */
      public function preProcess(MDState $state) {
          $state->root()['footnoteInstances'] = [];
          $state->root()['footnotes'] = [];
          $state->root()['footnoteIds'] = [];
          $state->root()['nextFootnoteId'] = 1;
      }

      /**
       * Reads a footnote definition of the form `[^symbol]: content`,
       * including any following lines that begin with whitespace. The content
       * is stored on the root state rather than rendered in place.
       *
       * @param MDState $state
       * @return ?MDBlockNode an empty `MDNode` if a definition was consumed,
       * otherwise `null`
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
          $symbol = $groups[1];
          $def = $groups[2];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (mb_eregi('^\\s+', $line)) {
                  $def .= "\n" . $line;
              } else {
                  // Leave the non-continuation line for the next reader.
                  $p--;
                  break;
              }
          }
          $content = $state->inlineMarkdownToNodes($def);
          $this->defineFootnote($state, $symbol, $content);
          $state->p = $p;
          return new MDNode(); // empty
      }

      /**
       * Reads a footnote reference token (`[^symbol]` or `[^symbol "title"]`)
       * from the start of the line.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if (mb_eregi(self::$footnoteWithTitleRegex, $line, $groups)) {
              return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
          }
          if (mb_eregi(self::$footnoteRegex, $line, $groups)) {
              return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
          }
          return null;
      }

      /**
       * Replaces the first footnote token in `$tokens` with an
       * `MDFootnoteNode` for its symbol.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
              $symbol = $match->tokens[0]->content;
              // The replacement must be wrapped in an array; a bare object
              // would be cast to an array of its properties.
              array_splice($tokens, $match->index, 1, [ new MDFootnoteNode($symbol) ]);
              return true;
          }
          return false;
      }

      /**
       * Assigns footnote and occurrence ids to every `MDFootnoteNode` in the
       * document, then appends an `MDFootnoteListNode` if any footnotes were
       * defined.
       *
       * @param MDState $state
       * @param MDBlockNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {
          $nextOccurrenceId = 1;
          foreach ($blocks as $block) {
              $block->visitChildren(function($node) use ($state, &$nextOccurrenceId) {
                  if (!($node instanceof MDFootnoteNode)) return;
                  $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
                  $node->occurrenceId = $nextOccurrenceId++;
                  $node->displaySymbol = strval($node->footnoteId);
                  $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
              });
          }
          // Read definitions from the same place defineFootnote stores them.
          if (sizeof($state->root()['footnotes'] ?? []) == 0) return;
          array_push($blocks, new MDFootnoteListNode());
      }

      /**
       * Asks to be ordered before link and image readers during block parsing.
       */
      public function compareBlockOrdering(MDReader $other): int {
          if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
              return -1;
          }
          return 0;
      }

      /**
       * Asks to be ordered before link and image readers during tokenizing.
       */
      public function compareTokenizeOrdering(MDReader $other): int {
          if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
              return -1;
          }
          return 0;
      }

      /**
       * Asks to be ordered before link and image readers during substitution.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
              return -1;
          }
          return 0;
      }
  }
  /**
   * Block reader for abbreviation definitions. Anywhere the abbreviation appears
   * in plain text will have its definition available when hovering over it.
   * Definitions can appear anywhere in the document. Their content should only
   * contain simple text, not markdown.
   */
  class MDAbbreviationReader extends MDReader {
      /**
       * Stores an abbreviation, its definition, and a word-boundary pattern for
       * it on the root state.
       */
      private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
          $state->root()->abbreviations[$abbreviation] = $definition;
          $regex = "\\b(" . preg_quote($abbreviation) . ")\\b";
          $state->root()->abbreviationRegexes[$abbreviation] = $regex;
      }

      /**
       * Resets the abbreviation tables on the root state.
       */
      public function preProcess(MDState $state) {
          // Property access, matching how defineAbbreviation() writes entries.
          $state->root()->abbreviations = [];
          $state->root()->abbreviationRegexes = [];
      }

      /**
       * Consumes one line of the form `*[ABBR]: definition` and records it.
       *
       * @return ?MDBlockNode an empty block if the line was a definition,
       * otherwise `null` with the state left untouched
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
          $abbrev = strval($groups[1]);
          $def = strval($groups[2]);
          $this->defineAbbreviation($state, $abbrev, $def);
          $state->p = $p;
          // Must be an MDBlockNode to satisfy the declared return type; a plain
          // MDNode raises a TypeError.
          return new MDBlockNode(); // empty
      }

      /**
       * Replaces text nodes that contain a defined abbreviation with a wrapper
       * node holding the surrounding text and `MDAbbreviationNode`s.
       *
       * NOTE(review): matching is a case-sensitive substring search; the
       * word-boundary patterns stored in `abbreviationRegexes` are not
       * consulted here. Confirm whether whole-word matching is wanted.
       *
       * @param MDState $state
       * @param MDNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {
          $abbreviations = $state->root()->abbreviations ?? [];
          if (sizeof($abbreviations) == 0) return;
          // The lookup table has to be captured explicitly by the closure.
          MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
              if (!($original instanceof MDTextNode)) return null;
              $changed = false;
              $elems = [ $original->text ]; // mix of strings and MDNodes
              for ($i = 0; $i < sizeof($elems); $i++) {
                  $text = $elems[$i];
                  if (!is_string($text)) continue;
                  foreach ($abbreviations as $abbreviation => $definition) {
                      // Numeric-looking keys come back from the array as ints.
                      $abbreviation = strval($abbreviation);
                      $index = strpos($text, $abbreviation);
                      // Not in this string: try the next abbreviation rather
                      // than abandoning the search.
                      if ($index === false) continue;
                      $prefix = substr($text, 0, $index);
                      $suffix = substr($text, $index + strlen($abbreviation));
                      array_splice($elems, $i, 1, [ $prefix, new MDAbbreviationNode($abbreviation, $definition), $suffix ]);
                      $i = -1; // start over
                      $changed = true;
                      break;
                  }
              }
              if (!$changed) return null;
              $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
              return new MDNode($nodes);
          });
      }
  }
  /**
   * Fallback block reader that gathers consecutive non-blank lines into a
   * paragraph. A blank (or whitespace-only) line ends the paragraph. Because
   * paragraphs have no distinguishing syntax, this reader reports itself as
   * ordered after every other block reader.
   */
  class MDParagraphReader extends MDReader {
      /**
       * Reads lines from the current position up to the next blank line.
       *
       * @return ?MDBlockNode a paragraph node, or `null` when nothing was read
       * or when the scan started at line 0 and ran through the last line
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $cursor = $state->p;
          $collected = [];
          while ($state->hasLines(1, $cursor)) {
              $current = $state->lines[$cursor];
              $cursor++;
              if (trim($current) === '') {
                  break;
              }
              $collected[] = $current;
          }
          $startedAtTop = $state->p == 0;
          $reachedEnd = $cursor >= sizeof($state->lines);
          if ($startedAtTop && $reachedEnd) {
              // If it's the entire document don't wrap it in a paragraph
              return null;
          }
          if (sizeof($collected) == 0) {
              return null;
          }
          $state->p = $cursor;
          $joined = implode("\n", $collected);
          return new MDParagraphNode($state->inlineMarkdownToNodes($joined));
      }

      /**
       * Always returns `1`, regardless of the other reader.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 1; // always dead last
      }
  }
  /**
   * Abstract base class for readers that look for one or two delimiting tokens
   * on either side of some content. E.g. `**strong**`.
   */
  class MDSimplePairInlineReader extends MDReader {
      // Passes:
      // 1. Syntaxes with two delimiting tokens, interior tokens of the same
      //    kind must be even in number
      // 2. Syntaxes with one delimiting token
      // 3. Syntaxes with two delimiting tokens, any tokens inside
      // 4. Syntaxes with one delimiting token, any tokens inside
      //
      // NOTE(review): attemptPair() only enforces the even-count rule on
      // pass 1; confirm whether pass 2 was meant to enforce it as well.
      public function substitutionPassCount(): int { return 4; }

      /**
       * Attempts a substitution of a matched pair of delimiting token types.
       * If successful, the substitution is performed on `$tokens` and `true` is
       * returned, otherwise `false` is returned and the array is untouched.
       *
       * Doubled (or longer) delimiters are only attempted on passes 1 and 3,
       * single delimiters only on passes 2 and 4. On pass 1, a match is
       * rejected when its content holds an odd number of the delimiting token.
       * A match is always rejected when its content starts or ends with a
       * whitespace token or already contains a node of class `$nodeClass`.
       *
       * @param MDState $state
       * @param int $pass pass number, starting with `1`
       * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
       * @param string $nodeClass class of the node to return if matched
       * @param MDTokenType $delimiter delimiting token
       * @param int $count how many times the token is repeated to form the delimiter
       * @param bool $plaintext whether to invoke `nodeClass` with a verbatim
       * content string instead of parsed `MDNode`s
       * @return bool `true` if substitution was performed, `false` if not
       */
      public function attemptPair(MDState $state, int $pass, array &$tokens, string $nodeClass, MDTokenType $delimiter, int $count=1, bool $plaintext=false): bool {
          if ($count == 1 && $pass != 2 && $pass != 4) return false;
          if ($count > 1 && $pass != 1 && $pass != 3) return false;
          // array_fill returns the new array; it does not fill one in place.
          $delimiters = array_fill(0, $count, $delimiter);
          $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
          // Closures do not see enclosing locals unless they are listed in `use`.
          $isAcceptable = function($content) use ($nodeClass, $delimiter, $isFirstOfMultiplePasses) {
              // `?? null` keeps empty content from raising undefined-key warnings.
              $first = $content[0] ?? null;
              $last = $content[sizeof($content) - 1] ?? null;
              $firstType = $first instanceof MDToken ? $first->type : null;
              $lastType = $last instanceof MDToken ? $last->type : null;
              if ($firstType === MDTokenType::Whitespace) return false;
              if ($lastType === MDTokenType::Whitespace) return false;
              foreach ($content as $token) {
                  // Don't allow nesting
                  if (get_class($token) == $nodeClass) return false;
              }
              if ($isFirstOfMultiplePasses) {
                  $innerCount = 0;
                  foreach ($content as $token) {
                      if ($token instanceof MDToken && $token->type === $delimiter) $innerCount++;
                  }
                  if (($innerCount % 2) != 0) return false;
              }
              return true;
          };
          $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters, $isAcceptable);
          if ($match === null) return false;
          $content = ($plaintext)
              ? implode('', array_map(fn($token) => $token->original, $match->contentTokens))
              : $state->tokensToNodes($match->contentTokens);
          $node = new $nodeClass($content);
          array_splice($tokens, $match->startIndex, $match->totalLength, [ $node ]);
          return true;
      }
  }
  /**
   * Reader for emphasis syntax: a single underscore (tried first) or a single
   * asterisk on either side of some text.
   */
  class MDEmphasisReader extends MDSimplePairInlineReader {
      /**
       * Produces an underscore or asterisk token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return match (true) {
              str_starts_with($line, '_') => new MDToken('_', MDTokenType::Underscore),
              str_starts_with($line, '*') => new MDToken('*', MDTokenType::Asterisk),
              default => null,
          };
      }

      /**
       * Tries the underscore pair first, then the asterisk pair.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ MDTokenType::Underscore, MDTokenType::Asterisk ] as $delimiter) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', $delimiter)) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Returns `1` against `MDStrongReader`, `0` against any other reader.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDStrongReader) ? 1 : 0;
      }
  }
  /**
   * Reader for strong syntax: two asterisks (tried first) or two underscores
   * on either side of some text. Note that if `MDUnderlineReader` is in use,
   * it will replace the double-underscore syntax.
   */
  class MDStrongReader extends MDSimplePairInlineReader {
      /**
       * Produces an asterisk or underscore token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return match (true) {
              str_starts_with($line, '*') => new MDToken('*', MDTokenType::Asterisk),
              str_starts_with($line, '_') => new MDToken('_', MDTokenType::Underscore),
              default => null,
          };
      }

      /**
       * Tries the doubled asterisk pair first, then the doubled underscore pair.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          foreach ([ MDTokenType::Asterisk, MDTokenType::Underscore ] as $delimiter) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', $delimiter, 2)) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Returns `-1` against `MDEmphasisReader`, `0` against any other reader.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDEmphasisReader) ? -1 : 0;
      }
  }
  /**
   * Reader for strikethrough syntax. Consists of two tildes on either side of
   * some text (preferred) or single tildes on either side. Note that if
   * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
   *
   * The number of recognized tildes can be configured.
   */
  class MDStrikethroughReader extends MDSimplePairInlineReader {
      /** Whether `~text~` is recognized as strikethrough. */
      public bool $singleTildeEnabled = true;
      /** Whether `~~text~~` is recognized as strikethrough. */
      public bool $doubleTildeEnabled = true;

      /**
       * Produces a tilde token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
          return null;
      }

      /**
       * Attempts the double-tilde form, then the single-tilde form, each only
       * when its own flag is enabled. The flags previously gated each other's
       * form, so disabling one switched off the wrong syntax.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($this->doubleTildeEnabled) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
          }
          if ($this->singleTildeEnabled) {
              if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde)) return true;
          }
          return false;
      }
  }
  /**
   * Reader for underline syntax: two underscores on either side of some text.
   * `MDStrongReader` also looks for double underscores; when both are in use
   * this reader takes priority.
   */
  class MDUnderlineReader extends MDSimplePairInlineReader {
      /**
       * Produces an underscore token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '_')
              ? new MDToken('_', MDTokenType::Underscore)
              : null;
      }

      /**
       * Attempts a doubled-underscore pair substitution.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, 2);
          return $substituted;
      }

      /**
       * Returns `-1` against `MDStrongReader`, `0` against any other reader.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDStrongReader) ? -1 : 0;
      }
  }
  /**
   * Reader for highlight syntax: two equal signs on either side of some text.
   */
  class MDHighlightReader extends MDSimplePairInlineReader {
      /**
       * Produces an equal-sign token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '=')
              ? new MDToken('=', MDTokenType::Equal)
              : null;
      }

      /**
       * Attempts a doubled-equal-sign pair substitution.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, 2);
          return $substituted;
      }
  }
  /**
   * Reader for inline code syntax. Consists of one or two delimiting backticks
   * around text. The contents between the backticks will be rendered verbatim,
   * ignoring any inner markdown syntax. To include a backtick inside, escape it
   * with a backslash.
   */
  class MDCodeSpanReader extends MDSimplePairInlineReader {
      /**
       * Produces a backtick token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          if (str_starts_with($line, '`')) return new MDToken('`', MDTokenType::Backtick);
          return null;
      }

      /**
       * Attempts the double-backtick form, then the single-backtick form, both
       * with verbatim (plaintext) content.
       *
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 2, true)) return true;
          if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 1, true)) return true;
          // An explicit result is required: falling off the end returns null,
          // which violates the declared `bool` return type.
          return false;
      }
  }
  /**
   * Reader for subscript syntax: a single tilde on either side of some text.
   * When combined with `MDStrikethroughReader` this reader takes precedence,
   * leaving strikethrough available only through double tildes.
   */
  class MDSubscriptReader extends MDSimplePairInlineReader {
      /**
       * Produces a tilde token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '~')
              ? new MDToken('~', MDTokenType::Tilde)
              : null;
      }

      /**
       * Attempts a single-tilde pair substitution.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
          return $substituted;
      }

      /**
       * Returns `-1` against `MDStrikethroughReader`, `0` against any other reader.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return ($other instanceof MDStrikethroughReader) ? -1 : 0;
      }
  }
  /**
   * Reader for superscript syntax: a single caret on either side of some text.
   */
  class MDSuperscriptReader extends MDSimplePairInlineReader {
      /**
       * Produces a caret token when the line starts with one.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          return str_starts_with($line, '^')
              ? new MDToken('^', MDTokenType::Caret)
              : null;
      }

      /**
       * Attempts a single-caret pair substitution.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          $substituted = $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
          return $substituted;
      }
  }
  /**
   * Reads a hypertext link. Consists of link text between square brackets
   * followed immediately by a URL in parentheses.
   */
  class MDLinkReader extends MDReader {
      /**
       * Tokenizes, in order of preference: a bracketed label, a parenthesized
       * email, a parenthesized URL, an `<email>` shorthand, or a `<url>`
       * shorthand at the start of the line.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $simpleEmailRegex = "^<(" . MDUtils::$baseEmailRegex . ")>";
          // Strings are joined with `.`; `+` is arithmetic and fails on text.
          $simpleURLRegex = "^<(" . MDUtils::$baseURLRegex . ")>";
          if ($groups = MDToken::tokenizeLabel($line)) {
              return new MDToken($groups[0], MDTokenType::Label, $groups[1]);
          }
          if ($groups = MDToken::tokenizeEmail($line)) {
              return new MDToken($groups[0], MDTokenType::Email, $groups[1], $groups[2]);
          }
          if ($groups = MDToken::tokenizeURL($line)) {
              return new MDToken($groups[0], MDTokenType::URL, $groups[1], $groups[2]);
          }
          if (mb_eregi($simpleEmailRegex, $line, $groups)) {
              return new MDToken($groups[0], MDTokenType::SimpleEmail, $groups[1]);
          }
          if (mb_eregi($simpleURLRegex, $line, $groups)) {
              return new MDToken($groups[0], MDTokenType::SimpleLink, $groups[1]);
          }
          return null;
      }

      /**
       * Replaces the first link-shaped token sequence with an `MDLinkNode`.
       * Tried in order: label + URL, label + email, `<email>`, `<url>`.
       *
       * Replacement nodes are always wrapped in an array, because array_splice
       * casts a bare object to an array of its properties.
       *
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
              $text = $match->tokens[0]->content;
              $target = $match->tokens[sizeof($match->tokens) - 1];
              $url = $target->content;
              $title = $target->extra;
              $node = new MDLinkNode($url, $state->inlineMarkdownToNodes($text), $title);
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Email ])) {
              $text = $match->tokens[0]->content;
              $target = $match->tokens[sizeof($match->tokens) - 1];
              $email = $target->content;
              $url = "mailto:{$email}";
              $title = $target->extra;
              $node = new MDLinkNode($url, $state->inlineMarkdownToNodes($text), $title);
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ])) {
              $token = $match->tokens[0];
              $link = "mailto:{$token->content}";
              $node = new MDLinkNode($link, new MDObfuscatedTextNode($token->content));
              array_splice($tokens, $match->index, 1, [ $node ]);
              return true;
          }
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ])) {
              $token = $match->tokens[0];
              $link = $token->content;
              $node = new MDLinkNode($link, new MDTextNode($link));
              array_splice($tokens, $match->index, 1, [ $node ]);
              return true;
          }
          return false;
      }
  }
  /**
   * Reader for referential URL definitions. Consists of link text between square
   * brackets followed immediately by a reference symbol also in square brackets.
   * The URL can be defined elsewhere on a line by itself with the symbol in square
   * brackets, colon, and the URL (and optional title in quotes).
   */
  class MDReferencedLinkReader extends MDLinkReader {
      /**
       * Consumes one `[symbol]: url "optional title"` definition line and
       * registers it with the state.
       *
       * @return ?MDBlockNode an empty block if the line was a definition,
       * otherwise `null` with the state left untouched
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $line = $state->lines[$p++];
          // Stays null when the definition carries no quoted title.
          $title = null;
          if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $line, $groups)) {
              $title = $groups[3];
          } else if (!mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $line, $groups)) {
              return null;
          }
          $symbol = $groups[1];
          $url = $groups[2];
          $state->defineURL($symbol, $url, $title);
          $state->p = $p;
          // Must be an MDBlockNode to satisfy the declared return type.
          return new MDBlockNode([]); // empty
      }

      /**
       * Replaces the first label + label sequence with an
       * `MDReferencedLinkNode`.
       *
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
              $text = $match->tokens[0]->content;
              $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
              $node = new MDReferencedLinkNode($ref, $state->inlineMarkdownToNodes($text));
              // Wrapped in an array so array_splice inserts the node itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          return false;
      }
  }
  /**
   * Reader for images. Consists of an exclamation, alt text in square brackets,
   * and image URL in parentheses.
   */
  class MDImageReader extends MDLinkReader {
      /**
       * Defers to the link tokenizer first, then recognizes a leading `!`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $s = parent::readToken($state, $line);
          if ($s) return $s;
          if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
          return null;
      }

      /**
       * Replaces the first bang + label + URL sequence with an `MDImageNode`,
       * carrying the URL token's extra value as the `title` attribute when set.
       *
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
              $alt = $match->tokens[1]->content;
              $target = $match->tokens[sizeof($match->tokens) - 1];
              $node = new MDImageNode($target->content, $alt);
              if ($target->extra !== null) {
                  $node->attributes['title'] = $target->extra;
              }
              // Wrapped in an array so array_splice inserts the node itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ $node ]);
              return true;
          }
          return false;
      }

      /**
       * Returns `-1` against exactly `MDLinkReader` or `MDReferencedLinkReader`
       * and `0` otherwise. The comparison is on the exact class name, so
       * subclasses of those readers (including this one) do not match.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
              return -1;
          }
          return 0;
      }
  }
  /**
   * Reader for images with referential URL definitions. Consists of an
   * exclamation, alt text in square brackets, and link symbol in square brackets.
   * URL is defined the same as for `MDReferencedLinkReader`.
   */
  class MDReferencedImageReader extends MDReferencedLinkReader {
      /**
       * Defers to the inherited tokenizer first, then recognizes a leading `!`.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $s = parent::readToken($state, $line);
          if ($s) return $s;
          if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
          return null;
      }

      /**
       * Replaces the first bang + label + label sequence with an
       * `MDReferencedImageNode`.
       *
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang, MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
              $alt = $match->tokens[1]->content;
              $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
              // Wrapped in an array so array_splice inserts the node itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ new MDReferencedImageNode($ref, $alt) ]);
              return true;
          }
          return false;
      }

      /**
       * Returns `-1` against exactly `MDLinkReader` or `MDReferencedLinkReader`
       * and `0` otherwise. The comparison is on the exact class name, so
       * subclasses of those readers (including this one) do not match.
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
              return -1;
          }
          return 0;
      }
  }
  /**
   * Turns newlines inside text nodes into explicit line break nodes. Not
   * included in any of the default reader sets since most flavors ignore
   * line breaks within blocks.
   */
  class MDLineBreakReader extends MDReader {
      /**
       * Replaces every multi-line `MDTextNode` with a wrapper node whose
       * children alternate between single-line text nodes and
       * `MDLineBreakNode`s.
       */
      public function postProcess(MDState $state, array &$blocks) {
          $splitAtNewlines = function(MDNode $original) {
              if (!($original instanceof MDTextNode)) {
                  return null;
              }
              $segments = explode("\n", $original->text);
              if (sizeof($segments) == 1) {
                  return null;
              }
              $pieces = [];
              foreach ($segments as $position => $segment) {
                  // A break goes before every segment except the first.
                  if ($position > 0) {
                      $pieces[] = new MDLineBreakNode();
                  }
                  $pieces[] = new MDTextNode($segment);
              }
              return new MDNode($pieces);
          };
          MDNode::replaceNodes($state, $blocks, $splitAtNewlines);
      }
  }
  /**
   * Reads a verbatim HTML tag, and if it passes validation by `MDState.tagFilter`,
   * will be rendered in the final HTML document. Disallowed tags will be rendered
   * as plain text in the resulting document.
   */
  class MDHTMLTagReader extends MDReader {
      /**
       * Parses an HTML tag at the start of the line. Tags whose name the root
       * state's tag filter rejects produce no token; accepted tags are
       * scrubbed by the filter before being wrapped in a token.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $tag = MDHTMLTag::fromLineStart($line, $state);
          if ($tag === null) return null;
          if (!$state->root()->tagFilter->isValidTagName($tag->tagName)) return null;
          $state->root()->tagFilter->scrubTag($tag);
          return new MDToken($tag->original, MDTokenType::HTMLTag, $tag);
      }

      /**
       * Replaces the first HTML tag token with an `MDHTMLTagNode`.
       *
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ])) {
              // NOTE(review): presumably MDToken exposes the parsed tag passed
              // to its constructor as `tag` - confirm against MDToken.
              $tag = $match->tokens[0]->tag;
              // Wrapped in an array so array_splice inserts the node itself
              // rather than casting it to an array of its properties.
              array_splice($tokens, $match->index, sizeof($match->tokens), [ new MDHTMLTagNode($tag) ]);
              return true;
          }
          return false;
      }
  }
  /**
   * Reads tag modifiers: curly braces holding one or more CSS classes, IDs,
   * or custom attributes separated by spaces, to apply to the preceding node.
   * Validation is performed on modifiers and only acceptable values are
   * applied.
   */
  class MDModifierReader extends MDReader {
      /**
       * Produces a modifier token when a modifier parses at the start of the
       * line.
       */
      public function readToken(MDState $state, string $line): ?MDToken {
          $parsed = MDTagModifier::fromStart($line);
          if (!$parsed) {
              return null;
          }
          return new MDToken($parsed->original, MDTokenType::Modifier, $parsed);
      }

      /**
       * Never substitutes anything.
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
          // Modifiers are applied elsewhere, and if they're not it's fine if they're
          // rendered as the original syntax.
          return false;
      }
  }
  2652. // -- Nodes -----------------------------------------------------------------
  2653. /**
  2654. * Base class for nodes in the assembled document tree.
  2655. */
  2656. class MDNode {
  2657. /**
  2658. * Array of CSS classes to add to the node when rendered as HTML.
  2659. * @var string[]
  2660. */
  2661. public array $cssClasses = [];
  2662. public ?string $cssId = null;
  2663. /**
  2664. * Mapping of CSS attributes to values.
  2665. * @var array
  2666. */
  2667. public array $cssStyles = [];
  2668. /**
  2669. * Mapping of arbitrary attributes and values to add to this node's top-level
  2670. * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
  2671. * `cssClasses`, `cssId`, and `cssStyles` instead.
  2672. * @var array
  2673. */
  2674. public array $attributes = [];
  2675. /**
  2676. * All child nodes in this node.
  2677. * @var MDNode[]
  2678. */
  2679. public array $children = [];
  2680. /**
  2681. * @param MDNode[]|MDNode $children
  2682. */
  2683. public function __construct(array|MDNode $children=[]) {
  2684. if (is_array($children)) {
  2685. foreach ($children as $elem) {
  2686. if (!($elem instanceof MDNode)) {
  2687. $thisClassName = get_class($this);
  2688. $elemClassName = get_class($elem);
  2689. throw new Error("{$thisClassName} expects children of type MDNode[] or MDNode, got array with {$elemClassName} element");
  2690. }
  2691. }
  2692. $this->children = $children;
  2693. } else if ($children instanceof MDNode) {
  2694. $this->children = [ $children ];
  2695. } else {
  2696. $thisClassName = get_class($this);
  2697. $elemClassName = gettype($children) == 'object' ? get_class($children) : gettype($children);
  2698. throw new Error("{$thisClassName} expects children of type MDNode[] or MDNode, got {$elemClassName}");
  2699. }
  2700. }
  2701. /**
  2702. * Adds a CSS class. If already present it will not be duplicated.
  2703. */
  2704. public function addClass(string $cssClass): bool {
  2705. if (array_search($cssClass, $this->cssClasses) !== false) return false;
  2706. array_push($this->cssClasses, $cssClass);
  2707. return true;
  2708. }
  2709. /**
  2710. * Removes a CSS class.
  2711. *
  2712. * @param {string} cssClass
  2713. * @returns {boolean} whether the class was present and removed
  2714. */
  2715. public function removeClass(string $cssClass): bool {
  2716. $beforeLength = sizeof($this->cssClasses);
  2717. $this->cssClasses = array_diff($this->cssClasses, [ $cssClass ]);
  2718. return sizeof($this->cssClasses) != $beforeLength;
  2719. }
  2720. /**
  2721. * Renders this node and any children as an HTML string. If the node has no
  2722. * content an empty string should be returned.
  2723. */
  2724. public function toHTML(MDState $state): string {
  2725. return MDNode::arrayToHTML($this->children, $state);
  2726. }
  2727. /**
  2728. * Renders this node and any children as a plain text string. The conversion
  2729. * should only render ordinary text, not attempt markdown-like formatting
  2730. * (e.g. list items should not be prefixed with asterisks, only have their
  2731. * content text returned). If the node has no renderable content an empty
  2732. * string should be returned.
  2733. */
  2734. public function toPlaintext(MDState $state): string {
  2735. return MDNode::arrayToPlaintext($this->children, $state);
  2736. }
  2737. /**
  2738. * Protected helper method that renders an HTML fragment of the attributes
  2739. * to apply to the root HTML tag representation of this node.
  2740. *
  2741. * Example result with a couple `cssClasses`, a `cssId`, and a custom
  2742. * `attributes` key-value pair:
  2743. *
  2744. * ```
  2745. * class="foo bar" id="baz" lang="en"
  2746. * ```
  2747. *
  2748. * The value includes a leading space if it's non-empty so that it can be
  2749. * concatenated directly after the tag name and before the closing `>`.
  2750. */
  2751. protected function htmlAttributes(): string {
  2752. $html = '';
  2753. if (sizeof($this->cssClasses) > 0) {
  2754. $classlist = implode(' ', $this->cssClasses);
  2755. $html .= " class=\"{$classList}\"";
  2756. }
  2757. if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
  2758. $html += " id=\"{$this->cssId}\"";
  2759. }
  2760. $styles = [];
  2761. foreach ($this->cssStyles as $key => $value) {
  2762. array_push($styles, "{$key}: {$value};");
  2763. }
  2764. if (sizeof($styles) > 0) {
  2765. $escaped = htmlspecialchars(implode(' ', $styles));
  2766. $html .= " style=\"{$escaped}\"";
  2767. }
  2768. foreach ($this->attributes as $key => $value) {
  2769. if ($key === 'class' || $key === 'id' || $key === 'style') continue;
  2770. $cleanKey = MDUtils::scrubAttributeName($key);
  2771. if (mb_strlen($cleanKey) == 0) continue;
  2772. $cleanValue = htmlspecialchars($value);
  2773. $html .= " {$cleanKey}=\"{$cleanValue}\"";
  2774. }
  2775. return $html;
  2776. }
  2777. /**
  2778. * Protected helper that renders and concatenates the HTML of all children
  2779. * of this node. Mostly for use by subclasses in their `toHTML`
  2780. * implementations.
  2781. */
  2782. protected function childHTML(MDState $state): string {
  2783. return MDNode::arrayToHTML($this->children, $state);
  2784. }
  2785. /**
  2786. * Protected helper that renders and concatenates the plaintext of all
  2787. * children of this node.
  2788. */
  2789. protected function childPlaintext(MDState $state): string {
  2790. return MDNode::arrayToPlaintext($this->children, $state);
  2791. }
  2792. /**
  2793. * Protected helper for rendering nodes represented by simple paired HTML
  2794. * tags. Custom CSS classes and attributes will be included in the result,
  2795. * and child content will be rendered between the tags.
  2796. */
  2797. protected function simplePairedTagHTML(MDState $state, string $tagName): string {
  2798. $openTagSuffix = $this->children[0] instanceof MDBlockNode ? "\n" : "";
  2799. $closeTagPrefix = $this->children[sizeof($this->children) - 1] instanceof MDBlockNode ? "\n" : '';
  2800. $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
  2801. $attr = $this->htmlAttributes();
  2802. $childHTML = $this->childHTML($state);
  2803. return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
  2804. }
  2805. /**
  2806. * Calls the given callback function with every child node, recursively.
  2807. * Nodes are visited depth-first.
  2808. */
  2809. public function visitChildren(callable $fn) {
  2810. foreach ($this->children as $child) {
  2811. $fn($child);
  2812. $child->visitChildren($fn);
  2813. }
  2814. }
  2815. /**
  2816. * Helper for rendering and concatenating HTML from an array of `MDNode`s.
  2817. *
  2818. * @param MDNode[] $nodes
  2819. * @param MDState $state
  2820. * @return string HTML string
  2821. */
  public static function arrayToHTML(array $nodes, MDState $state): string {
      // An arrow function captures `$state` from the enclosing scope. The
      // previous `function($node) { ... }` closure had no `use ($state)`
      // clause, so `$state` was undefined inside it.
      $renderNode = fn($node) => $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : '');
      return implode('', array_map($renderNode, $nodes));
  }
  2827. /**
  2828. * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
  2829. *
  2830. * @param MDNode[] $nodes
  2831. * @param MDState $state
  2832. * @return string plaintext
  2833. */
  public static function arrayToPlaintext(array $nodes, MDState $state): string {
      // Accumulate each node's plaintext in order, with no separator.
      $text = '';
      foreach ($nodes as $node) {
          $text .= $node->toPlaintext($state);
      }
      return $text;
  }
  2837. /**
  2838. * Recursively searches and replaces nodes in a tree. The given `replacer`
  2839. * is passed every node in the tree. If `replacer` returns a new `MDNode`
  2840. * the original will be replaced with it. If the function returns `null` no
  2841. * change will be made to that node. Traversal is depth-first.
  2842. *
  2843. * @param {MDState} state
  2844. * @param {MDNode[]} nodes
  2845. * @param {function} replacer - takes a node as an argument, returns either
  2846. * a new node or `null` to leave it unchanged
  2847. */
  public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
      $index = 0;
      while ($index < count($nodes)) {
          $current = $nodes[$index];
          $substitute = $replacer($current);
          if (!($substitute instanceof MDNode)) {
              // Node left as-is: descend into its children instead. The
              // children of a replacement node are not traversed.
              self::replaceNodes($state, $current->children, $replacer);
          } else {
              // Swap the node in place, one for one.
              array_splice($nodes, $index, 1, [$substitute]);
          }
          $index++;
      }
  }
  2859. }
  2860. /**
  2861. * Marker subclass that indicates a node represents block syntax.
  2862. */
  class MDBlockNode extends MDNode {
      // Intentionally empty: the type itself is the marker that other code
      // checks with `instanceof MDBlockNode`.
  }
  2864. /**
  2865. * Paragraph block.
  2866. */
  class MDParagraphNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<p>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'p';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  2872. /**
  2873. * A heading block with a level from 1 to 6.
  2874. */
  class MDHeadingNode extends MDBlockNode {
      /** Heading level, 1 through 6. */
      public int $level;

      /**
       * @param int $level heading level; must be within 1..6
       * @param MDNode[] $children heading content
       * @throws Error when the level is out of range
       */
      public function __construct(int $level, array $children) {
          parent::__construct($children);
          // The `int` parameter type already guarantees an integer, so only
          // the range needs checking.
          $withinRange = $level >= 1 && $level <= 6;
          if (!$withinRange) {
              throw new Error(get_class($this) . ' requires heading level 1 to 6');
          }
          $this->level = $level;
      }

      /**
       * Renders the children wrapped in `<h1>`..`<h6>` according to `level`.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'h' . $this->level;
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  2889. /**
  2890. * A sub-text block with smaller, less prominent text.
  2891. */
  class MDSubtextNode extends MDBlockNode {
      /**
       * Renders the children in a `<div>` after adding the `subtext` CSS
       * class to this node.
       */
      public function toHTML(MDState $state): string {
          $cssClass = 'subtext';
          $this->addClass($cssClass);
          $tagName = 'div';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  2898. /**
  2899. * Node for a horizontal dividing line.
  2900. */
  class MDHorizontalRuleNode extends MDBlockNode {
      /**
       * Renders a lone `<hr>` tag carrying this node's attributes. There is
       * no closing tag and children are not rendered.
       */
      public function toHTML(MDState $state): string {
          $attr = $this->htmlAttributes();
          return "<hr{$attr}>";
      }
  }
  2906. /**
  2907. * A block quote, usually rendered indented from other text.
  2908. */
  class MDBlockquoteNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<blockquote>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'blockquote';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  2914. /**
  2915. * A bulleted list. Contains `MDListItemNode` children.
  2916. */
  class MDUnorderedListNode extends MDBlockNode {
      /** @var MDListItemNode[] $children */

      /**
       * Renders the list items wrapped in a `<ul>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'ul';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  2923. /**
  2924. * A numbered list. Contains `MDListItemNode` children.
  2925. */
  class MDOrderedListNode extends MDBlockNode {
      /** @var MDListItemNode[] $children */

      /**
       * Number the list starts at, or `null` when unspecified. Declared
       * nullable because the constructor's default is `null`; a plain `int`
       * property would throw a TypeError on that assignment.
       */
      public ?int $startOrdinal;

      /**
       * @param MDListItemNode[] $children
       * @param ?int $startOrdinal
       */
      public function __construct(array $children, ?int $startOrdinal=null) {
          parent::__construct($children);
          $this->startOrdinal = $startOrdinal;
      }

      /**
       * Renders the list as `<ol>`. A `start` attribute is set on this node
       * only when a starting ordinal other than 1 was given.
       */
      public function toHTML(MDState $state): string {
          if ($this->startOrdinal !== null && $this->startOrdinal !== 1) {
              $this->attributes['start'] = strval($this->startOrdinal);
          }
          return $this->simplePairedTagHTML($state, 'ol');
      }
  }
  2944. /**
  2945. * An item in a bulleted or numbered list.
  2946. */
  class MDListItemNode extends MDBlockNode {
      /**
       * Ordinal given to this item, or `null` when none was supplied.
       * Declared nullable because the constructor's default is `null`; a
       * plain `int` property would throw a TypeError on that assignment.
       */
      public ?int $ordinal;

      /**
       * @param MDNode[] $children
       * @param ?int $ordinal
       */
      public function __construct(array $children, ?int $ordinal=null) {
          parent::__construct($children);
          $this->ordinal = $ordinal;
      }

      /**
       * Renders the children wrapped in an `<li>` tag pair. The ordinal is
       * not written into the markup here.
       */
      public function toHTML(MDState $state): string {
          return $this->simplePairedTagHTML($state, 'li');
      }
  }
  2961. /**
  2962. * A block of preformatted computer code. Inner markdown is ignored.
  2963. */
  class MDCodeBlockNode extends MDBlockNode {
      /** Raw code text. Escaped on output, never parsed as markdown. */
      public string $text;

      /**
       * The programming language of the content.
       */
      public ?string $language;

      /**
       * @param string $text raw code text
       * @param ?string $language optional language name
       */
      public function __construct(string $text, ?string $language=null) {
          // PHP has no `super(...)` call; the parent constructor must be
          // invoked explicitly. This node has no child nodes.
          parent::__construct([]);
          $this->text = $text;
          $this->language = $language;
      }

      /**
       * Renders `<pre><code>` with the escaped text. When a language is set
       * it is emitted as a `language-*` class on the `<code>` element.
       */
      public function toHTML(MDState $state): string {
          $languageModifier = '';
          if ($this->language !== null) {
              // The language ends up inside an attribute value, so escape it.
              $cleanLanguage = htmlspecialchars($this->language);
              $languageModifier = " class=\"language-{$cleanLanguage}\"";
          }
          // Strings are joined with `.` in PHP (`+` is arithmetic), and
          // static methods are called with `::`.
          return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
              MDUtils::escapeHTML($this->text) . "</code></pre>\n";
      }
  }
  2981. /**
  2982. * A table node with a single header row and any number of body rows.
  2983. *
  2984. * If modifying the rows, use the `headerRow` and `bodyRows` accessors,
  2985. * otherwise `children` may get out of sync.
  2986. */
  class MDTableNode extends MDBlockNode {
      /** @var MDTableRowNode[] $children */

      /** Returns the first child as the header row, or `null` if there is none. */
      public function headerRow(): ?MDTableRowNode { return $this->children[0] ?? null; }

      /**
       * Returns every child after the first.
       *
       * @return MDTableRowNode[]
       */
      public function bodyRows(): array { return array_slice($this->children, 1); }

      /**
       * How to align each column. Columns beyond the length of the array or with
       * corresponding `null` elements will have no alignment set. Values should
       * be valid CSS `text-align` values.
       *
       * @var string[]
       */
      public array $columnAlignments = [];

      /**
       * @param MDTableRowNode $headerRow
       * @param MDTableRowNode[] $bodyRows
       */
      public function __construct(MDTableRowNode $headerRow, array $bodyRows) {
          parent::__construct(array_merge([ $headerRow ], $bodyRows));
      }

      /**
       * Applies `columnAlignments` to the cells of every row, header included.
       */
      public function applyAlignments() {
          foreach ($this->children as $child) {
              $this->applyAlignmentsToRow($child);
          }
      }

      /**
       * Applies the per-column alignment to each cell of one row, matching
       * cells to alignments by their index within the row.
       */
      private function applyAlignmentsToRow(MDTableRowNode $row) {
          foreach ($row->children as $columnIndex => $cell) {
              $alignment = $this->columnAlignments[$columnIndex] ?? null;
              $this->applyAlignmentToCell($cell, $alignment);
          }
      }

      /**
       * Sets or clears the `text-align` style on one cell. Header cells are
       * accepted too: `MDTableHeaderCellNode` is not a subclass of
       * `MDTableCellNode`, and `applyAlignments` also walks the header row.
       *
       * @param MDTableCellNode|MDTableHeaderCellNode $cell
       * @param ?string $alignment CSS `text-align` value; a falsy value clears the style
       */
      public function applyAlignmentToCell(MDTableCellNode|MDTableHeaderCellNode $cell, ?string $alignment) {
          if ($alignment) {
              $cell->cssStyles['text-align'] = $alignment;
          } else {
              unset($cell->cssStyles['text-align']);
          }
      }

      /**
       * Renders the table with the header row inside `<thead>` and the
       * remaining rows inside `<tbody>`.
       */
      public function toHTML(MDState $state): string {
          $this->applyAlignments();
          $html = '';
          $html .= "<table" . $this->htmlAttributes() . ">\n";
          // Double quotes are required for `\n` to be a newline.
          $html .= "<thead>\n";
          // `headerRow` and `bodyRows` are methods, not properties.
          $header = $this->headerRow();
          if ($header !== null) {
              $html .= $header->toHTML($state) . "\n";
          }
          $html .= "</thead>\n";
          $html .= "<tbody>\n";
          $html .= MDNode::arrayToHTML($this->bodyRows(), $state) . "\n";
          $html .= "</tbody>\n";
          $html .= "</table>\n";
          return $html;
      }
  }
  3038. /**
  3039. * Node for one row (header or body) in a table.
  3040. */
  class MDTableRowNode extends MDBlockNode {
      /** @var MDTableCellNode[] $children */

      /**
       * Renders the cells wrapped in a `<tr>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'tr';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3047. /**
  3048. * Node for one cell in a table row.
  3049. */
  class MDTableCellNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<td>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'td';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3055. /**
  3056. * Node for a header cell in a header table row.
  3057. */
  class MDTableHeaderCellNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<th>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'th';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3063. /**
  3064. * Definition list with `MDDefinitionListTermNode` and
  3065. * `MDDefinitionListDefinitionNode` children.
  3066. */
  class MDDefinitionListNode extends MDBlockNode {
      /**
       * Renders the terms and definitions wrapped in a `<dl>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'dl';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3072. /**
  3073. * A word or term in a definition list.
  3074. */
  class MDDefinitionListTermNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<dt>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'dt';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3080. /**
  3081. * The definition of a word or term in a definition list. Should follow a
  3082. * definition term, or another definition to serve as an alternate.
  3083. */
  class MDDefinitionListDefinitionNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<dd>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'dd';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3089. /**
  3090. * Block at the bottom of a document listing all the footnotes with their
  3091. * content.
  3092. */
  class MDFootnoteListNode extends MDBlockNode {
      /**
       * Looks up the numeric id registered for a footnote symbol on the root
       * state.
       *
       * NOTE(review): this reads `footnoteIds` as a property of the root
       * state, mirroring how `footnoteInstances` and `elementIdPrefix` are
       * read in this class. The previous code indexed the state object like
       * an array. Confirm the property name against `MDState`.
       *
       * @return ?int the id, or `null` when no id is known for the symbol
       */
      private function footnoteId(MDState $state, string $symbol): ?int {
          $lookup = $state->root()->footnoteIds ?? null;
          if (!$lookup) return null;
          return $lookup[$symbol] ?? null;
      }

      /**
       * Renders every footnote that has content as an ordered list inside
       * `<div class="footnotes">`, each followed by back-reference links.
       * Returns an empty string when the state holds no footnotes.
       */
      public function toHTML(MDState $state): string {
          $footnotes = $state->footnotes;
          $symbolOrder = array_keys($footnotes);
          if (sizeof($footnotes) == 0) return '';
          $footnoteUniques = $state->root()->footnoteInstances;
          // `root()` is a method; hoist the prefix so it can be interpolated.
          $idPrefix = $state->root()->elementIdPrefix;
          $html = '';
          $html .= '<div class="footnotes">';
          $html .= '<ol>';
          foreach ($symbolOrder as $symbol) {
              $content = $footnotes[$symbol];
              if (!$content) continue;
              $footnoteId = $this->footnoteId($state, $symbol);
              // `$content` is an array of nodes, so use the array helper;
              // `toHTML` is an instance method taking only the state.
              $contentHTML = MDNode::arrayToHTML($content, $state);
              $html .= "<li value=\"{$footnoteId}\" id=\"{$idPrefix}footnote_{$footnoteId}\">{$contentHTML}";
              // A footnote may have no recorded occurrences.
              $uniques = $footnoteUniques[$symbol] ?? null;
              if ($uniques) {
                  foreach ($uniques as $unique) {
                      $html .= " <a href=\"#{$idPrefix}footnoteref_{$unique}\" class=\"footnote-backref\">↩︎</a>";
                  }
              }
              $html .= "</li>\n";
          }
          $html .= '</ol>';
          $html .= '</div>';
          return $html;
      }

      /**
       * Renders each footnote that has content as a "symbol. text" line and
       * trims the combined result.
       */
      public function toPlaintext(MDState $state): string {
          $footnotes = $state->footnotes;
          $symbolOrder = array_keys($footnotes);
          if (sizeof($footnotes) == 0) return '';
          $text = '';
          foreach ($symbolOrder as $symbol) {
              $content = $footnotes[$symbol];
              if (!$content) continue;
              // Render this footnote's own content nodes. The previous code
              // rendered this list node's children for every symbol.
              $text .= "{$symbol}. " . MDNode::arrayToPlaintext($content, $state) . "\n";
          }
          return trim($text);
      }
  }
  3138. /**
  3139. * Marker subclass that indicates a node represents inline syntax.
  3140. */
  class MDInlineNode extends MDNode {
      // Intentionally empty: the type itself marks a node as inline syntax.
  }
  3142. /**
  3143. * Contains plain text. Special HTML characters are escaped when rendered.
  3144. */
  class MDTextNode extends MDInlineNode {
      /** The raw, unescaped text. */
      public string $text;

      /**
       * @param string $text raw text content
       */
      public function __construct(string $text) {
          // Text nodes carry no child nodes.
          $noChildren = [];
          parent::__construct($noChildren);
          $this->text = $text;
      }

      /**
       * Returns the text passed through `htmlentities`.
       */
      public function toHTML(MDState $state): string {
          $escaped = htmlentities($this->text);
          return $escaped;
      }

      /**
       * Returns the text unchanged.
       */
      public function toPlaintext(MDState $state): string {
          return $this->text;
      }
  }
  3158. /**
  3159. * Contains plain text which is rendered with HTML entities when rendered to
   * be marginally more difficult for web scrapers to decipher. Used for
  3161. * semi-sensitive info like email addresses.
  3162. */
  class MDObfuscatedTextNode extends MDTextNode {
      /**
       * Returns the text as encoded by `MDUtils::escapeObfuscated`.
       */
      public function toHTML(MDState $state): string {
          $obfuscated = MDUtils::escapeObfuscated($this->text);
          return $obfuscated;
      }
  }
  3168. /**
  3169. * Emphasized (italicized) content.
  3170. */
  class MDEmphasisNode extends MDInlineNode {
      /**
       * Renders the children wrapped in an `<em>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'em';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3176. /**
  3177. * Strong (bold) content.
  3178. */
  class MDStrongNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<strong>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'strong';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3184. /**
  3185. * Content rendered with a line through it.
  3186. */
  class MDStrikethroughNode extends MDInlineNode {
      /**
       * Renders the children wrapped in an `<s>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 's';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3192. /**
  3193. * Underlined content.
  3194. */
  class MDUnderlineNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<u>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'u';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3200. /**
  3201. * Highlighted content. Usually rendered with a bright colored background.
  3202. */
  class MDHighlightNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<mark>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'mark';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3208. /**
  3209. * Superscripted content.
  3210. */
  class MDSuperscriptNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<sup>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'sup';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3216. /**
  3217. * Subscripted content.
  3218. */
  class MDSubscriptNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<sub>` tag pair.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'sub';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3224. /**
  3225. * Inline plaintext indicating computer code.
  3226. */
  class MDCodeNode extends MDInlineNode {
      /** Raw code text; escaped on output. */
      public string $text;

      /**
       * @param string $text raw code text
       */
      public function __construct(string $text) {
          // Code spans carry no child nodes.
          $noChildren = [];
          parent::__construct($noChildren);
          $this->text = $text;
      }

      /**
       * Renders the escaped text inside a `<code>` element that carries this
       * node's attributes.
       */
      public function toHTML(MDState $state): string {
          $attr = $this->htmlAttributes();
          $escaped = MDUtils::escapeHTML($this->text);
          return "<code{$attr}>{$escaped}</code>";
      }
  }
  3237. /**
  3238. * A footnote symbol in a document. Denoted as a superscripted number that can
  3239. * be clicked to go to its content at the bottom of the document.
  3240. */
  class MDFootnoteNode extends MDInlineNode {
      /**
       * Symbol the author used to match up the footnote to its content definition.
       */
      public string $symbol;

      /**
       * The superscript symbol rendered in HTML. May be the same or different
       * than `symbol`.
       */
      public ?string $displaySymbol = null;

      /**
       * Unique ID for the footnote definition.
       */
      public ?int $footnoteId = null;

      /**
       * Unique number for backlinking to a footnote occurrence. Populated by
       * `MDFootnoteReader::postProcess`.
       */
      public ?int $occurrenceId = null;

      /**
       * @param string $symbol symbol the author used for this footnote
       * @param ?string $title optional `title` attribute; ignored when falsy
       */
      public function __construct(string $symbol, ?string $title=null) {
          parent::__construct([]);
          $this->symbol = $symbol;
          if ($title) $this->attributes['title'] = $title;
      }

      /**
       * Renders a superscripted link to the footnote content once an
       * occurrence id has been assigned. Until then a `FNREF` HTML comment
       * placeholder is returned.
       */
      public function toHTML(MDState $state): string {
          // The check is on `occurrenceId`. The previous code tested
          // `$this->differentiator`, which this class does not declare, so
          // the link branch was never reached.
          if ($this->occurrenceId !== null) {
              $idPrefix = $state->root()->elementIdPrefix;
              return "<sup class=\"footnote\" id=\"{$idPrefix}footnoteref_{$this->occurrenceId}\"" . $this->htmlAttributes() . ">" .
                  "<a href=\"#{$idPrefix}footnote_{$this->footnoteId}\">" . htmlentities($this->displaySymbol ?? $this->symbol) . "</a></sup>";
          }
          return "<!--FNREF:{{$this->symbol}}-->";
      }
  }
  3273. /**
  3274. * A clickable hypertext link.
  3275. */
  class MDLinkNode extends MDInlineNode {
      /** Link target exactly as given; escaped when rendered. */
      public string $href;

      /**
       * @param string $href
       * @param MDNode[]|MDNode $children
       * @param ?string $title optional `title` attribute
       */
      public function __construct(string $href, array|MDNode $children, ?string $title=null) {
          parent::__construct($children);
          $this->href = $href;
          if ($title === null) return;
          $this->attributes['title'] = $title;
      }

      /**
       * Renders an `<a>` element around the children. `mailto:` targets go
       * through `MDUtils::escapeObfuscated`; all others through
       * `htmlentities`.
       */
      public function toHTML(MDState $state): string {
          $isMailLink = str_starts_with($this->href, 'mailto:');
          $escapedLink = $isMailLink
              ? MDUtils::escapeObfuscated($this->href)
              : htmlentities($this->href);
          $attr = $this->htmlAttributes();
          $inner = $this->childHTML($state);
          return "<a href=\"{$escapedLink}\"{$attr}>{$inner}</a>";
      }
  }
  3296. /**
  3297. * A clickable hypertext link where the URL is defined elsewhere by reference.
  3298. */
  class MDReferencedLinkNode extends MDLinkNode {
      /** Reference name used to look up the URL and title in the state. */
      public string $reference;

      /**
       * @param string $reference
       * @param MDNode[]|MDNode $children
       */
      public function __construct(string $reference, array|MDNode $children) {
          // The href starts empty and is resolved during rendering.
          parent::__construct('', $children);
          $this->reference = $reference;
      }

      /**
       * Resolves the URL and title from the state while `href` is still
       * empty, then renders as an ordinary link.
       */
      public function toHTML(MDState $state): string {
          if ($this->href === '') {
              $url = $state->urlForReference($this->reference);
              if ($url) $this->href = $url;
              $title = $state->urlTitleForReference($this->reference);
              if ($title) $this->attributes['title'] = $title;
          }
          // The parent implementation is reached with `parent::`; there is
          // no `$super` variable in PHP.
          return parent::toHTML($state);
      }
  }
  3315. /**
  3316. * An inline image.
  3317. */
  class MDImageNode extends MDInlineNode {
      /** Image URL exactly as given; escaped when rendered. */
      public string $src;

      /** Alternate text, or `null` when none was given. */
      public ?string $alt;

      /**
       * @param string $src
       * @param ?string $alt
       */
      public function __construct(string $src, ?string $alt) {
          // PHP has no `super(...)` call; invoke the parent constructor
          // explicitly. Images have no child nodes.
          parent::__construct([]);
          $this->src = $src;
          $this->alt = $alt;
      }

      /**
       * Renders an `<img>` tag with an escaped `src`, an `alt` attribute
       * when alternate text is present, and this node's attributes.
       */
      public function toHTML(MDState $state): string {
          $html = "<img src=\"" . htmlentities($this->src) . "\"";
          // Compare explicitly so an alt text of "0" is not dropped by a
          // truthiness test.
          if ($this->alt !== null && $this->alt !== '') {
              $html .= " alt=\"" . htmlentities($this->alt) . "\"";
          }
          $html .= $this->htmlAttributes() . ">";
          return $html;
      }
  }
  3333. /**
  3334. * An inline image where the URL is defined elsewhere by reference.
  3335. */
  class MDReferencedImageNode extends MDImageNode {
      /** Reference name used to look up the URL and title in the state. */
      public string $reference;

      /**
       * @param string $reference
       * @param ?string $alt
       */
      public function __construct(string $reference, ?string $alt=null) {
          // The parent constructor takes exactly ($src, $alt); the src
          // starts empty and is resolved during rendering.
          parent::__construct('', $alt);
          $this->reference = $reference;
      }

      /**
       * Resolves the URL and title from the state while `src` is still
       * empty, then renders as an ordinary image.
       */
      public function toHTML(MDState $state): string {
          if ($this->src === '') {
              $url = $state->urlForReference($this->reference);
              if ($url !== null) $this->src = $url;
              $title = $state->urlTitleForReference($this->reference);
              if ($title !== null) $this->attributes['title'] = $title;
          }
          // The parent implementation is reached with `parent::`, and the
          // state variable needs its `$` sigil.
          return parent::toHTML($state);
      }
  }
  3352. /**
  3353. * An abbreviation that can be hovered over to see its full expansion.
  3354. */
  class MDAbbreviationNode extends MDInlineNode {
      /** The abbreviated text shown inline. */
      public string $abbreviation;

      /**
       * @param string $abbreviation
       * @param string $definition stored as this node's `title` attribute
       */
      public function __construct(string $abbreviation, string $definition) {
          // PHP has no `super(...)` call; invoke the parent constructor
          // explicitly. Abbreviations have no child nodes.
          parent::__construct([]);
          $this->abbreviation = $abbreviation;
          $this->attributes['title'] = $definition;
      }

      /**
       * Renders an `<abbr>` element carrying this node's attributes around
       * the escaped abbreviation.
       */
      public function toHTML(MDState $state): string {
          return "<abbr" . $this->htmlAttributes() . ">" . htmlentities($this->abbreviation) . "</abbr>";
      }
  }
  3371. /**
  3372. * A line break that is preserved when rendered to HTML.
  3373. */
  class MDLineBreakNode extends MDInlineNode {
      /**
       * Always renders a bare `<br>` tag.
       */
      public function toHTML(MDState $state): string {
          $tag = '<br>';
          return $tag;
      }

      /**
       * In plaintext a line break is a single newline character.
       */
      public function toPlaintext(MDState $state): string {
          $newline = "\n";
          return $newline;
      }
  }
  3382. /**
  3383. * A verbatim HTML tag. May be altered to strip out disallowed attributes or
  3384. * CSS values.
  3385. */
  class MDHTMLTagNode extends MDInlineNode {
      /** The tag this node wraps. */
      public MDHTMLTag $tag;

      /**
       * @param MDHTMLTag $tag
       */
      public function __construct(MDHTMLTag $tag) {
          // Tag nodes carry no child nodes.
          $noChildren = [];
          parent::__construct($noChildren);
          $this->tag = $tag;
      }

      /**
       * Renders the tag through its string conversion.
       */
      public function toHTML(MDState $state): string {
          return (string) $this->tag;
      }
  }
  3396. // -- Main class ------------------------------------------------------------
  3397. /**
  3398. * Markdown parser.
  3399. */
  class Markdown {
      /**
       * Set of standard readers to handle common syntax. Built on first use
       * and shared afterwards.
       *
       * @return MDReader[]
       */
      public static function standardReaders(): array {
          if (self::$sharedStandardReaders === null) {
              self::$sharedStandardReaders = [
                  new MDUnderlinedHeadingReader(),
                  new MDHashHeadingReader(),
                  new MDBlockQuoteReader(),
                  new MDHorizontalRuleReader(),
                  new MDUnorderedListReader(),
                  new MDOrderedListReader(),
                  new MDFencedCodeBlockReader(),
                  new MDIndentedCodeBlockReader(),
                  new MDParagraphReader(),
                  new MDStrongReader(),
                  new MDEmphasisReader(),
                  new MDCodeSpanReader(),
                  new MDImageReader(),
                  new MDLinkReader(),
                  new MDHTMLTagReader(),
              ];
          }
          return self::$sharedStandardReaders;
      }
      private static ?array $sharedStandardReaders = null;

      /**
       * All supported readers except `MDLineBreakReader`. Built on first use
       * and shared afterwards.
       *
       * @return MDReader[]
       */
      public static function allReaders(): array {
          if (self::$sharedAllReaders === null) {
              // Store into the static property. The previous code assigned a
              // local variable, so the list was rebuilt on every call.
              self::$sharedAllReaders = array_merge(self::standardReaders(), [
                  new MDSubtextReader(),
                  new MDTableReader(),
                  new MDDefinitionListReader(),
                  new MDFootnoteReader(),
                  new MDAbbreviationReader(),
                  new MDUnderlineReader(),
                  new MDSubscriptReader(),
                  new MDStrikethroughReader(),
                  new MDHighlightReader(),
                  new MDSuperscriptReader(),
                  new MDReferencedImageReader(),
                  new MDReferencedLinkReader(),
                  new MDModifierReader(),
              ]);
          }
          return self::$sharedAllReaders;
      }
      private static ?array $sharedAllReaders = null;

      /**
       * Shared instance of a parser with standard syntax.
       */
      public static function standardParser(): Markdown {
          if (self::$sharedStandardMarkdown === null) {
              self::$sharedStandardMarkdown = new Markdown(self::standardReaders());
          }
          return self::$sharedStandardMarkdown;
      }
      private static ?Markdown $sharedStandardMarkdown = null;

      /**
       * Shared instance of a parser with all supported syntax.
       */
      public static function completeParser(): Markdown {
          if (self::$sharedCompleteParser === null) {
              self::$sharedCompleteParser = new Markdown(self::allReaders());
          }
          return self::$sharedCompleteParser;
      }
      public static ?Markdown $sharedCompleteParser = null;

      /**
       * Filter for what non-markdown HTML is permitted. HTML generated as a
       * result of markdown is unaffected.
       */
      public MDHTMLFilter $tagFilter;

      /** @var MDReader[] */
      private array $readers;

      /** @var MDReader[] */
      private array $readersByBlockPriority;

      /** @var MDReader[] */
      private array $readersByTokenPriority;

      /** @var MDReader[] */
      private array $readersBySubstitutePriority;

      /**
       * Creates a Markdown parser with the given syntax readers.
       *
       * @param ?MDReader[] $readers readers to use; `null` (or omitted)
       * selects `allReaders()`
       */
      public function __construct(?array $readers=null) {
          $this->readers = $readers ?? self::allReaders();
          // Sort the resolved list, not the raw argument, which may be null.
          // NOTE(review): `sortReaderForBlocks` is spelled without the plural
          // "s" its two siblings have; left unchanged, confirm against MDReader.
          $this->readersByBlockPriority = MDReader::sortReaderForBlocks($this->readers);
          $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($this->readers);
          $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($this->readers);
          $this->tagFilter = new MDHTMLFilter();
      }

      /**
       * Converts a markdown string to an HTML string.
       *
       * If parsing throws an `Error`, the offending portion of the input is
       * printed via `investigateException` and the error is rethrown.
       *
       * @param string $markdown
       * @param string $elementIdPrefix Optional prefix for generated element
       * `id`s and links to them. For differentiating multiple markdown docs in
       * the same HTML page.
       * @return string HTML
       */
      public function toHTML(string $markdown, string $elementIdPrefix='') {
          // `\r\n` must be tried before the single-character alternatives,
          // otherwise a CRLF pair is split twice and yields a blank line.
          $lines = mb_split('(?:\\r\\n|\\n|\\r)', $markdown);
          if ($lines === false) {
              // The split failed; treat the input as a single line.
              $lines = [ $markdown ];
          }
          try {
              return $this->parse($lines, $elementIdPrefix);
          } catch (Error $e) {
              $this->investigateException($lines, $elementIdPrefix);
              throw $e;
          }
      }

      /**
       * Runs the full pipeline on pre-split lines: reader pre-processing,
       * block reading, reader post-processing, then HTML rendering.
       *
       * @param string[] $lines
       * @param string $elementIdPrefix
       */
      private function parse(array $lines, string $elementIdPrefix) {
          $state = new MDState($lines);
          $state->readersByBlockPriority = $this->readersByBlockPriority;
          $state->readersByTokenPriority = $this->readersByTokenPriority;
          $state->readersBySubstitutePriority = $this->readersBySubstitutePriority;
          $state->tagFilter = $this->tagFilter;
          $state->elementIdPrefix = $elementIdPrefix;
          // Property access uses `->`; `$this.readers` would be a string
          // concatenation.
          foreach ($this->readers as $reader) {
              $reader->preProcess($state);
          }
          $nodes = $state->readBlocks();
          foreach ($this->readers as $reader) {
              $reader->postProcess($state, $nodes);
          }
          return MDNode::arrayToHTML($nodes, $state);
      }

      /**
       * Keeps removing first and last lines of markdown to locate the source of
       * an exception and prints the minimal snippet.
       *
       * @param string[] $lines
       * @param string $elementIdPrefix
       */
      private function investigateException(array $lines, string $elementIdPrefix) {
          $lineCount = sizeof($lines);
          $startIndex = 0;
          $endIndex = $lineCount;
          // Keep stripping away first line until an exception stops being thrown
          for ($i = 0; $i < $lineCount; $i++) {
              try {
                  // With no length argument, array_slice runs to the end.
                  $this->parse(array_slice($lines, $i), $elementIdPrefix);
                  break;
              } catch (Error $e0) {
                  $startIndex = $i;
              }
          }
          // Keep stripping away last line until an exception stops being thrown
          for ($i = $lineCount; $i > $startIndex; $i--) {
              try {
                  // array_slice takes a LENGTH, not an end index, so convert
                  // the exclusive end `$i` into a length from `$startIndex`.
                  $this->parse(array_slice($lines, $startIndex, $i - $startIndex), $elementIdPrefix);
                  break;
              } catch (Error $e0) {
                  $endIndex = $i;
              }
          }
          $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex - $startIndex));
          print("This portion of markdown caused an unexpected exception: {$problematicMarkdown}");
      }
  }
  3567. ?>