PHP and Javascript implementations of a simple markdown parser
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

markdown.php 124KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066
  1. <?php
  2. declare(strict_types=1);
  /**
   * Static utilities shared by the markdown parser: HTML escaping, indent
   * handling, and small array/type helpers.
   */
  class MDUtils {
      // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
      public const baseURLRegex = '(?:(?:(?:[a-z]{3,9}:(?:\\/\\/)?)(?:[\\-;:&=\\+\\$,\\w]+@)?[a-z0-9\\.\\-]+|(?:www\\.|[\\-;:&=\\+\\$,\\w]+@)[a-z0-9\\.\\-]+)(?:(?:\\/[\\+~%\\/\\.\\w\\-_]*)?\\??(?:[\\-\\+=&;%@\\.\\w_]*)#?(?:[\\.\\!\\/\\\\\\w]*))?)';

      // Modified from https://emailregex.com/ to remove capture groups.
      public const baseEmailRegex = '(?:(?:[^<>()\\[\\]\\\\.,;:\\s@"]+(?:\\.[^<>()\\[\\]\\\\.,;:\\s@"]+)*)|(?:".+"))@(?:(?:\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}])|(?:(?:[a-z\\-0-9]+\\.)+[a-z]{2,}))';

      /**
       * Escapes the HTML special characters `&`, `<`, `>` and `"`.
       *
       * Single quotes are left untouched, so escaped values are only safe
       * inside double-quoted attributes or element content.
       *
       * @param string $str string to escape; any non-string value yields `''`
       * @param bool $encodeNewlinesAsBreaks whether to convert newline characters to `<br>` tags
       * @return string escaped HTML
       */
      public static function escapeHTML($str, $encodeNewlinesAsBreaks=false) {
          if (!is_string($str)) return '';
          // strtr() is byte-safe and substitutes in a single pass, so it cannot
          // fail on malformed multibyte input the way a regex replace can, and
          // the `&` inside the generated entities is never escaped twice.
          $html = strtr($str, [
              '&' => '&amp;',
              '<' => '&lt;',
              '>' => '&gt;',
              '"' => '&quot;',
          ]);
          if ($encodeNewlinesAsBreaks) {
              $html = str_replace("\n", "<br>\n", $html);
          }
          return $html;
      }

      /**
       * Encodes characters as HTML numeric entities to make it marginally more
       * difficult for web scrapers to grab sensitive info. If `$text` starts with
       * `mailto:` only the email address following it will be obfuscated.
       *
       * @param string $text text to obfuscate
       * @return string every character of `$text` written as `&#N;`
       */
      public static function escapeObfuscated(string $text): string {
          if (str_starts_with($text, 'mailto:')) {
              // Keep the scheme readable; obfuscate only what follows it.
              return 'mailto:' . self::escapeObfuscated(mb_substr($text, 7));
          }
          $html = '';
          // Split once instead of calling mb_substr() per character, which
          // rescans the string from the start on every iteration.
          foreach (mb_str_split($text) as $ch) {
              $cp = mb_ord($ch);
              $html .= "&#{$cp};";
          }
          return $html;
      }

      /**
       * Removes illegal characters from an HTML attribute name: tab, newline,
       * form feed, space, `/`, `>`, `"`, `'` and `=`.
       *
       * @param string $name raw attribute name
       * @return string name with the illegal characters removed
       */
      public static function scrubAttributeName(string $name): string {
          // All removed characters are single-byte ASCII, so a plain byte-wise
          // replace is sufficient and always returns a string.
          return str_replace(["\t", "\n", "\f", ' ', '/', '>', '"', "'", '='], '', $name);
      }

      /**
       * Strips one or more leading indents from a line or lines of markdown. An
       * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
       * spaces) are treated like one indent level.
       *
       * @param string|string[] $line
       * @param int $levels
       * @return string|string[]
       */
      public static function stripIndent(string|array $line, int $levels=1): string|array {
          // Builds e.g. `^(?: {1,4}|\t){1}` for one level.
          $regex = "^(?: {1,4}|\\t){{$levels}}";
          if (is_array($line)) {
              return array_map(fn(string $l): string => mb_ereg_replace($regex, '', $l), $line);
          }
          return mb_ereg_replace($regex, '', $line);
      }

      /**
       * Counts the number of indent levels in a line of text. Partial indents
       * (1 to 3 spaces) are counted as one indent level unless `$fullIndentsOnly`
       * is `true`.
       *
       * @param string $line line to inspect
       * @param bool $fullIndentsOnly count only 4-space or tab indents
       * @return int number of leading indent levels
       */
      public static function countIndents(string $line, bool $fullIndentsOnly=false): int {
          // normalize indents to tabs
          $t = mb_ereg_replace($fullIndentsOnly ? '(?: {4}|\\t)' : '(?: {1,4}|\\t)', "\t", $line);
          // remove content after indent
          $t = mb_ereg_replace('^(\\t*)(.*?)$', '\\1', $t);
          // count tabs
          return mb_strlen($t);
      }

      /**
       * Returns a copy of an array without any whitespace-only lines at the end.
       *
       * @param string[] $lines
       * @return string[]
       */
      public static function withoutTrailingBlankLines(array $lines): array {
          $stripped = $lines;
          while ($stripped !== [] && trim($stripped[array_key_last($stripped)]) === '') {
              array_pop($stripped);
          }
          return $stripped;
      }

      /**
       * Tests if an array of lines contains at least one blank. A blank line
       * can contain whitespace.
       *
       * @param string[] $lines
       * @return bool `true` if any line is empty after trimming
       */
      public static function containsBlankLine(array $lines): bool {
          foreach ($lines as $line) {
              if (trim($line) === '') return true;
          }
          return false;
      }

      /**
       * Returns a type or class name of a value: the class name for objects,
       * otherwise the `gettype()` name (e.g. `integer`, `string`).
       *
       * @param mixed $value
       * @return string
       */
      public static function typename($value): string {
          $tn = gettype($value);
          return ($tn === 'object') ? get_class($value) : $tn;
      }
  }
  /**
   * Kinds of token an `MDToken` can represent. Trailing notes on a case
   * say which `MDToken` fields carry its payload.
   */
  enum MDTokenType {
      case Text;

      /**
       * Only used for the leading and trailing whitespace around a run of text,
       * not every single whitespace character.
       */
      case Whitespace;

      // Symbol tokens, each named after the symbol it stands for.
      case Underscore;
      case Asterisk;
      case Slash;
      case Tilde;
      case Bang;
      case Backtick;
      case Equal;
      case Caret;

      /** content=label */
      case Label;

      /** content=URL, extra=title */
      case URL;

      /** content=email address, extra=title */
      case Email;

      /** content=URL */
      case SimpleLink;

      /** content=email address */
      case SimpleEmail;

      /** content=symbol */
      case Footnote;

      /** modifier=MDTagModifier */
      case Modifier;

      /** tag=MDHTMLTag */
      case HTMLTag;

      /** Wildcard for `MDToken::findFirstTokens` */
      case META_AnyNonWhitespace;

      /** Wildcard for `MDToken::findFirstTokens` */
      case META_OptionalWhitespace;
  }
  /**
   * Result of a successful `MDToken::findFirstTokens` search: the matched
   * tokens and the index in the searched array where the match begins.
   */
  class MDTokenMatch {
      /**
       * @param MDToken[] $tokens tokens that matched the pattern
       * @param int $index index of the first matched token
       */
      public function __construct(
          public array $tokens,
          public int $index,
      ) {}
  }
  /**
   * Result of a successful `MDToken::findPairedTokens` search: the opening
   * tokens, the closing tokens, the tokens between them, and where each run
   * sits in the searched array.
   */
  class MDPairedTokenMatch {
      /**
       * @param MDToken[] $startTokens tokens matched by the start pattern
       * @param MDToken[] $contentTokens tokens between the start and end matches
       * @param MDToken[] $endTokens tokens matched by the end pattern
       * @param int $startIndex index of the first start token
       * @param int $contentIndex index of the first content token
       * @param int $endIndex index of the first end token
       * @param int $totalLength number of tokens from the first start token
       * through the last end token
       */
      public function __construct(
          public array $startTokens,
          public array $contentTokens,
          public array $endTokens,
          public int $startIndex,
          public int $contentIndex,
          public int $endIndex,
          public int $totalLength,
      ) {}
  }
  /**
   * One lexical unit in inline markdown syntax parsing.
   */
  class MDToken {
      /**
       * The original verbatim token string. Required as a plaintext fallback if
       * the token remains unresolved.
       */
      public string $original;

      public MDTokenType $type;

      /** Primary string content, when the token carries one. */
      public ?string $content = null;

      /** Additional string content (e.g. a link title). */
      public ?string $extra = null;

      /** Set when the constructor's `$content` was an `MDHTMLTag`. */
      public ?MDHTMLTag $tag = null;

      /** Set when the constructor's `$content` was an `MDTagModifier`. */
      public ?MDTagModifier $modifier = null;

      /**
       * Creates a token.
       *
       * @param string $original verbatim token string
       * @param MDTokenType $type token type
       * @param string|MDTagModifier|MDHTMLTag|null $content primary content of
       * the token; stored in `$modifier`, `$tag` or `$content` depending on
       * its type
       * @param string|null $extra additional content
       */
      public function __construct(string $original, MDTokenType $type,
          string|MDTagModifier|MDHTMLTag|null $content=null,
          ?string $extra=null) {
          $this->original = $original;
          $this->type = $type;
          if ($content instanceof MDTagModifier) {
              $this->modifier = $content;
          } elseif ($content instanceof MDHTMLTag) {
              $this->tag = $content;
          } else {
              $this->content = $content;
          }
          $this->extra = $extra;
      }

      /**
       * Debug representation of the form `<MDToken type=Name content="...">`.
       */
      public function __toString(): string {
          return "<" . MDUtils::typename($this) . " type={$this->type->name} " .
              "content=\"{$this->content}\">";
      }

      /**
       * Attempts to parse a label token from the beginning of `$line`. A label is
       * of the form `[content]`. If found, returns an array:
       * - `0`: the entire label including brackets
       * - `1`: the content of the label
       *
       * Nested `[...]` pairs are allowed inside the label, a backslash skips
       * the character after it, and an unmatched `)` aborts the parse.
       *
       * @param string $line
       * @return ?string[] match groups or null if not found
       */
      public static function tokenizeLabel(string $line): ?array {
          if (!str_starts_with($line, '[')) return null;
          // Split once so each character lookup is constant time instead of
          // rescanning the string with mb_substr() on every iteration.
          $chars = mb_str_split($line);
          $l = count($chars);
          $parenCount = 0;
          $bracketCount = 0;
          for ($p = 1; $p < $l; $p++) {
              $ch = $chars[$p];
              if ($ch === '\\') {
                  // Escaped character: skip it.
                  $p++;
              } elseif ($ch === '(') {
                  $parenCount++;
              } elseif ($ch === ')') {
                  $parenCount--;
                  if ($parenCount < 0) return null;
              } elseif ($ch === '[') {
                  $bracketCount++;
              } elseif ($ch === ']') {
                  if ($bracketCount > 0) {
                      // Closes a nested bracket, not the label itself.
                      $bracketCount--;
                  } else {
                      $all = mb_substr($line, 0, $p + 1);
                      $content = mb_substr($line, 1, $p - 1);
                      return [ $all, $content ];
                  }
              }
          }
          return null;
      }

      private const urlWithTitleRegex = '^\\((\\S+?)\\s+"(.*?)"\\)'; // 1=URL, 2=title
      private const urlRegex = '^\\((\\S+?)\\)'; // 1=URL

      /**
       * Attempts to parse a URL token from the beginning of `$line`. A URL token
       * is of the form `(url)` or `(url "title")`. If found, returns an array:
       * - `0`: the entire URL token including parentheses
       * - `1`: the URL
       * - `2`: the optional title, or `null`
       *
       * Returns `null` when the text also parses as an email token.
       *
       * @param string $line
       * @return ?array token tuple
       */
      public static function tokenizeURL(string $line): ?array {
          $groups = [];
          if (mb_eregi(self::urlWithTitleRegex, $line, $groups)) {
              // make sure it's not better described as an email address
              if (self::tokenizeEmail($line)) return null;
              return $groups;
          }
          if (mb_eregi(self::urlRegex, $line, $groups)) {
              if (self::tokenizeEmail($line)) return null;
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Attempts to parse an email address from the beginning of `$line`. An
       * email address is of the form `(user@example.com)` or
       * `(user@example.com "link title")`. If found, returns an array:
       * - `0`: the entire token including parentheses
       * - `1`: the email address
       * - `2`: the optional link title, or `null`
       *
       * @param string $line
       * @return ?string[] token tuple
       */
      public static function tokenizeEmail(string $line): ?array {
          // Initialised explicitly; mb_eregi() fills it by reference.
          $groups = [];
          if (mb_eregi("^\\(\\s*(" . MDUtils::baseEmailRegex . ")\\s+\"(.*?)\"\\s*\\)",
              $line, $groups)) {
              return $groups;
          }
          if (mb_eregi("^\\(\\s*(" . MDUtils::baseEmailRegex . ")\\s*\\)", $line, $groups)) {
              return [ $groups[0], $groups[1], null ];
          }
          return null;
      }

      /**
       * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
       * If found, returns a `MDTokenMatch`, otherwise `null`.
       *
       * `META_AnyNonWhitespace` and `META_OptionalWhitespace` are supported as
       * wildcard pattern elements. Note that `META_OptionalWhitespace` may give
       * a result with a variable number of tokens.
       *
       * NOTE(review): running out of tokens part-way through the pattern ends
       * the whole search with `null`, so a pattern whose last element is
       * `META_OptionalWhitespace` cannot match at the very end of the array.
       * Confirm whether that is intended.
       *
       * @param (MDToken|MDNode)[] $tokensToSearch mixed array of `MDToken` and
       * `MDNode` elements
       * @param MDTokenType[] $pattern contiguous run of token types to find
       * @param int $startIndex token index to begin searching (defaults to 0)
       * @return ?MDTokenMatch match object, or `null` if not found
       * @throws Error if `$pattern` is empty
       */
      public static function findFirstTokens(array $tokensToSearch, array $pattern,
          int $startIndex=0): ?MDTokenMatch {
          $patternCount = sizeof($pattern);
          if ($patternCount == 0) {
              throw new Error("Pattern cannot be empty");
          }
          $tokenCount = sizeof($tokensToSearch);
          for ($t = $startIndex; $t < $tokenCount; $t++) {
              $matchedAll = true;
              $matched = [];
              // Goes negative each time an optional-whitespace element matches
              // nothing, so later pattern elements line up with the right token.
              $patternOffset = 0;
              for ($p = 0; $p < $patternCount; $p++) {
                  $t0 = $t + $p + $patternOffset;
                  if ($t0 >= $tokenCount) return null;
                  $token = $tokensToSearch[$t0];
                  $elem = $pattern[$p];
                  if ($elem == MDTokenType::META_OptionalWhitespace) {
                      if ($token instanceof MDToken && $token->type == MDTokenType::Whitespace) {
                          array_push($matched, $token);
                      } else {
                          // Zero-width match: re-test this token against the next element.
                          $patternOffset--;
                      }
                  } elseif ($elem == MDTokenType::META_AnyNonWhitespace) {
                      if ($token instanceof MDToken && $token->type == MDTokenType::Whitespace) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  } else {
                      if (!($token instanceof MDToken) || $token->type != $elem) {
                          $matchedAll = false;
                          break;
                      }
                      array_push($matched, $token);
                  }
              }
              if ($matchedAll) {
                  return new MDTokenMatch($matched, $t);
              }
          }
          return null;
      }

      /**
       * Searches an array of MDToken for a given starting pattern and ending
       * pattern and returns match info about both and the tokens in between.
       *
       * If `$contentValidator` is specified, it will be called with the content
       * tokens of a potential match. If the validator returns `true`, the result
       * will be accepted and returned by this method. If the validator returns
       * `false`, this method will keep looking for another matching pair. If no
       * validator is given the first match will be returned regardless of content.
       * A pair with no tokens between start and end is never accepted.
       *
       * If a match is found, a `MDPairedTokenMatch` is returned with details
       * of the opening tokens, closing tokens, and content tokens between. Otherwise
       * `null` is returned.
       *
       * @param MDToken[] $tokensToSearch array of `MDToken` to search in
       * @param MDTokenType[] $startPattern pattern to find first
       * @param MDTokenType[] $endPattern pattern to find positioned after
       * `$startPattern`
       * @param ?callable $contentValidator optional validator function. If
       * provided, will be passed an array of inner `MDToken`, and the function
       * can return `true` to accept the contents or `false` to keep searching
       * @param int $startIndex token index where searching should begin
       * @return ?MDPairedTokenMatch match, or `null`
       */
      public static function findPairedTokens(array $tokensToSearch,
          array $startPattern, array $endPattern, ?callable $contentValidator=null,
          int $startIndex=0): ?MDPairedTokenMatch {
          for ($s = $startIndex; $s < sizeof($tokensToSearch); $s++) {
              $startMatch = self::findFirstTokens($tokensToSearch, $startPattern, $s);
              if ($startMatch === null) return null;
              $endStart = $startMatch->index + sizeof($startMatch->tokens);
              while ($endStart < sizeof($tokensToSearch)) {
                  $endMatch = self::findFirstTokens($tokensToSearch, $endPattern, $endStart);
                  if ($endMatch === null) break;
                  $contentStart = $startMatch->index + sizeof($startMatch->tokens);
                  $contentLength = $endMatch->index - $contentStart;
                  $contents = array_slice($tokensToSearch, $contentStart, $contentLength);
                  if (sizeof($contents) > 0 && ($contentValidator === null || $contentValidator($contents))) {
                      return new MDPairedTokenMatch($startMatch->tokens,
                          $contents,
                          $endMatch->tokens,
                          $startMatch->index,
                          $startMatch->index + sizeof($startMatch->tokens),
                          $endMatch->index,
                          $endMatch->index + sizeof($endMatch->tokens) - $startMatch->index);
                  } else {
                      // Contents rejected. Try next end match.
                      $endStart = $endMatch->index + 1;
                  }
              }
              // No end matches. Increment start match. (The loop's `$s++` then
              // moves past this start position.)
              $s = $startMatch->index;
          }
          return null;
      }

      /**
       * Tests whether `$other` is an `MDToken` with the same field values.
       * `$tag` is compared by identity; `$type` and `$modifier` are compared
       * loosely with `!=`.
       *
       * @param mixed $other value to compare against
       * @return bool
       */
      public function equals($other) {
          if (!($other instanceof MDToken)) return false;
          if ($other->original !== $this->original) return false;
          if ($other->type != $this->type) return false;
          if ($other->content !== $this->content) return false;
          if ($other->extra !== $this->extra) return false;
          if ($other->tag !== $this->tag) return false;
          if ($other->modifier != $this->modifier) return false;
          return true;
      }
  }
  434. /**
  435. * Parsing and rendering state. Passed around throughout the parsing process.
  436. *
  437. * States are hierarchical. A sub-state can be created by calling `->copy()` with
  438. * a new array of lines. The sub-state points back to its parent state. This
  439. * is done to parse inner content of a syntax as its own standalone document.
  440. *
  441. * If a custom `MDReader` implementation wants to store data in this object,
  442. * always do so on `$state->root()` to ensure it's stored on the original state,
  443. * not a child state. Otherwise data may be lost when the sub-state is discarded.
  444. */
  class MDState {
      /**
       * Ascends the parent chain to the root `MDState` instance. This should be
       * used when referencing most stored fields except `$lines` and `$p`.
       */
      public function root(): MDState {
          return $this->parent ? $this->parent->root() : $this;
      }

      /**
       * Lines of the markdown document. The current line index is pointed to by `$p`.
       *
       * @var string[]
       */
      public array $lines;

      /**
       * The current line in `$lines`, or `null` once `$p` has moved past the
       * last line.
       */
      public function currentLine(): ?string {
          return ($this->p < sizeof($this->lines)) ? $this->lines[$this->p] : null;
      }

      /**
       * Current line pointer into array `$lines`.
       */
      public int $p = 0;

      /**
       * General storage for anything readers need to track during the parsing
       * process.
       */
      public array $userInfo = [];

      /**
       * State this sub-state was created from via `copy()`, or `null` for the
       * root state.
       */
      private ?MDState $parent = null;

      /**
       * Array of `MDReader`s sorted by block reading priority.
       * @var MDReader[]
       */
      public array $readersByBlockPriority = [];

      /**
       * Array of `MDReader`s sorted by tokenization priority.
       * @var MDReader[]
       */
      public array $readersByTokenPriority = [];

      /**
       * Array of tuples of `pass:number` and `MDReader` sorted by substitution
       * priority.
       * @var array[]
       */
      public array $readersBySubstitutePriority = [];

      /**
       * Prefix to include in any generated `id` attributes on HTML elements.
       * Useful for keeping elements unique in multiple parsed documents in the
       * same HTML page.
       */
      public string $elementIdPrefix = '';

      /**
       * Filter for removing unapproved HTML tags, attributes, and values.
       */
      public MDHTMLFilter $tagFilter;

      /**
       * @param string[] $lines lines of markdown text
       */
      public function __construct(array $lines) {
          $this->lines = $lines;
          $this->startTime = microtime(true);
      }

      /**
       * Creates a copy of this state with new lines. Useful for parsing nested
       * content. Only the parent link is set on the copy; other fields are
       * meant to be reached through `root()`.
       *
       * @param string[] $lines
       * @return MDState copied sub-state
       */
      public function copy(array $lines): MDState {
          $cp = new MDState($lines);
          $cp->parent = $this;
          return $cp;
      }

      /**
       * Tests if there are at least `$minCount` lines available to read. If `$p`
       * is not provided it will be relative to `$this->p`.
       */
      public function hasLines(int $minCount, ?int $p=null): bool {
          $relativeTo = ($p === null) ? $this->p : $p;
          return $relativeTo + $minCount <= sizeof($this->lines);
      }

      /**
       * Reads and returns an array of blocks from the current line pointer.
       * Stops at the end of the lines or as soon as no block can be read.
       *
       * @return MDBlockNode[] parsed blocks
       */
      public function readBlocks(): array {
          $blocks = [];
          while ($this->hasLines(1)) {
              $block = $this->readNextBlock();
              if (!$block) break;
              $blocks[] = $block;
          }
          return $blocks;
      }

      /**
       * Creates a simple `MDBlockNode` if no other registered blocks match.
       * Consumes every remaining line.
       */
      private function readFallbackBlock(): ?MDBlockNode {
          if ($this->p >= sizeof($this->lines)) return null;
          $lines = MDUtils::withoutTrailingBlankLines(array_slice($this->lines, $this->p));
          if (sizeof($lines) == 0) return null;
          $this->p = sizeof($this->lines);
          return new MDBlockNode($this->inlineMarkdownToNode(implode("\n", $lines)));
      }

      /**
       * Attempts to read one block from the current line pointer. The pointer
       * will be positioned just after the end of the block.
       *
       * @throws Error if a reader returns a block without advancing `$p`
       */
      private function readNextBlock(): ?MDBlockNode {
          // Skip blank lines between blocks.
          while ($this->hasLines(1) && mb_strlen(trim($this->lines[$this->p])) == 0) {
              $this->p++;
          }
          if (!$this->hasLines(1)) return null;
          foreach ($this->root()->readersByBlockPriority as $reader) {
              $startP = $this->p;
              $block = $reader->readBlock($this);
              if ($block) {
                  if ($this->p == $startP) {
                      $readerClassName = MDUtils::typename($reader);
                      $blockClassName = MDUtils::typename($block);
                      throw new Error("{$readerClassName} returned an " .
                          "{$blockClassName} without incrementing MDState.p. " .
                          "This could lead to an infinite loop.");
                  }
                  return $block;
              }
          }
          return $this->readFallbackBlock();
      }

      /**
       * Splits a line of inline markdown into tokens. Always runs on the root
       * state. A backslash makes the following character literal text.
       *
       * @param string $line
       * @return MDToken[]
       * @throws Error if a reader returns a token with an empty `original`
       */
      private function inlineMarkdownToTokens(string $line): array {
          if ($this->parent) return $this->parent->inlineMarkdownToTokens($line);
          $tokens = [];
          $text = '';
          $expectLiteral = false;
          /**
           * Flushes accumulated content in `$text` to `$tokens`, splitting off
           * leading and trailing whitespace into their own tokens.
           */
          $endText = function() use (&$tokens, &$text) {
              if (mb_strlen($text) === 0) return;
              $textGroups = [];
              if (mb_eregi('^(\\s+)(.*?)$', $text, $textGroups)) {
                  array_push($tokens, new MDToken($textGroups[1], MDTokenType::Whitespace, $textGroups[1]));
                  $text = is_string($textGroups[2]) ? $textGroups[2] : '';
              }
              if (mb_eregi('^(.*?)(\\s+)$', $text, $textGroups)) {
                  array_push($tokens, new MDToken($textGroups[1], MDTokenType::Text, $textGroups[1]));
                  array_push($tokens, new MDToken($textGroups[2], MDTokenType::Whitespace, $textGroups[2]));
              } elseif (mb_strlen($text) > 0) {
                  array_push($tokens, new MDToken($text, MDTokenType::Text, $text));
              }
              $text = '';
          };
          // The line never changes inside the loop, so measure it once.
          $lineLength = mb_strlen($line);
          for ($p = 0; $p < $lineLength; $p++) {
              $ch = mb_substr($line, $p, 1);
              if ($expectLiteral) {
                  $text .= $ch;
                  $expectLiteral = false;
                  continue;
              }
              if ($ch == '\\') {
                  $expectLiteral = true;
                  continue;
              }
              $remainder = mb_substr($line, $p);
              $found = false;
              foreach ($this->root()->readersByTokenPriority as $reader) {
                  $token = $reader->readToken($this, $remainder);
                  if ($token === null) continue;
                  if ($token->original === null || mb_strlen($token->original) == 0) {
                      $readerClassName = MDUtils::typename($reader);
                      // Must be a double-quoted string: backticks are PHP's
                      // shell execution operator, not a string literal.
                      throw new Error("{$readerClassName} returned a token with an empty .original. This would cause an infinite loop.");
                  }
                  $endText();
                  array_push($tokens, $token);
                  // Skip past the consumed characters (the loop adds the last 1).
                  $p += mb_strlen($token->original) - 1;
                  $found = true;
                  break;
              }
              if (!$found) {
                  $text .= $ch;
              }
          }
          $endText();
          return $tokens;
      }

      /**
       * Converts a line of markdown to an `MDInlineNode`. When parsing yields
       * exactly one node it is returned as-is; otherwise the nodes are wrapped
       * in a new `MDInlineNode`.
       *
       * @param string|string[] $line
       * @return MDInlineNode
       */
      public function inlineMarkdownToNode(string|array $line): MDInlineNode {
          $nodes = $this->inlineMarkdownToNodes($line);
          return (sizeof($nodes) == 1) ? $nodes[0] : new MDInlineNode($nodes);
      }

      /**
       * Converts a line of markdown to an array of `MDInlineNode`s. An array of
       * lines is joined with newlines first.
       *
       * @param string|string[] $line
       * @return MDInlineNode[]
       */
      public function inlineMarkdownToNodes(string|array $line): array {
          $tokens = $this->inlineMarkdownToTokens(is_array($line) ? implode("\n", $line) : $line);
          return $this->tokensToNodes($tokens);
      }

      /**
       * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
       * of only `MDInlineNode` via repeated `MDReader` substitution.
       *
       * @param (MDToken|MDInlineNode)[] $tokens
       * @return MDInlineNode[]
       * @throws Error if an element is neither an `MDToken` nor an `MDNode`
       */
      public function tokensToNodes(array $tokens): array {
          $nodes = $tokens;
          // Perform repeated substitutions, converting sequences of tokens into
          // nodes, until no more substitutions can be made. After any change the
          // scan restarts from the highest priority reader.
          do {
              $anyChanges = false;
              foreach ($this->root()->readersBySubstitutePriority as $readerTuple) {
                  /** @var int */
                  $pass = $readerTuple[0];
                  /** @var MDReader */
                  $reader = $readerTuple[1];
                  $changed = $reader->substituteTokens($this, $pass, $nodes);
                  if (!$changed) continue;
                  $anyChanges = true;
                  break;
              }
          } while ($anyChanges);
          // Convert any remaining tokens to text nodes. Also apply any inline
          // CSS modifiers to the non-text node immediately preceding them.
          $lastNode = null;
          $me = $this;
          $nodes = array_map(function($node) use (&$lastNode, $me) {
              if ($node instanceof MDToken) {
                  /** @var MDToken */
                  $token = $node;
                  if ($token->type == MDTokenType::Modifier && $lastNode) {
                      // The target's HTML tag name is not known here, so pass
                      // null explicitly; the filter then permits only global
                      // attributes.
                      $me->root()->tagFilter->scrubModifier($token->modifier, null);
                      $token->modifier->applyTo($lastNode);
                      $lastNode = null;
                      return new MDTextNode('');
                  }
                  $lastNode = null;
                  return new MDTextNode($token->original);
              } elseif ($node instanceof MDNode) {
                  $lastNode = ($node instanceof MDTextNode) ? null : $node;
                  return $node;
              } else {
                  $nodeClassName = MDUtils::typename($node);
                  throw new Error("Unexpected node type {$nodeClassName}");
              }
          }, $nodes);
          return $nodes;
      }

      /**
       * Time this state was constructed, from `microtime(true)`.
       * @var float
       */
      public $startTime;

      /**
       * Checks if parsing has taken an excessive length of time. Because I'm not
       * fully confident in my loops yet. :)
       *
       * @param float $maxSeconds allowed seconds since the root state was created
       * @throws Error if the limit has been exceeded
       */
      public function checkExecutionTime(float $maxSeconds=1.0) {
          $elapsed = microtime(true) - $this->root()->startTime;
          if ($elapsed > $maxSeconds) {
              throw new Error("Markdown parsing taking too long. Infinite loop?");
          }
      }

      /**
       * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
       * and `MDReferencedImageReader`.
       */
      private array $referenceToURL = [];

      /**
       * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
       * and `MDReferencedImageReader`.
       */
      private array $referenceToTitle = [];

      /**
       * Defines a URL by reference symbol. Symbols are stored lowercased on
       * the root state.
       */
      public function defineURL(string $reference, string $url, ?string $title=null) {
          $key = mb_strtolower($reference);
          $root = $this->root();
          $root->referenceToURL[$key] = $url;
          if ($title !== null) $root->referenceToTitle[$key] = $title;
      }

      /**
       * Returns the URL associated with a reference symbol, or `null`.
       */
      public function urlForReference(string $reference): ?string {
          return $this->root()->referenceToURL[mb_strtolower($reference)] ?? null;
      }

      /**
       * Returns the link title associated with a reference symbol, or `null`.
       */
      public function urlTitleForReference(string $reference): ?string {
          return $this->root()->referenceToTitle[mb_strtolower($reference)] ?? null;
      }
  }
  753. /**
  754. * Defines a set of allowable HTML tags, attributes, and CSS.
  755. */
  class MDHTMLFilter {
      /**
       * Mapping of permitted lowercase tag names to objects containing allowable
       * attributes for those tags. Does not need to include those attributes
       * defined in `$allowableGlobalAttributes`.
       *
       * Values are objects with allowable lowercase attribute names mapped to
       * allowable value patterns. A `*` means any value is acceptable. Multiple
       * allowable values can be joined together with `|`. These special symbols
       * represent certain kinds of values and can be used in combination or in
       * place of literal values.
       *
       * - `{classlist}`: A list of legal CSS classnames, separated by spaces
       * - `{int}`: An integer
       * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
       * - `{style}`: One or more CSS declarations, separated by semicolons (simple
       *   `key: value;` syntax only)
       * - `{url}`: A URL
       */
      public array $allowableTags = [
          'address' => [
              'cite' => '{url}',
          ],
          'h1' => [],
          'h2' => [],
          'h3' => [],
          'h4' => [],
          'h5' => [],
          'h6' => [],
          'blockquote' => [],
          'dl' => [],
          'dt' => [],
          'dd' => [],
          'div' => [],
          'hr' => [],
          'ul' => [],
          'ol' => [
              'start' => '{int}',
              'type' => 'a|A|i|I|1',
          ],
          'li' => [
              'value' => '{int}',
          ],
          'p' => [],
          'pre' => [],
          'table' => [],
          'thead' => [],
          'tbody' => [],
          'tfoot' => [],
          'tr' => [],
          'td' => [],
          'th' => [],
          'a' => [
              'href' => '{url}',
              'target' => '*',
          ],
          'abbr' => [],
          'b' => [],
          'br' => [],
          'cite' => [],
          'code' => [],
          'data' => [
              'value' => '*',
          ],
          'dfn' => [],
          'em' => [],
          'i' => [],
          'kbd' => [],
          'mark' => [],
          'q' => [
              'cite' => '{url}',
          ],
          's' => [],
          'samp' => [],
          'small' => [],
          'span' => [],
          'strong' => [],
          'sub' => [],
          'sup' => [],
          'time' => [
              'datetime' => '*',
          ],
          'u' => [],
          'var' => [],
          'wbr' => [],
          'img' => [
              'alt' => '*',
              // `href` kept for backward compatibility; `src` is the attribute
              // that actually carries an image's URL.
              'href' => '{url}',
              'src' => '{url}',
          ],
          'figure' => [],
          'figcaption' => [],
          'del' => [],
          'ins' => [],
          'details' => [],
          'summary' => [],
      ];

      /**
       * Mapping of allowable lowercase global attributes to their permitted
       * values. Uses same value pattern syntax as described in `$allowableTags`.
       * A name ending in `*` permits any attribute name with that prefix.
       */
      public array $allowableGlobalAttributes = [
          'class' => '{classlist}',
          'data-*' => '*',
          'dir' => 'ltr|rtl|auto',
          'id' => '*',
          'lang' => '*',
          'style' => '{style}',
          'title' => '*',
          'translate' => 'yes|no|{none}',
      ];

      /**
       * Mapping of allowable CSS style names to their allowable value patterns.
       * Multiple values can be delimited with `|` characters. Limited support
       * so far.
       *
       * Recognized special values:
       * - `{color}`: A hex or named color
       */
      public array $allowableStyleKeys = [
          'background-color' => '{color}',
          'color' => '{color}',
      ];

      /**
       * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
       * itself has already been whitelisted.
       *
       * @param MDHTMLTag $tag HTML tag
       */
      public function scrubTag(MDHTMLTag $tag) {
          foreach ($tag->attributes as $name => $value) {
              if (!$this->isValidAttributeName($tag->tagName, $name) ||
                  !$this->isValidAttributeValue($tag->tagName, $name, $value)) {
                  unset($tag->attributes[$name]);
              }
          }
      }

      /**
       * Scrubs all forbidden attributes from an HTML modifier.
       *
       * @param MDTagModifier $modifier
       * @param ?string $tagName HTML tag name, if known, otherwise only
       *   global attributes will be permitted
       */
      public function scrubModifier(MDTagModifier $modifier, ?string $tagName = null) {
          // Classes are validated as one space-joined list; any bad class
          // discards all of them.
          if (sizeof($modifier->cssClasses) > 0) {
              $classList = implode(' ', $modifier->cssClasses);
              if (!$this->isValidAttributeValue($tagName, 'class', $classList)) {
                  $modifier->cssClasses = [];
              }
          }
          if ($modifier->cssId !== null) {
              if (!$this->isValidAttributeValue($tagName, 'id', $modifier->cssId)) {
                  $modifier->cssId = null;
              }
          }
          if (!$this->isValidAttributeName($tagName, 'style')) {
              $modifier->cssStyles = [];
          } else {
              foreach ($modifier->cssStyles as $key => $val) {
                  if (!$this->isValidStyleValue($key, $val)) {
                      unset($modifier->cssStyles[$key]);
                  }
              }
          }
          foreach ($modifier->attributes as $key => $val) {
              if (!$this->isValidAttributeValue($tagName, $key, $val)) {
                  unset($modifier->attributes[$key]);
              }
          }
      }

      /**
       * Tests if an HTML tag name is permitted. Case-insensitive.
       */
      public function isValidTagName(string $tagName): bool {
          return ($this->allowableTags[mb_strtolower($tagName)] ?? null) !== null;
      }

      /**
       * Tests if an HTML attribute name is permitted, either globally or for
       * the given tag. Case-insensitive.
       *
       * @param ?string $tagName tag name, or `null` to consider only global
       *   attributes
       */
      public function isValidAttributeName(?string $tagName, string $attributeName): bool {
          $lcAttributeName = mb_strtolower($attributeName);
          if (($this->allowableGlobalAttributes[$lcAttributeName] ?? null) !== null) {
              return true;
          }
          // Wildcard global names such as `data-*` match by prefix.
          foreach ($this->allowableGlobalAttributes as $pattern => $valuePattern) {
              if (!str_ends_with($pattern, '*')) continue;
              $patternPrefix = mb_substr($pattern, 0, mb_strlen($pattern) - 1);
              if (str_starts_with($lcAttributeName, $patternPrefix)) {
                  return true;
              }
          }
          if ($tagName === null) return false;
          // `?? null` avoids an undefined-key warning for unknown tag names.
          $tagAttributes = $this->allowableTags[mb_strtolower($tagName)] ?? null;
          if ($tagAttributes === null) return false;
          return ($tagAttributes[$lcAttributeName] ?? null) !== null;
      }

      /**
       * Tests if an attribute value is allowable. Returns `false` when the
       * attribute name itself is not permitted.
       *
       * @param ?string $tagName tag name, or `null` to consider only global
       *   attributes
       * @param string $attributeName
       * @param string|bool $attributeValue value string, or `true` for an
       *   attribute with no value
       */
      public function isValidAttributeValue(?string $tagName, string $attributeName, $attributeValue): bool {
          $lcAttributeName = mb_strtolower($attributeName);
          $globalPattern = $this->allowableGlobalAttributes[$lcAttributeName] ?? null;
          if ($globalPattern !== null) {
              return $this->attributeValueMatchesPattern($attributeValue, $globalPattern);
          }
          foreach ($this->allowableGlobalAttributes as $namePattern => $valuePattern) {
              if (str_ends_with($namePattern, '*') && str_starts_with($lcAttributeName, mb_substr($namePattern, 0, mb_strlen($namePattern) - 1))) {
                  return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
              }
          }
          if ($tagName === null) return false;
          $lcTagName = mb_strtolower($tagName);
          $tagAttributes = $this->allowableTags[$lcTagName] ?? null;
          if ($tagAttributes === null) return false;
          $valuePattern = $tagAttributes[$lcAttributeName] ?? null;
          if ($valuePattern === null) return false;
          return $this->attributeValueMatchesPattern($attributeValue, $valuePattern);
      }

      // NOTE(review): accepts any run of non-whitespace characters, so schemes
      // such as `javascript:` pass. Tighten if that matters for your output.
      private const permissiveURLRegex = '^\\S+$';
      private const integerRegex = '^[\\-]?\\d+$';
      private const classListRegex = '^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$';

      /**
       * Tests a value against a `|`-delimited value pattern as described on
       * `$allowableTags`.
       *
       * @param string|bool $value value string, or `true` for no value
       * @param string $pattern
       */
      private function attributeValueMatchesPattern(string|bool $value, string $pattern): bool {
          foreach (explode('|', $pattern) as $option) {
              if ($option === '*') return true;
              if ($option === '{none}') {
                  if ($value === true) return true;
                  continue;
              }
              // Every remaining option describes a string. Without this guard a
              // valueless attribute (`true`) would be coerced to "1" and pass
              // as `{int}` or `{url}`.
              if (!is_string($value)) continue;
              switch ($option) {
                  case '{classlist}':
                      if (mb_eregi(self::classListRegex, $value)) return true;
                      break;
                  case '{int}':
                      if (mb_eregi(self::integerRegex, $value)) return true;
                      break;
                  case '{style}':
                      if ($this->isValidStyleDeclaration($value)) return true;
                      break;
                  case '{url}':
                      if (mb_eregi(self::permissiveURLRegex, $value)) return true;
                      break;
                  default:
                      if ($value === $option) return true;
                      break;
              }
          }
          return false;
      }

      /**
       * Tests if a string of one or more style `key: value;` declarations is
       * fully allowable. Blank declarations are skipped; any declaration that
       * does not split into exactly one key and one value fails the string.
       */
      public function isValidStyleDeclaration(string $styles): bool {
          $settings = explode(';', $styles);
          foreach ($settings as $setting) {
              if (mb_strlen(trim($setting)) == 0) continue;
              $parts = explode(':', $setting);
              if (sizeof($parts) != 2) return false;
              $name = trim($parts[0]);
              if (!$this->isValidStyleKey($name)) return false;
              $value = trim($parts[1]);
              if (!$this->isValidStyleValue($name, $value)) return false;
          }
          return true;
      }

      /**
       * Tests if a CSS style key is allowable.
       */
      public function isValidStyleKey(string $key): bool {
          return ($this->allowableStyleKeys[$key] ?? null) !== null;
      }

      /**
       * Tests if a CSS style value is allowable for the given style key.
       */
      public function isValidStyleValue(string $key, string $value): bool {
          $pattern = $this->allowableStyleKeys[$key] ?? null;
          if ($pattern === null) return false;
          foreach (explode('|', $pattern) as $option) {
              switch ($option) {
                  case '{color}':
                      if ($this->isValidCSSColor($value)) return true;
                      // Without this break the case falls through and accepts
                      // the literal text "{color}" as a value.
                      break;
                  default:
                      if ($value === $option) return true;
                      break;
              }
          }
          return false;
      }

      // A 3 or 6 digit hex color, or a purely alphabetic color name.
      private const styleColorRegex = '^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$';

      /**
       * Tests if a value looks like a hex or named CSS color.
       */
      private function isValidCSSColor(string $value): bool {
          return mb_eregi(self::styleColorRegex, $value);
      }
  }
  1055. /**
  1056. * Represents a single HTML tag. Paired tags are represented separately.
  1057. */
  class MDHTMLTag {
      /**
       * Verbatim string of the original parsed tag. Not modified. Should be
       * considered unsafe for inclusion in the final document. Use `->toString()`
       * instead.
       */
      public string $original;

      public string $tagName;

      public bool $isCloser;

      /**
       * Map of attribute names to value strings. An attribute with no value
       * maps to `true`.
       */
      public array $attributes;

      /**
       * @param string $original
       * @param string $tagName
       * @param bool $isCloser
       * @param array $attributes
       */
      public function __construct(string $original, string $tagName, bool $isCloser,
          array $attributes) {
          $this->original = $original;
          $this->tagName = $tagName;
          $this->isCloser = $isCloser;
          $this->attributes = $attributes;
      }

      /**
       * Rebuilds the tag as HTML from its parsed parts rather than from
       * `$original`. Attribute names are scrubbed and values are escaped.
       */
      public function __toString(): string {
          if ($this->isCloser) {
              return "</{$this->tagName}>";
          }
          $html = '<';
          $html .= $this->tagName;
          foreach ($this->attributes as $key => $value) {
              $safeName = MDUtils::scrubAttributeName($key);
              if ($value === true) {
                  $html .= " {$safeName}";
              } else {
                  $escapedValue = MDUtils::escapeHTML("{$value}");
                  $html .= " {$safeName}=\"{$escapedValue}\"";
              }
          }
          $html .= '>';
          return $html;
      }

      /**
       * Tests whether another value is an `MDHTMLTag` with the same tag name,
       * closer flag, and attributes. `$original` is not compared.
       */
      public function equals($other): bool {
          if (!($other instanceof MDHTMLTag)) return false;
          if ($other->tagName != $this->tagName) return false;
          if ($other->isCloser != $this->isCloser) return false;
          return MDUtils::equal($other->attributes, $this->attributes);
      }

      private const htmlTagNameFirstRegex = '[a-z]';
      private const htmlTagNameMedialRegex = '[a-z0-9]';
      private const htmlAttributeNameFirstRegex = '[a-z]';
      private const htmlAttributeNameMedialRegex = '[a-z0-9-]';
      private const whitespaceCharRegex = '\\s';

      /**
       * Checks the start of the given string for presence of an HTML tag.
       * Implemented as a character-by-character state machine.
       *
       * @param string $line text that may begin with an HTML tag
       * @return ?MDHTMLTag the parsed tag, or `null` if the line does not
       *   start with a complete tag
       */
      public static function fromLineStart(string $line): ?MDHTMLTag {
          // Parser states
          $expectOpenBracket = 0;
          $expectCloserOrName = 1;
          $expectName = 2;
          $expectAttributeNameOrEnd = 3;
          $expectEqualsOrAttributeOrEnd = 4;
          $expectAttributeValue = 5;
          $expectCloseBracket = 6;

          $isCloser = false;
          $tagName = '';
          $attributeName = '';
          $attributeValue = '';
          // null = no value started, '' = unquoted value, otherwise the quote char
          $attributeQuote = null;
          $attributes = [];
          $fullTag = null;

          /**
           * Stores the attribute being accumulated (if any) and resets the
           * accumulators. A name with no value is stored as `true`.
           */
          $endAttribute = function(bool $unescape=false) use (&$attributes,
              &$attributeName, &$attributeValue, &$attributeQuote) {
              if (mb_strlen($attributeName) > 0) {
                  if (mb_strlen($attributeValue) > 0 || $attributeQuote !== null) {
                      $attributes[$attributeName] = $unescape ?
                          html_entity_decode($attributeValue, ENT_QUOTES |
                              ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8') :
                          $attributeValue;
                  } else {
                      $attributes[$attributeName] = true;
                  }
              }
              $attributeName = '';
              $attributeValue = '';
              $attributeQuote = null;
          };

          $expect = $expectOpenBracket;
          // The line never changes inside the loop, so measure it once.
          $lineLength = mb_strlen($line);
          for ($p = 0; $p < $lineLength && $fullTag === null; $p++) {
              $ch = mb_substr($line, $p, 1);
              $isWhitespace = mb_eregi(self::whitespaceCharRegex, $ch);
              switch ($expect) {
                  case $expectOpenBracket:
                      if ($ch != '<') return null;
                      $expect = $expectCloserOrName;
                      break;
                  case $expectCloserOrName:
                      if ($ch == '/') {
                          $isCloser = true;
                      } else {
                          $p--; // reprocess as first character of the name
                      }
                      $expect = $expectName;
                      break;
                  case $expectName:
                      if (mb_strlen($tagName) == 0) {
                          if (!mb_eregi(self::htmlTagNameFirstRegex, $ch)) return null;
                          $tagName .= $ch;
                      } else {
                          if (mb_eregi(self::htmlTagNameMedialRegex, $ch)) {
                              $tagName .= $ch;
                          } else {
                              $p--; // reprocess in the next state
                              $expect = ($isCloser) ? $expectCloseBracket :
                                  $expectAttributeNameOrEnd;
                          }
                      }
                      break;
                  case $expectAttributeNameOrEnd:
                      if (mb_strlen($attributeName) == 0) {
                          if ($isWhitespace) {
                              // skip whitespace
                          } elseif ($ch == '/') {
                              $expect = $expectCloseBracket;
                          } elseif ($ch == '>') {
                              $fullTag = mb_substr($line, 0, $p + 1);
                          } elseif (mb_eregi(self::htmlAttributeNameFirstRegex, $ch)) {
                              $attributeName .= $ch;
                          } else {
                              return null;
                          }
                      } elseif ($isWhitespace) {
                          $expect = $expectEqualsOrAttributeOrEnd;
                      } elseif ($ch == '/') {
                          $endAttribute();
                          $expect = $expectCloseBracket;
                      } elseif ($ch == '>') {
                          $endAttribute();
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif ($ch == '=') {
                          $expect = $expectAttributeValue;
                      } elseif (mb_eregi(self::htmlAttributeNameMedialRegex, $ch)) {
                          $attributeName .= $ch;
                      } else {
                          return null;
                      }
                      break;
                  case $expectEqualsOrAttributeOrEnd:
                      // A pending valueless attribute is stored either when the
                      // next attribute begins or by the final $endAttribute().
                      if ($ch == '=') {
                          $expect = $expectAttributeValue;
                      } elseif ($isWhitespace) {
                          // skip whitespace
                      } elseif ($ch == '/') {
                          $expect = $expectCloseBracket;
                      } elseif ($ch == '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      } elseif (mb_eregi(self::htmlAttributeNameFirstRegex, $ch)) {
                          $endAttribute();
                          $expect = $expectAttributeNameOrEnd;
                          $p--; // reprocess as first character of the next name
                      }
                      break;
                  case $expectAttributeValue:
                      if (mb_strlen($attributeValue) == 0) {
                          if ($attributeQuote === null) {
                              if ($isWhitespace) {
                                  // skip whitespace
                              } elseif ($ch == '"' || $ch == "'") {
                                  $attributeQuote = $ch;
                              } else {
                                  $attributeQuote = ''; // explicitly unquoted
                                  $p--;
                              }
                          } else {
                              if ($ch === $attributeQuote) {
                                  // Empty string
                                  $endAttribute($attributeQuote != '');
                                  $expect = $expectAttributeNameOrEnd;
                              } elseif ($attributeQuote === '' && ($ch == '/' || $ch == '>')) {
                                  // `name=` with nothing after it
                                  return null;
                              } else {
                                  $attributeValue .= $ch;
                              }
                          }
                      } else {
                          if ($ch === $attributeQuote) {
                              $endAttribute($attributeQuote != '');
                              $expect = $expectAttributeNameOrEnd;
                          } elseif ($attributeQuote === '' && $isWhitespace) {
                              $endAttribute();
                              $expect = $expectAttributeNameOrEnd;
                          } elseif ($attributeQuote === '' && $ch == '>') {
                              // An unquoted value cannot contain `>`; it ends
                              // both the value and the tag (e.g. `<a href=foo>`).
                              $endAttribute();
                              $fullTag = mb_substr($line, 0, $p + 1);
                          } else {
                              $attributeValue .= $ch;
                          }
                      }
                      break;
                  case $expectCloseBracket:
                      if ($isWhitespace) {
                          // ignore whitespace
                      } elseif ($ch == '>') {
                          $fullTag = mb_substr($line, 0, $p + 1);
                      }
                      break;
              }
          }
          if ($fullTag === null) return null;
          $endAttribute();
          return new MDHTMLTag($fullTag, $tagName, $isCloser, $attributes);
      }
  }
  1274. /**
  1275. * Represents HTML modifications to a node, such as CSS classes to add or
  1276. * additional attributes. See `MDHTMLFilter->scrubModifier()` to remove disallowed
  1277. * values.
  1278. */
  1279. class MDTagModifier {
  1280. /**
  1281. * Verbatim markdown syntax. Unmodified by changes to other properties.
  1282. */
  1283. public string $original;
  1284. /** @var string[] */
  1285. public array $cssClasses = [];
  1286. public ?string $cssId = null;
  1287. public array $cssStyles = [];
  1288. public array $attributes = [];
  1289. private const leadingClassRegex = '^\\{([^}]+?)}';
  1290. private const trailingClassRegex = '^(.*?)\\s*\\{([^}]+?)}\\s*$';
  1291. private const classRegex = '^\\.([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=classname
  1292. private const idRegex = '^#([a-z_\\-][a-z0-9_\\-]*?)$'; // 1=id
  1293. private const attributeRegex = '^([a-z0-9]+?)=([^\\s\\}]+?)$'; // 1=attribute name, 2=attribute value
  /**
   * Copies this modifier's CSS classes, id, attributes, and styles onto a
   * node. Existing attributes and styles with the same names are overwritten.
   * The id is only assigned when this modifier has a truthy `$cssId`.
   */
  public function applyTo(MDNode $node) {
      // The parameter type already guarantees an MDNode, so no further
      // instance check is needed.
      foreach ($this->cssClasses as $className) {
          $node->addClass($className);
      }
      if ($this->cssId) {
          $node->cssId = $this->cssId;
      }
      foreach ($this->attributes as $attrName => $attrValue) {
          $node->attributes[$attrName] = $attrValue;
      }
      foreach ($this->cssStyles as $styleName => $styleValue) {
          $node->cssStyles[$styleName] = $styleValue;
      }
  }
  /**
   * Adds a CSS class. If already present it will not be duplicated.
   *
   * @param string $cssClass class name to add
   * @return bool `true` if the class was added, `false` if already present
   */
  public function addClass(string $cssClass): bool {
      // Strict comparison: a loose search treats numeric-looking strings such
      // as "10" and "1e1" as the same class.
      if (in_array($cssClass, $this->cssClasses, true)) return false;
      $this->cssClasses[] = $cssClass;
      return true;
  }
  /**
   * Removes a CSS class.
   *
   * @param string $cssClass class name to remove
   * @return bool `true` if the class was present and removed
   */
  public function removeClass(string $cssClass): bool {
      $beforeLength = sizeof($this->cssClasses);
      // array_diff preserves keys; reindex so `$cssClasses` stays a
      // sequential list after a removal from the middle.
      $this->cssClasses = array_values(array_diff($this->cssClasses, [ $cssClass ]));
      return sizeof($this->cssClasses) != $beforeLength;
  }
  /**
   * Tests whether another value is an `MDTagModifier` with the same CSS
   * classes, id, attributes, and styles. `$original` is not compared.
   */
  public function equals($other): bool {
      if (!($other instanceof MDTagModifier)) return false;
      if (!MDUtils::equal($other->cssClasses, $this->cssClasses)) return false;
      if ($other->cssId !== $this->cssId) return false;
      if (!MDUtils::equal($other->attributes, $this->attributes)) return false;
      // Styles are part of what the modifier applies, so two modifiers that
      // differ only in styles are not equal.
      if (!MDUtils::equal($other->cssStyles, $this->cssStyles)) return false;
      return true;
  }
  /**
   * Returns the verbatim markdown syntax stored in `$original`.
   */
  public function __toString(): string {
      $verbatim = $this->original;
      return $verbatim;
  }
  /**
   * Parses the value of a `style` attribute into a map of CSS property names
   * to values. Declarations are separated by `;` and each declaration is split
   * on its first `:` only. Declarations without a colon or with an empty
   * property name are skipped.
   *
   * @param string $styleValue e.g. `color:red;background:url(http://x/y.png)`
   * @return array<string, string> CSS property name => value
   */
  private static function styleToObject(string $styleValue): array {
      $styles = [];
      foreach (explode(';', $styleValue) as $declaration) {
          // Limit of 2 keeps values that themselves contain colons (URLs,
          // for example) intact instead of discarding the declaration.
          $parts = explode(':', $declaration, 2);
          if (count($parts) !== 2) continue;
          $name = trim($parts[0]);
          if ($name === '') continue;
          $styles[$name] = trim($parts[1]);
      }
      return $styles;
  }
  /**
   * Builds a modifier from the text found between the curly braces.
   *
   * The text is split on whitespace and each token must be one of:
   * `.classname`, `#id`, or `name=value`. A `style=...` token is parsed into
   * `cssStyles`; any other `name=value` token is stored in `attributes`. If
   * any token fits none of these forms the whole modifier is rejected.
   *
   * @param string $contents text between `{` and `}`
   * @return ?MDTagModifier the parsed modifier, or `null` if any token is
   * not recognized
   */
  private static function fromContents(string $contents): ?MDTagModifier {
      $result = new MDTagModifier();
      $result->original = "{{$contents}}";
      foreach (mb_split('\\s+', $contents) as $piece) {
          if (trim($piece) == '') {
              continue;
          }
          if (mb_eregi(self::classRegex, $piece, $match)) {
              $result->addClass($match[1]);
              continue;
          }
          if (mb_eregi(self::idRegex, $piece, $match)) {
              $result->cssId = $match[1];
              continue;
          }
          if (!mb_eregi(self::attributeRegex, $piece, $match)) {
              // Unknown token: this is not a modifier after all.
              return null;
          }
          if ($match[1] == 'style') {
              $result->cssStyles = self::styleToObject($match[2]);
          } else {
              $result->attributes[$match[1]] = $match[2];
          }
      }
      return $result;
  }
  /**
   * Extracts block modifier from end of a line. Always returns a 2-element
   * tuple array:
   * - `0`: the line without the modifier
   * - `1`: an `MDTagModifier` if found or `null` if not
   *
   * When a `$state` is given, modifiers are only recognized if the root
   * state's block readers include an `MDModifierReader`. If the trailing
   * `{...}` text is not a valid modifier the line is returned unchanged, so
   * ordinary text in braces is never lost.
   *
   * @param string $line
   * @param ?MDState $state
   * @return array tuple with remaining line and `MDTagModifier` or `null`
   */
  public static function fromLine(string $line, ?MDState $state): array {
      if ($state) {
          $modifiersEnabled = false;
          foreach ($state->root()->readersByBlockPriority as $reader) {
              if ($reader instanceof MDModifierReader) {
                  $modifiersEnabled = true;
                  break;
              }
          }
          if (!$modifiersEnabled) return [ $line, null ];
      }
      if (!mb_eregi(self::trailingClassRegex, $line, $groups)) return [ $line, null ];
      $mod = self::fromContents($groups[2]);
      // Braces that do not parse as a modifier are regular content; keep them.
      if ($mod === null) return [ $line, null ];
      return [ $groups[1], $mod ];
  }
  /**
   * Attempts to extract modifier from head of string.
   *
   * @param string $line text that may begin with a `{...}` modifier
   * @return ?MDTagModifier the modifier, or `null` if the text does not start
   * with a brace group or its contents are not a valid modifier
   */
  public static function fromStart(string $line): ?MDTagModifier {
      return mb_eregi(self::leadingClassRegex, $line, $match)
          ? self::fromContents($match[1])
          : null;
  }
  /**
   * Discards any modifiers from a line and returns what remains.
   *
   * Only a trailing `{...}` group that parses as a valid modifier is removed.
   * Other text in braces is ordinary content and is left in place.
   *
   * @param string $line
   * @return string the line without its trailing modifier
   */
  public static function strip(string $line): string {
      if (!mb_eregi(self::trailingClassRegex, $line, $groups)) return $line;
      if (self::fromContents($groups[2]) === null) return $line;
      return $groups[1];
  }
  1406. }
  1407. // -- Readers ---------------------------------------------------------------
  /**
   * Base class for readers of various markdown syntax. A `Markdown` instance can
   * be created with any combination of subclasses of these to customize the
   * flavor of markdown parsed.
   *
   * @see {@link custom.md} for details on subclassing
   */
  class MDReader {
      /**
       * Called before processing begins. `$state->lines` is populated and the
       * line pointer `$state->p` will be at `0`.
       *
       * Default implementation does nothing.
       */
      public function preProcess(MDState $state) {}

      /**
       * Attempts to read an `MDBlockNode` subclass at the current line pointer
       * `$state->p`. Only matches if the block pattern starts at the line pointer,
       * not elsewhere in the `$state->lines` array. If a block is found, `$state->p`
       * should be incremented to the next line _after_ the block structure and
       * a `MDBlockNode` subclass instance is returned. If no block is found,
       * returns `null`.
       *
       * Default implementation always returns `null`.
       */
      public function readBlock(MDState $state): ?MDBlockNode { return null; }

      /**
       * Attempts to read an inline token from the beginning of `$line`. Only the
       * start of the given `$line` is considered. If a matching token is found, an
       * `MDToken` is returned. Otherwise `null` is returned.
       *
       * Default implementation always returns `null`.
       */
      public function readToken(MDState $state, string $line): ?MDToken { return null; }

      /**
       * Attempts to find a pattern anywhere in `$tokens` and perform a _single_
       * in-place substitution with one or more `MDNode` subclass instances.
       * If a substitution is performed, must return `true`, otherwise `false`.
       *
       * Default implementation always returns `false`.
       *
       * @param MDState $state
       * @param int $pass what substitution pass this is, starting with 1
       * @param (MDToken|MDInlineNode)[] $tokens mixed array of `MDToken` and
       * `MDInlineNode` elements
       * @return bool `true` if a substitution was performed, `false` if not
       */
      public function substituteTokens(MDState $state, int $pass, array &$tokens): bool { return false; }

      /**
       * Called after all parsing has completed. An array `$blocks` is passed of
       * all the top-level `MDBlockNode` elements in the document which this
       * method can traverse or alter in-place via `array_splice` operations if
       * necessary.
       *
       * `MDNode->visitChildren` is useful for recursively looking for certain
       * `MDNode` instances. `MDNode::replaceNodes` is useful for swapping in
       * replacements.
       *
       * Default implementation does nothing.
       *
       * @param MDState $state
       * @param MDBlockNode[] $blocks
       */
      public function postProcess(MDState $state, array &$blocks) {}

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the block parsing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `$other`, respectively
       */
      public function compareBlockOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the tokenizing phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `$other`, respectively
       */
      public function compareTokenizeOrdering(MDReader $other): int {
          return 0;
      }

      /**
       * Can be overridden to influence ordering of this reader with respect to
       * another during the substitution phase. Return `-1` to be ordered before
       * the given reader, `1` to be ordered after it, or `0` for no preference.
       * Only return non-`0` values to resolve specific conflicts.
       *
       * Readers are sorted within each substitution pass. All pass 1 readers are
       * processed first, then all pass 2 readers, etc. The number of passes this
       * reader participates in is dictated by `substitutionPassCount()`.
       *
       * Default implementation always returns `0` (no preference).
       *
       * @param MDReader $other
       * @param int $pass substitution pass, with numbering starting at `1`
       * @return int a negative, positive, or 0 value to be ordered before,
       * after, or anywhere relative to `$other`, respectively
       */
      public function compareSubstituteOrdering(MDReader $other, int $pass): int {
          return 0;
      }

      /**
       * How many substitution passes this reader requires. Substitution allows
       * all pass 1 readers to process first, then all pass 2 readers, etc.
       */
      public function substitutionPassCount(): int { return 1; }

      /**
       * For sorting readers with ordering preferences. The `compare` methods
       * don't have the properties of normal sorting compares so need to sort
       * differently.
       *
       * Every ordered pair of elements is compared in both directions, because
       * only one side of a pair may express a preference. Elements caught in a
       * preference cycle are appended at the end in their input order.
       *
       * @param array $arr array to sort
       * @param callable $compareFn comparison function, taking two array element
       * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
       * respectively
       * @param callable $idFn function for returning a unique hashable id for
       * the array element
       * @return array sorted array
       */
      private static function kahnTopologicalSort(array $arr, callable $compareFn,
          callable $idFn): array {
          // Work on a 0-based list so positional access below is safe even
          // if the caller's array has gaps or string keys.
          $arr = array_values($arr);
          $count = count($arr);
          $ids = array_map($idFn, $arr);

          // Build the graph and compute in-degrees
          $graph = [];
          $inDegrees = [];
          $valuesById = [];
          foreach ($ids as $index => $id) {
              $graph[$id] = [];
              $inDegrees[$id] = 0;
              $valuesById[$id] = $arr[$index];
          }
          for ($i = 0; $i < $count; $i++) {
              for ($j = 0; $j < $count; $j++) {
                  if ($i === $j) continue;
                  $comparisonResult = $compareFn($arr[$i], $arr[$j]);
                  if ($comparisonResult < 0) {
                      $graph[$ids[$i]][] = $ids[$j];
                      $inDegrees[$ids[$j]]++;
                  } elseif ($comparisonResult > 0) {
                      $graph[$ids[$j]][] = $ids[$i];
                      $inDegrees[$ids[$i]]++;
                  }
              }
          }

          // Initialize the queue with zero-inDegree nodes, in input order
          $queue = array_keys($inDegrees, 0, true);

          // Process the queue and build the topological order list. The
          // queue grows while it is being walked.
          $sorted = [];
          for ($head = 0; $head < count($queue); $head++) {
              $elemId = $queue[$head];
              $sorted[] = $valuesById[$elemId];
              unset($valuesById[$elemId]);
              foreach ($graph[$elemId] as $neighbor) {
                  if (--$inDegrees[$neighbor] === 0) {
                      $queue[] = $neighbor;
                  }
              }
          }

          // Anything left over can go at the end. No ordering dependencies.
          foreach ($valuesById as $value) {
              $sorted[] = $value;
          }
          return $sorted;
      }

      /**
       * Returns a sorted array of readers by their block priority preferences.
       *
       * Readers are identified by object identity, so several instances of the
       * same reader class are all kept.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReaderForBlocks(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
              fn(MDReader $elem): int => spl_object_id($elem));
      }

      /**
       * Returns a sorted array of readers by their tokenization priority preferences.
       *
       * Readers are identified by object identity, so several instances of the
       * same reader class are all kept.
       *
       * @param MDReader[] $readers
       * @return MDReader[] sorted readers
       */
      public static function sortReadersForTokenizing(array &$readers): array {
          return self::kahnTopologicalSort(
              $readers,
              fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b),
              fn(MDReader $elem): int => spl_object_id($elem));
      }

      /**
       * Returns a sorted array of tuples (arrays) containing the substitution
       * pass number and reader instance, sorted by their substitution priority
       * preferences.
       *
       * For readers with `substitutionPassCount()` > `1`, the same reader will
       * appear multiple times in the resulting array, one per pass.
       *
       * @param MDReader[] $readers
       * @return array[] sorted array of `[ int $pass, MDReader $reader ]`
       * tuples, all pass 1 tuples first, then pass 2, etc.
       */
      public static function sortReadersForSubstitution(array &$readers): array {
          $tuples = [];
          $maxPass = 1;
          foreach ($readers as $reader) {
              $passCount = $reader->substitutionPassCount();
              $maxPass = max($maxPass, $passCount);
              for ($pass = 1; $pass <= $passCount; $pass++) {
                  $tuples[] = [ $pass, $reader ];
              }
          }
          $result = [];
          for ($pass = 1; $pass <= $maxPass; $pass++) {
              $readersThisPass = array_values(array_filter($tuples, fn($tup) => $tup[0] === $pass));
              $passResult = self::kahnTopologicalSort(
                  $readersThisPass,
                  fn(array $a, array $b): int => $a[1]->compareSubstituteOrdering($b[1], $pass),
                  fn(array $elem): int => spl_object_id($elem[1]));
              $result = array_merge($result, $passResult);
          }
          return $result;
      }
  }
  /**
   * Reads markdown blocks for headings denoted with the underline syntax: a
   * content line followed by a line made only of `=` (level 1) or only of `-`
   * (level 2).
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDUnderlinedHeadingReader extends MDReader {
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) {
              return null;
          }
          $start = $state->p;
          [$title, $tagModifier] = MDTagModifier::fromLine(trim($state->lines[$start]), $state);
          $underline = trim($state->lines[$start + 1]);
          if ($title === '') {
              return null;
          }

          if (mb_eregi('^=+$', $underline)) {
              $level = 1;
          } elseif (mb_eregi('^\-+$', $underline)) {
              $level = 2;
          } else {
              return null;
          }

          // Consume both the content line and the underline.
          $state->p = $start + 2;
          $heading = new MDHeadingNode($level, $state->inlineMarkdownToNodes($title));
          $tagModifier?->applyTo($heading);
          return $heading;
      }
  }
  /**
   * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
   * to 6 are supported; the level is the number of leading `#` characters.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDHashHeadingReader extends MDReader {
      private const hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$'; // 1=hashes, 2=content

      public function readBlock(MDState $state): ?MDBlockNode {
          $start = $state->p;
          [$text, $tagModifier] = MDTagModifier::fromLine($state->lines[$start], $state);
          if (!mb_eregi(self::hashHeadingRegex, $text, $match)) {
              return null;
          }
          // Heading occupies exactly one line.
          $state->p = $start + 1;
          $heading = new MDHeadingNode(mb_strlen($match[1]), $state->inlineMarkdownToNodes($match[2]));
          $tagModifier?->applyTo($heading);
          return $heading;
      }
  }
  /**
   * Reads subtext blocks. Subtext is smaller, fainter text for things like
   * disclaimers or sources. A subtext line starts with `-#`.
   *
   * Supports `MDTagModifier` suffixes.
   */
  class MDSubtextReader extends MDReader {
      private const subtextRegex = '^\\-#\\s*(.*?)\\s*$'; // 1=content

      public function readBlock(MDState $state): ?MDBlockNode {
          $start = $state->p;
          [$text, $tagModifier] = MDTagModifier::fromLine($state->lines[$start], $state);
          if (!mb_eregi(self::subtextRegex, $text, $match)) {
              return null;
          }
          // Subtext occupies exactly one line.
          $state->p = $start + 1;
          $subtext = new MDSubtextNode($state->inlineMarkdownToNodes($match[1]));
          $tagModifier?->applyTo($subtext);
          return $subtext;
      }

      /**
       * Asks to run before the unordered list reader; every other reader gets
       * no preference.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return ($other instanceof MDUnorderedListReader) ? -1 : 0;
      }
  }
  /**
   * Reads markdown blocks for blockquoted text. A blockquote is a run of
   * consecutive lines that each start with `>`. The quoted content is parsed
   * recursively as its own sequence of blocks.
   */
  class MDBlockQuoteReader extends MDReader {
      public function readBlock(MDState $state): ?MDBlockNode {
          $p = $state->p;
          $lineCount = count($state->lines);
          $blockquoteLines = [];
          // Advance only past lines that belong to the quote. The first
          // non-matching line must stay unconsumed so the next reader sees it.
          while ($p < $lineCount && str_starts_with($state->lines[$p], '>')) {
              $blockquoteLines[] = $state->lines[$p++];
          }
          if (count($blockquoteLines) === 0) return null;

          // Drop the `>` marker plus up to three spaces and an optional tab.
          $contentLines = array_map(
              fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)),
              $blockquoteLines);
          $substate = $state->copy($contentLines);
          $quotedBlocks = $substate->readBlocks();
          $state->p = $p;
          return new MDBlockquoteNode($quotedBlocks);
      }
  }
  /**
   * Internal abstract base class for ordered and unordered lists. Provides the
   * shared logic for collecting the lines of one list item and turning them
   * into nodes.
   */
  class _MDListReader extends MDReader {
      /**
       * Collects the source lines of the list item starting at `$state->p`
       * without moving the line pointer. The first line has its first
       * `$firstLineStartPos` characters (the bullet or number) removed.
       * Collection stops at a line that looks like a list item, or at a
       * non-indented line that follows a blank line. Trailing blank lines are
       * removed and the result is passed through `MDUtils::stripIndent`.
       *
       * NOTE(review): an earlier revision tracked a "strip trailing blank
       * lines" flag that was set but never read; trailing blank lines are
       * always stripped.
       *
       * @param MDState $state
       * @param int $firstLineStartPos characters to drop from the first line
       * @return string[] lines of the item
       */
      private static function readItemLines(MDState $state, int $firstLineStartPos): array {
          $collected = [];
          $blankLineSeen = false;
          for ($cursor = $state->p; $state->hasLines(1, $cursor); $cursor++) {
              $text = $state->lines[$cursor];
              if ($cursor == $state->p) {
                  $text = mb_substr($text, $firstLineStartPos);
              }
              if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $text)) {
                  // Found next list item
                  break;
              }
              if (trim($text) == '') {
                  $blankLineSeen = true;
              } elseif ($blankLineSeen && !mb_eregi('^\\s+\\S', $text)) {
                  // Post-list content
                  break;
              }
              $collected[] = $text;
          }
          return MDUtils::stripIndent(MDUtils::withoutTrailingBlankLines($collected));
      }

      /**
       * Reads the content of the list item at `$state->p` and advances the
       * line pointer past it (by at least one line).
       *
       * A single-line item is parsed as inline markdown. A multi-line item
       * with no blank lines that contains a nested list is split into inline
       * content followed by the nested blocks. Anything else is parsed as
       * blocks.
       *
       * @param MDState $state
       * @param int $firstLineStartPos characters to drop from the first line
       * @return MDNode|array node or array of nodes for the item
       */
      protected function readListItemContent(MDState $state, int $firstLineStartPos): MDNode|array {
          $itemLines = self::readItemLines($state, $firstLineStartPos);
          $lineTotal = sizeof($itemLines);
          $state->p += max($lineTotal, 1);
          if ($lineTotal == 1) {
              return $state->inlineMarkdownToNodes($itemLines[0]);
          }

          $containsBlank = false;
          foreach ($itemLines as $candidate) {
              if (trim($candidate) == '') {
                  $containsBlank = true;
                  break;
              }
          }

          // Without blank lines to mark block boundaries, look for the start
          // of a nested list after the first line.
          $nestedAt = null;
          if (!$containsBlank) {
              for ($i = 1; $i < $lineTotal; $i++) {
                  if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $itemLines[$i])) {
                      $nestedAt = $i;
                      break;
                  }
              }
          }

          if ($nestedAt === null) {
              // Blank lines present, or no nested list: standard block read
              return $state->copy($itemLines)->readBlocks();
          }

          // Nested list found
          $leadingNodes = $state->inlineMarkdownToNodes(
              implode("\n", array_slice($itemLines, 0, $nestedAt)));
          $nestedBlocks = $state->copy(array_slice($itemLines, $nestedAt))->readBlocks();
          return new MDBlockNode(array_merge($leadingNodes, $nestedBlocks));
      }

      /**
       * Must be overridden by subclasses; this implementation always throws.
       */
      public function readBlock(MDState $state): ?MDBlockNode {
          $className = MDUtils::typename($this);
          throw new Error("Abstract readBlock must be overridden in {$className}");
      }
  }
  /**
   * Block reader for unordered (bulleted) lists. Items start with `*`, `+` or
   * `-` followed by whitespace.
   */
  class MDUnorderedListReader extends _MDListReader {
      private const unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$'; // 1=bullet, 2=content

      /**
       * Reads one bulleted item at the line pointer, or returns `null` if
       * the current line is not a bullet item.
       */
      private function readUnorderedListItem(MDState $state): ?MDListItemNode {
          if (!$state->hasLines(1)) {
              return null;
          }
          if (!mb_eregi(self::unorderedListRegex, $state->lines[$state->p], $match)) {
              return null;
          }
          // Skip the bullet and its trailing whitespace on the first line.
          $bulletWidth = mb_strlen($match[1]);
          return new MDListItemNode($this->readListItemContent($state, $bulletWidth));
      }

      public function readBlock(MDState $state): ?MDBlockNode {
          $entries = [];
          while ($entry = $this->readUnorderedListItem($state)) {
              $entries[] = $entry;
          }
          return sizeof($entries) == 0 ? null : new MDUnorderedListNode($entries);
      }
  }
  /**
   * Block reader for ordered (numbered) lists. The number of the first item is
   * used to begin counting. The subsequent items increase by 1, regardless of
   * their value.
   */
  class MDOrderedListReader extends _MDListReader {
      private const orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$'; // 1=number, 2=dot, 3=content

      /**
       * Reads one numbered item at the line pointer, or returns `null` if
       * the current line is not a numbered item.
       */
      private function readOrderedListItem(MDState $state): ?MDListItemNode {
          if (!$state->hasLines(1)) {
              return null;
          }
          if (!mb_eregi(self::orderedListRegex, $state->lines[$state->p], $match)) {
              return null;
          }
          $number = intval($match[1]);
          // Skip the digits, the dot and the whitespace on the first line.
          $markerWidth = mb_strlen($match[1]) + mb_strlen($match[2]);
          return new MDListItemNode($this->readListItemContent($state, $markerWidth), $number);
      }

      public function readBlock(MDState $state): ?MDBlockNode {
          $entries = [];
          while ($entry = $this->readOrderedListItem($state)) {
              $entries[] = $entry;
          }
          if (sizeof($entries) == 0) {
              return null;
          }
          return new MDOrderedListNode($entries, $entries[0]->ordinal);
      }
  }
  /**
   * Block reader for code blocks denoted by pairs of triple tickmarks. If
   * a programming language name, _xyz_, immediately follows the backticks, a
   * `language-xyz` CSS class will be added to the resulting `<code>`
   * element.
   *
   * The opening fence must be the only thing on its line apart from leading
   * whitespace, the optional language name and an optional modifier. The
   * block is only recognized if a closing fence line is found.
   *
   * Supports `MDTagModifier` suffix.
   */
  class MDFencedCodeBlockReader extends MDReader {
      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $p = $state->p;
          [$openFenceLine, $modifier] = MDTagModifier::fromLine($state->lines[$p++], $state);
          // Anchored at the start so ordinary text that merely ends in
          // backticks is not mistaken for an opening fence.
          if (!mb_eregi('^\\s*```\\s*([a-z0-9]*)\\s*$', $openFenceLine, $groups)) return null;
          $language = ($groups[1] !== false && mb_strlen($groups[1]) > 0) ? $groups[1] : null;
          $codeLines = [];
          while ($state->hasLines(1, $p)) {
              $line = $state->lines[$p++];
              if (trim($line) == '```') {
                  $state->p = $p;
                  $block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
                  if ($modifier) $modifier->applyTo($block);
                  return $block;
              }
              $codeLines[] = $line;
          }
          // No closing fence: not a code block, line pointer left untouched.
          return null;
      }
  }
  /**
   * Block reader for code blocks denoted by indenting text. Consecutive lines
   * with at least one level of indentation (as reported by
   * `MDUtils::countIndents`) form the block; each has its indent stripped.
   */
  class MDIndentedCodeBlockReader extends MDReader {
      public function readBlock(MDState $state): ?MDBlockNode {
          $cursor = $state->p;
          $code = [];
          while ($state->hasLines(1, $cursor)
              && MDUtils::countIndents($state->lines[$cursor], true) >= 1) {
              $code[] = MDUtils::stripIndent($state->lines[$cursor]);
              $cursor++;
          }
          if (sizeof($code) == 0) {
              return null;
          }
          // Pointer lands on the first line that was not indented.
          $state->p = $cursor;
          return new MDCodeBlockNode(implode("\n", $code));
      }
  }
  /**
   * Block reader for horizontal rules. Composed of three or more hyphens or
   * asterisks on a line by themselves, with or without intermediate whitespace.
   * A trailing `MDTagModifier` on the line is applied to the rule.
   */
  class MDHorizontalRuleReader extends MDReader {
      private const horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

      public function readBlock(MDState $state): ?MDBlockNode {
          $start = $state->p;
          [$text, $tagModifier] = MDTagModifier::fromLine($state->lines[$start], $state);
          if (!mb_eregi(self::horizontalRuleRegex, $text)) {
              return null;
          }
          // A rule occupies exactly one line.
          $state->p = $start + 1;
          $rule = new MDHorizontalRuleNode();
          $tagModifier?->applyTo($rule);
          return $rule;
      }

      /**
       * Asks to run before the unordered list reader; every other reader gets
       * no preference.
       */
      public function compareBlockOrdering(MDReader $other): int {
          return ($other instanceof MDUnorderedListReader) ? -1 : 0;
      }
  }
  /**
   * Block reader for tables. A table is a header row, a divider row of dashes
   * (optionally with `:` alignment markers), and zero or more body rows. Cells
   * are separated with `|`.
   *
   * Supports `MDTagModifier` suffix.
   */
  class MDTableReader extends MDReader {
      /**
       * If `true`, cells containing `=` are checked for a formula (optionally
       * wrapped in matching punctuation such as `**=A1+B1**`) and the formula
       * text is kept as plaintext. Used by spreadsheet add-on to prevent
       * equation operators from being interpreted as markdown.
       * @var bool
       */
      public bool $preferFormulas = false;

      private const tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

      /**
       * Reads one table row at the line pointer and advances past it.
       *
       * @param MDState $state
       * @param bool $isHeader `true` to create header cells
       * @return ?MDTableRowNode the row, or `null` if there is no line left or
       * the line contains no `|`
       */
      private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
          if (!$state->hasLines(1)) return null;
          $p = $state->p;
          $line = MDTagModifier::strip(trim($state->lines[$p++]));
          if (!mb_eregi('.*\\|.*', $line)) return null;
          // Outer pipes are optional decoration
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          $cells = array_map(function($token) use ($state, $isHeader) {
              $trimmedToken = trim($token);
              $content = null;
              if ($this->preferFormulas && str_contains($trimmedToken, '=')) {
                  $content = $this->preserveFormula($state, $trimmedToken);
              }
              // Not a formula (or formulas disabled): regular inline markdown
              $content ??= $state->inlineMarkdownToNode($trimmedToken);
              return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
          }, explode('|', $line));
          $state->p = $p;
          return new MDTableRowNode($cells);
      }

      /**
       * Tries to interpret cell contents as a formula, optionally wrapped in
       * matching markdown punctuation, and returns a node whose text is the
       * unaltered formula.
       *
       * @param MDState $state
       * @param string $cellContents
       * @return ?MDNode node containing the formula, or `null` if the cell
       * does not look like a formula
       */
      private function preserveFormula(MDState $state, string $cellContents): ?MDNode {
          // Up to three prefix punctuation patterns, formula, then three matching
          // suffixes. Not guaranteed to catch every possible syntax but an awful lot.
          // Using preg_match instead for... reasons.
          $regex = '/^([^a-z0-9\\s]*)([^a-z0-9\\s]*)([^a-z0-9\\s]*)(=.*)\\3\\2\\1$/i';
          if (!preg_match($regex, $cellContents, $groups)) {
              return null;
          }
          $prefix = $groups[1] . $groups[2] . $groups[3];
          $formula = $groups[4];
          if ($prefix === '') {
              return new MDTextNode($formula);
          }
          $suffix = $groups[3] . $groups[2] . $groups[1];
          // Parse substitute markdown with the same prefix and suffix but just
          // an "x" as content. We'll swap in the unaltered formula into the
          // parsed nodes.
          $tempNodes = $state->inlineMarkdownToNodes($prefix . 'x' . $suffix);
          if (count($tempNodes) != 1) return null;
          $foundText = false;
          if ($tempNodes[0] instanceof MDTextNode && $tempNodes[0]->text === 'x') {
              $tempNodes[0]->text = $formula;
              $foundText = true;
          } else {
              $tempNodes[0]->visitChildren(function($node) use ($formula, &$foundText) {
                  if ($node instanceof MDTextNode && $node->text === 'x') {
                      $node->text = $formula;
                      $foundText = true;
                  }
              });
          }
          if (!$foundText) return null;
          return $tempNodes[0];
      }

      /**
       * Derives per-column alignment from a table divider line.
       *
       * @param string $line divider line such as `| :--- | :---: | ---: |`
       * @return (?string)[] one entry per column: `'left'`, `'center'`,
       * `'right'`, or `null` for no explicit alignment
       */
      private function parseColumnAlignments(string $line): array {
          $line = trim($line);
          if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
          if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
          return array_map(function($token) {
              // The split only eats whitespace around inner pipes; the first
              // and last tokens can still carry padding that would hide the
              // colon markers.
              $token = trim($token);
              $leftMarked = str_starts_with($token, ':');
              $rightMarked = str_ends_with($token, ':');
              if ($leftMarked && $rightMarked) return 'center';
              if ($leftMarked) return 'left';
              if ($rightMarked) return 'right';
              return null;
          }, mb_split('\\s*\\|\\s*', $line));
      }

      public function readBlock(MDState $state): ?MDBlockNode {
          if (!$state->hasLines(2)) return null;
          $startP = $state->p;
          // A modifier on the header line applies to the whole table
          $modifier = MDTagModifier::fromLine($state->lines[$startP], $state)[1];
          $headerRow = $this->readTableRow($state, true);
          if ($headerRow === null) {
              $state->p = $startP;
              return null;
          }
          $dividerLine = $state->lines[$state->p++];
          if (!mb_eregi(self::tableDividerRegex, $dividerLine)) {
              // Not a table after all; rewind
              $state->p = $startP;
              return null;
          }
          $columnAlignments = $this->parseColumnAlignments($dividerLine);
          $bodyRows = [];
          while ($state->hasLines(1)) {
              $row = $this->readTableRow($state, false);
              if ($row === null) break;
              $bodyRows[] = $row;
          }
          $table = new MDTableNode($headerRow, $bodyRows);
          $table->columnAlignments = $columnAlignments;
          if ($modifier) $modifier->applyTo($table);
          return $table;
      }
  }
  /**
   * Block reader for definition lists. Definitions go directly under terms starting
   * with a colon. Lines beginning with whitespace continue the previous entry.
   * The list ends at the first blank line and must contain at least one term
   * and one definition.
   */
  class MDDefinitionListReader extends MDReader {
      public function readBlock(MDState $state): ?MDBlockNode {
          $entries = [];
          $terms = 0;
          $definitions = 0;
          $cursor = $state->p;
          while ($state->hasLines(1, $cursor)) {
              $text = $state->lines[$cursor++];
              if (trim($text) === '') {
                  break;
              }
              if (mb_eregi('^\\s+', $text)) {
                  // Continuation line: needs an entry to attach to
                  if (sizeof($entries) == 0) {
                      return null;
                  }
                  $entries[array_key_last($entries)] .= "\n" . $text;
                  continue;
              }
              $entries[] = $text;
              if (mb_eregi('^:\\s+', $text)) {
                  $definitions++;
              } else {
                  $terms++;
              }
          }
          if ($terms == 0 || $definitions == 0) {
              return null;
          }

          $nodes = [];
          foreach ($entries as $entry) {
              $nodes[] = mb_eregi('^:\\s+(.*?)$', $entry, $match)
                  ? new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($match[1]))
                  : new MDDefinitionListTermNode($state->inlineMarkdownToNodes($entry));
          }
          $state->p = $cursor;
          return new MDDefinitionListNode($nodes);
      }
  }
  2115. /**
  2116. * Block reader for defining footnote contents. Footnotes can be defined anywhere
  2117. * in the document but will always be rendered at the end of a page or end of
  2118. * the document.
  2119. */
  2120. class MDFootnoteReader extends MDReader {
  2121. private const footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]'; // 1=symbol, 2=title
  2122. private const footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]'; // 1=symbol
  2123. /**
  2124. * @param MDState $state
  2125. * @param string $symbol
  2126. * @param MDNode[] $footnote
  2127. */
  2128. private function defineFootnote(MDState $state, string $symbol, array $footnote) {
  2129. $footnotes = $state->root()->userInfo['footnotes'] ?? [];
  2130. $footnotes[$symbol] = $footnote;
  2131. $state->root()->userInfo['footnotes'] = $footnotes;
  2132. }
  2133. private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
  2134. $footnoteInstances = $state->root()->userInfo['footnoteInstances'];
  2135. $instances = $footnoteInstances[$symbol] ?? [];
  2136. array_push($instances, $unique);
  2137. $footnoteInstances[$symbol] = $instances;
  2138. $state->root()->userInfo['footnoteInstances'] = $footnoteInstances;
  2139. }
  2140. private function idForFootnoteSymbol(MDState $state, string $symbol): int {
  2141. $footnoteIds = $state->root()->userInfo['footnoteIds'] ?? [];
  2142. $existing = $footnoteIds[$symbol] ?? null;
  2143. if ($existing !== null) return $existing;
  2144. $nextFootnoteId = $state->root()->userInfo['nextFootnoteId'] ?? 1;
  2145. $id = $nextFootnoteId++;
  2146. $footnoteIds[$symbol] = $id;
  2147. $state->root()->userInfo['nextFootnoteId'] = $nextFootnoteId;
  2148. $state->root()->userInfo['footnoteIds'] = $footnoteIds;
  2149. return $id;
  2150. }
  2151. public function preProcess(MDState $state) {
  2152. $state->root()->userInfo['footnoteInstances'] = [];
  2153. $state->root()->userInfo['footnotes'] = [];
  2154. $state->root()->userInfo['footnoteIds'] = [];
  2155. $state->root()->userInfo['nextFootnoteId'] = 1;
  2156. }
  2157. public function readBlock(MDState $state): ?MDBlockNode {
  2158. $p = $state->p;
  2159. if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
  2160. $symbol = $groups[1];
  2161. $def = $groups[2];
  2162. while ($state->hasLines(1, $p)) {
  2163. $line = $state->lines[$p++];
  2164. if (mb_eregi('^\\s+', $line)) {
  2165. $def .= "\n" . $line;
  2166. } else {
  2167. $p--;
  2168. break;
  2169. }
  2170. }
  2171. $content = $state->inlineMarkdownToNodes($def);
  2172. $this->defineFootnote($state, $symbol, $content);
  2173. $state->p = $p;
  2174. return new MDBlockNode(); // empty
  2175. }
  2176. public function readToken(MDState $state, string $line): ?MDToken {
  2177. $groups;
  2178. if (mb_eregi(self::footnoteWithTitleRegex, $line, $groups)) {
  2179. return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
  2180. }
  2181. if (mb_eregi(self::footnoteRegex, $line, $groups)) {
  2182. return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
  2183. }
  2184. return null;
  2185. }
  2186. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2187. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
  2188. $symbol = $match->tokens[0]->content;
  2189. array_splice($tokens, $match->index, 1, [new MDFootnoteNode($symbol)]);
  2190. return true;
  2191. }
  2192. return false;
  2193. }
  2194. /**
  2195. * @param MDState $state
  2196. * @param MDBlockNode[] $blocks
  2197. */
  2198. public function postProcess(MDState $state, array &$blocks) {
  2199. $nextOccurrenceId = 1;
  2200. foreach ($blocks as $block) {
  2201. $block->visitChildren(function($node) use (&$nextOccurrenceId, $state) {
  2202. if (!($node instanceof MDFootnoteNode)) return;
  2203. $node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
  2204. $node->occurrenceId = $nextOccurrenceId++;
  2205. $node->displaySymbol = strval($node->footnoteId);
  2206. $this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
  2207. });
  2208. }
  2209. if (sizeof($state->userInfo['footnotes']) == 0) return;
  2210. array_push($blocks, new MDFootnoteListNode());
  2211. }
  2212. public function compareBlockOrdering(MDReader $other): int {
  2213. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2214. return -1;
  2215. }
  2216. return 0;
  2217. }
  2218. public function compareTokenizeOrdering(MDReader $other): int {
  2219. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2220. return -1;
  2221. }
  2222. return 0;
  2223. }
  2224. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2225. if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
  2226. return -1;
  2227. }
  2228. return 0;
  2229. }
  2230. }
  2231. /**
  2232. * Block reader for abbreviation definitions. Anywhere the abbreviation appears
  2233. * in plain text will have its definition available when hovering over it.
  2234. * Definitions can appear anywhere in the document. Their content should only
  2235. * contain simple text, not markdown.
  2236. */
  2237. class MDAbbreviationReader extends MDReader {
  2238. private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
  2239. $abbrevs = $state->root()->userInfo['abbreviations'];
  2240. $abbrevs[$abbreviation] = $definition;
  2241. $state->root()->userInfo['abbreviations'] = $abbrevs;
  2242. }
  2243. public function preProcess(MDState $state) {
  2244. $state->root()->userInfo['abbreviations'] = [];
  2245. }
  2246. public function readBlock(MDState $state): ?MDBlockNode {
  2247. $p = $state->p;
  2248. $line = $state->lines[$p++];
  2249. if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
  2250. $abbrev = $groups[1];
  2251. $def = $groups[2];
  2252. $this->defineAbbreviation($state, $abbrev, $def);
  2253. $state->p = $p;
  2254. return new MDBlockNode(); // empty
  2255. }
  2256. /**
  2257. * @param MDState $state
  2258. * @param MDNode[] $blocks
  2259. */
  2260. public function postProcess(MDState $state, array &$blocks) {
  2261. $abbreviations = $state->root()->userInfo['abbreviations'];
  2262. MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
  2263. if (!($original instanceof MDTextNode)) return null;
  2264. $changed = false;
  2265. $elems = [ $original->text ]; // mix of strings and MDNodes
  2266. for ($i = 0; $i < sizeof($elems); $i++) {
  2267. $text = $elems[$i];
  2268. if (!is_string($text)) continue;
  2269. foreach ($abbreviations as $abbreviation => $definition) {
  2270. $index = strpos($text, $abbreviation);
  2271. if ($index === false) continue;
  2272. $prefix = substr($text, 0, $index);
  2273. $suffix = substr($text, $index + strlen($abbreviation));
  2274. array_splice($elems, $i, 1, [$prefix,
  2275. new MDAbbreviationNode($abbreviation, $definition),
  2276. $suffix]);
  2277. $i = -1; // start over
  2278. $changed = true;
  2279. break;
  2280. }
  2281. }
  2282. if (!$changed) return null;
  2283. $nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
  2284. return new MDNode($nodes);
  2285. });
  2286. }
  2287. }
  2288. /**
  2289. * Block reader for simple paragraphs. Paragraphs are separated by a blank (or
  2290. * whitespace-only) line. This reader is prioritized after every other reader
  2291. * since there is no distinguishing syntax.
  2292. */
  2293. class MDParagraphReader extends MDReader {
  2294. public function readBlock(MDState $state): ?MDBlockNode {
  2295. $paragraphLines = [];
  2296. $p = $state->p;
  2297. while ($state->hasLines(1, $p)) {
  2298. $line = $state->lines[$p++];
  2299. if (trim($line) === '') {
  2300. break;
  2301. }
  2302. array_push($paragraphLines, $line);
  2303. }
  2304. if ($state->p == 0 && $p >= sizeof($state->lines)) {
  2305. // If it's the entire document don't wrap it in a paragraph
  2306. return null;
  2307. }
  2308. if (sizeof($paragraphLines) > 0) {
  2309. $state->p = $p;
  2310. $content = implode("\n", $paragraphLines);
  2311. return new MDParagraphNode($state->inlineMarkdownToNodes($content));
  2312. }
  2313. return null;
  2314. }
  2315. public function compareBlockOrdering(MDReader $other): int {
  2316. return 1; // always dead last
  2317. }
  2318. }
  2319. /**
  2320. * Abstract base class for readers that look for one or two delimiting tokens
  2321. * on either side of some content. E.g. `**strong**`.
  2322. */
  2323. class MDSimplePairInlineReader extends MDReader {
  2324. // Passes:
  2325. // 1. Syntaxes with two delimiting tokens, interior tokens of the same
  2326. // kind must be even in number
  2327. // 2. Syntaxes with one delimiting token, interior tokens of the same
  2328. // kind must be even in number
  2329. // 3. Syntaxes with two delimiting tokens, any tokens inside
  2330. // 4. Syntaxes with one delimiting token, any tokens inside
  2331. public function substitutionPassCount(): int { return 4; }
  2332. /**
  2333. * Attempts a substitution of a matched pair of delimiting token types.
  2334. * If successful, the substitution is performed on `$tokens` and `true` is
  2335. * returned, otherwise `false` is returned and the array is untouched.
  2336. *
  2337. * If `this->substitutionPassCount()` is greater than 1, the first pass
  2338. * will reject matches with the delimiting character inside the content
  2339. * tokens. If the reader uses a single pass or a subsequent pass is performed
  2340. * with multiple pass any contents will be accepted.
  2341. *
  2342. * @param MDState $state
  2343. * @param int $pass pass number, starting with `1`
  2344. * @param (MDToken|MDNode)[] $tokens tokens/nodes to perform substitution on
  2345. * @param string $nodeClass class of the node to return if matched
  2346. * @param MDTokenType $delimiter delimiting token
  2347. * @param int $count how many times the token is repeated to form the delimiter
  2348. * @param bool $plaintext whether to create `$nodeClass` with a verbatim
  2349. * content string instead of parsed `MDNode`s
  2350. * @return bool `true` if substitution was performed, `false` if not
  2351. */
  2352. public function attemptPair(MDState $state, int $pass, array &$tokens,
  2353. string $nodeClass, MDTokenType $delimiter, int $count=1,
  2354. bool $plaintext=false): bool {
  2355. // We do four passes. #1: doubles without inner tokens, #2: singles
  2356. // without inner tokens, #3: doubles with paired inner tokens,
  2357. // #4: singles with paired inner tokens
  2358. if ($count == 1 && $pass != 2 && $pass != 4) return false;
  2359. if ($count > 1 && $pass != 1 && $pass != 3) return false;
  2360. $delimiters = array_fill(0, $count, $delimiter);
  2361. $isFirstOfMultiplePasses = $this->substitutionPassCount() > 1 && $pass == 1;
  2362. $match = MDToken::findPairedTokens($tokens, $delimiters, $delimiters,
  2363. function($content) use ($nodeClass, $isFirstOfMultiplePasses, $delimiter) {
  2364. $firstType = $content[0] instanceof MDToken ? $content[0]->type : null;
  2365. $lastType = $content[sizeof($content) - 1] instanceof MDToken ?
  2366. $content[sizeof($content) - 1]->type : null;
  2367. if ($firstType == MDTokenType::Whitespace) return false;
  2368. if ($lastType == MDTokenType::Whitespace) return false;
  2369. foreach ($content as $token) {
  2370. // Don't allow nesting
  2371. if (MDUtils::typename($token) == $nodeClass) return false;
  2372. }
  2373. if ($isFirstOfMultiplePasses) {
  2374. $innerCount = 0;
  2375. foreach ($content as $token) {
  2376. if ($token instanceof MDToken && $token->type == $delimiter) $innerCount++;
  2377. }
  2378. if (($innerCount % 2) != 0) return false;
  2379. }
  2380. return true;
  2381. });
  2382. if ($match === null) return false;
  2383. $state->checkExecutionTime();
  2384. if ($plaintext) {
  2385. $content = implode('', array_map(fn($token) => $token instanceof MDToken ?
  2386. $token->original : $token->toPlaintext($state), $match->contentTokens));
  2387. } else {
  2388. $content = $state->tokensToNodes($match->contentTokens);
  2389. }
  2390. $ref = new ReflectionClass($nodeClass);
  2391. $node = $ref->newInstanceArgs([ $content ]);
  2392. array_splice($tokens, $match->startIndex, $match->totalLength, [$node]);
  2393. return true;
  2394. }
  2395. private static $firstTime = null;
  2396. }
  2397. /**
  2398. * Reader for emphasis syntax. Denoted with a single underscore on either side of
  2399. * some text (preferred) or a single asterisk on either side.
  2400. */
  2401. class MDEmphasisReader extends MDSimplePairInlineReader {
  2402. public function readToken(MDState $state, string $line): ?MDToken {
  2403. if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
  2404. if (str_starts_with($line, '*')) return new MDToken('*', MDTokenType::Asterisk);
  2405. return null;
  2406. }
  2407. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2408. if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', MDTokenType::Underscore)) return true;
  2409. if ($this->attemptPair($state, $pass, $tokens, 'MDEmphasisNode', MDTokenType::Asterisk)) return true;
  2410. return false;
  2411. }
  2412. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2413. if ($other instanceof MDStrongReader) {
  2414. return 1;
  2415. }
  2416. return 0;
  2417. }
  2418. }
  2419. /**
  2420. * Reader for strong syntax. Denoted with two asterisks on either side of some
  2421. * text (preferred) or two underscores on either side. Note that if
  2422. * `MDUnderlineReader` is in use, it will replace the double-underscore syntax.
  2423. */
  2424. class MDStrongReader extends MDSimplePairInlineReader {
  2425. public function readToken(MDState $state, string $line): ?MDToken {
  2426. if (str_starts_with($line, '*')) return new MDToken('*', MDTokenType::Asterisk);
  2427. if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
  2428. return null;
  2429. }
  2430. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2431. if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', MDTokenType::Asterisk, 2)) return true;
  2432. if ($this->attemptPair($state, $pass, $tokens, 'MDStrongNode', MDTokenType::Underscore, 2)) return true;
  2433. return false;
  2434. }
  2435. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2436. if ($other instanceof MDEmphasisReader) {
  2437. return -1;
  2438. }
  2439. return 0;
  2440. }
  2441. }
  2442. /**
  2443. * Reader for strikethrough syntax. Consists of two tildes on either side of
  2444. * some text (preferred) or single tildes on either side. Note that if
  2445. * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
  2446. *
  2447. * The number of recognized tildes can be configured.
  2448. */
  2449. class MDStrikethroughReader extends MDSimplePairInlineReader {
  2450. public bool $singleTildeEnabled = true;
  2451. public bool $doubleTildeEnabled = true;
  2452. public function readToken(MDState $state, string $line): ?MDToken {
  2453. if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
  2454. return null;
  2455. }
  2456. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2457. if ($this->singleTildeEnabled) {
  2458. if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde, 2)) return true;
  2459. }
  2460. if ($this->doubleTildeEnabled) {
  2461. if ($this->attemptPair($state, $pass, $tokens, 'MDStrikethroughNode', MDTokenType::Tilde)) return true;
  2462. }
  2463. return false;
  2464. }
  2465. }
  2466. /**
  2467. * Reader for underline syntax. Consists of two underscores on either side of
  2468. * some text. If used with `MDStrongReader` which also looks for double
  2469. * underscores, this reader will take priority.
  2470. */
  2471. class MDUnderlineReader extends MDSimplePairInlineReader {
  2472. public function readToken(MDState $state, string $line): ?MDToken {
  2473. if (str_starts_with($line, '_')) return new MDToken('_', MDTokenType::Underscore);
  2474. return null;
  2475. }
  2476. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2477. return $this->attemptPair($state, $pass, $tokens, 'MDUnderlineNode', MDTokenType::Underscore, 2);
  2478. }
  2479. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2480. if ($other instanceof MDStrongReader) {
  2481. return -1;
  2482. }
  2483. return 0;
  2484. }
  2485. }
  2486. /**
  2487. * Reader for highlight syntax. Consists of pairs of equal signs on either side
  2488. * of some text.
  2489. */
  2490. class MDHighlightReader extends MDSimplePairInlineReader {
  2491. public function readToken(MDState $state, string $line): ?MDToken {
  2492. if (str_starts_with($line, '=')) return new MDToken('=', MDTokenType::Equal);
  2493. return null;
  2494. }
  2495. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2496. return $this->attemptPair($state, $pass, $tokens, 'MDHighlightNode', MDTokenType::Equal, 2);
  2497. }
  2498. }
  2499. /**
  2500. * Reader for inline code syntax. Consists of one or two delimiting backticks
  2501. * around text. The contents between the backticks will be rendered verbatim,
  2502. * ignoring any inner markdown syntax. To include a backtick inside, escape it
  2503. * with a backslash.
  2504. */
  2505. class MDCodeSpanReader extends MDSimplePairInlineReader {
  2506. public function readToken(MDState $state, string $line): ?MDToken {
  2507. if (str_starts_with($line, '`')) return new MDToken('`', MDTokenType::Backtick);
  2508. return null;
  2509. }
  2510. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2511. if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 2, true)) return true;
  2512. if ($this->attemptPair($state, $pass, $tokens, 'MDCodeNode', MDTokenType::Backtick, 1, true)) return true;
  2513. return false;
  2514. }
  2515. }
  2516. /**
  2517. * Reader for subscript syntax. Consists of single tildes on either side of
  2518. * some text. If used with `MDStrikethroughReader`, this reader will take
  2519. * precedence, and strikethrough can only be done with double tildes.
  2520. */
  2521. class MDSubscriptReader extends MDSimplePairInlineReader {
  2522. public function readToken(MDState $state, string $line): ?MDToken {
  2523. if (str_starts_with($line, '~')) return new MDToken('~', MDTokenType::Tilde);
  2524. return null;
  2525. }
  2526. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2527. return $this->attemptPair($state, $pass, $tokens, 'MDSubscriptNode', MDTokenType::Tilde);
  2528. }
  2529. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2530. if ($other instanceof MDStrikethroughReader) {
  2531. return -1;
  2532. }
  2533. return 0;
  2534. }
  2535. }
  2536. /**
  2537. * Reader for superscript syntax. Consists of single caret characters on either
  2538. * side of some text.
  2539. */
  2540. class MDSuperscriptReader extends MDSimplePairInlineReader {
  2541. public function readToken(MDState $state, string $line): ?MDToken {
  2542. if (str_starts_with($line, '^')) return new MDToken('^', MDTokenType::Caret);
  2543. return null;
  2544. }
  2545. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2546. return $this->attemptPair($state, $pass, $tokens, 'MDSuperscriptNode', MDTokenType::Caret);
  2547. }
  2548. }
  2549. /**
  2550. * Reads a hypertext link. Consists of link text between square brackets
  2551. * followed immediately by a URL in parentheses.
  2552. */
  2553. class MDLinkReader extends MDReader {
  2554. public function readToken(MDState $state, string $line): ?MDToken {
  2555. $simpleEmailRegex = "^<(" . MDUtils::baseEmailRegex . ")>";
  2556. $simpleURLRegex = "^<(" . MDUtils::baseURLRegex . ")>";
  2557. if ($groups = MDToken::tokenizeLabel($line)) {
  2558. return new MDToken($groups[0], MDTokenType::Label, $groups[1]);
  2559. }
  2560. if ($groups = MDToken::tokenizeEmail($line)) {
  2561. return new MDToken($groups[0], MDTokenType::Email, $groups[1], $groups[2]);
  2562. }
  2563. if ($groups = MDToken::tokenizeURL($line)) {
  2564. return new MDToken($groups[0], MDTokenType::URL, $groups[1], $groups[2]);
  2565. }
  2566. if (mb_eregi($simpleEmailRegex, $line, $groups)) {
  2567. return new MDToken($groups[0], MDTokenType::SimpleEmail, $groups[1]);
  2568. }
  2569. if (mb_eregi($simpleURLRegex, $line, $groups)) {
  2570. return new MDToken($groups[0], MDTokenType::SimpleLink, $groups[1]);
  2571. }
  2572. return null;
  2573. }
  2574. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2575. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
  2576. MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
  2577. $text = $match->tokens[0]->content;
  2578. $url = $match->tokens[sizeof($match->tokens) - 1]->content;
  2579. $title = $match->tokens[sizeof($match->tokens) - 1]->extra;
  2580. array_splice($tokens, $match->index, sizeof($match->tokens),
  2581. [new MDLinkNode($url, $state->inlineMarkdownToNode($text), $title)]);
  2582. return true;
  2583. }
  2584. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
  2585. MDTokenType::META_OptionalWhitespace, MDTokenType::Email ])) {
  2586. $text = $match->tokens[0]->content;
  2587. $email = $match->tokens[sizeof($match->tokens) - 1]->content;
  2588. $url = "mailto:{$email}";
  2589. $title = $match->tokens[sizeof($match->tokens) - 1]->extra;
  2590. array_splice($tokens, $match->index, sizeof($match->tokens),
  2591. [new MDLinkNode($url, $state->inlineMarkdownToNodes($text), $title)]);
  2592. return true;
  2593. }
  2594. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleEmail ])) {
  2595. $token = $match->tokens[0];
  2596. $link = "mailto:{$token->content}";
  2597. $node = new MDLinkNode($link, new MDObfuscatedTextNode($token->content));
  2598. array_splice($tokens, $match->index, 1, [$node]);
  2599. return true;
  2600. }
  2601. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::SimpleLink ])) {
  2602. $token = $match->tokens[0];
  2603. $link = $token->content;
  2604. $node = new MDLinkNode($link, new MDTextNode($link));
  2605. array_splice($tokens, $match->index, 1, [$node]);
  2606. return true;
  2607. }
  2608. return false;
  2609. }
  2610. }
  2611. /**
  2612. * Reader for referential URL definitions. Consists of link text between square
  2613. * brackets followed immediately by a reference symbol also in square brackets.
  2614. * The URL can be defined elsewhere on a line by itself with the symbol in square
  2615. * brackets, colon, and the URL (and optional title in quotes).
  2616. */
  2617. class MDReferencedLinkReader extends MDLinkReader {
  2618. public function readBlock(MDState $state): ?MDBlockNode {
  2619. $p = $state->p;
  2620. $line = $state->lines[$p++];
  2621. if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s+"(.*?)"\\s*$', $line, $groups)) {
  2622. $symbol = $groups[1];
  2623. $url = $groups[2];
  2624. $title = $groups[3];
  2625. } else {
  2626. if (mb_eregi('^\\s*\\[(.+?)]:\\s*(\\S+)\\s*$', $line, $groups)) {
  2627. $symbol = $groups[1];
  2628. $url = $groups[2];
  2629. $title = null;
  2630. } else {
  2631. return null;
  2632. }
  2633. }
  2634. $state->defineURL($symbol, $url, $title);
  2635. $state->p = $p;
  2636. return new MDBlockNode([]); // empty
  2637. }
  2638. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2639. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Label,
  2640. MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
  2641. $text = $match->tokens[0]->content;
  2642. $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
  2643. array_splice($tokens, $match->index, sizeof($match->tokens),
  2644. [new MDReferencedLinkNode($ref, $state->inlineMarkdownToNodes($text))]);
  2645. return true;
  2646. }
  2647. return false;
  2648. }
  2649. }
  2650. /**
  2651. * Reader for images. Consists of an exclamation, alt text in square brackets,
  2652. * and image URL in parentheses.
  2653. */
  2654. class MDImageReader extends MDLinkReader {
  2655. public function readToken(MDState $state, string $line): ?MDToken {
  2656. $s = parent::readToken($state, $line);
  2657. if ($s) return $s;
  2658. if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
  2659. return null;
  2660. }
  2661. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2662. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang,
  2663. MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::URL ])) {
  2664. $alt = $match->tokens[1]->content;
  2665. $url = $match->tokens[sizeof($match->tokens) - 1]->content;
  2666. $title = $match->tokens[sizeof($match->tokens) - 1]->extra;
  2667. $node = new MDImageNode($url, $alt);
  2668. if ($title !== null) {
  2669. $node->attributes['title'] = $title;
  2670. }
  2671. array_splice($tokens, $match->index, sizeof($match->tokens), [$node]);
  2672. return true;
  2673. }
  2674. return false;
  2675. }
  2676. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2677. if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
  2678. return -1;
  2679. }
  2680. return 0;
  2681. }
  2682. }
  2683. /**
  2684. * Reader for images with referential URL definitions. Consists of an
  2685. * exclamation, alt text in square brackets, and link symbol in square brackets.
  2686. * URL is defined the same as for `MDReferencedLinkReader`.
  2687. */
  2688. class MDReferencedImageReader extends MDReferencedLinkReader {
  2689. public function readToken(MDState $state, string $line): ?MDToken {
  2690. $s = parent::readToken($state, $line);
  2691. if ($s) return $s;
  2692. if (str_starts_with($line, '!')) return new MDToken('!', MDTokenType::Bang);
  2693. return null;
  2694. }
  2695. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2696. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Bang,
  2697. MDTokenType::Label, MDTokenType::META_OptionalWhitespace, MDTokenType::Label ])) {
  2698. $alt = $match->tokens[1]->content;
  2699. $ref = $match->tokens[sizeof($match->tokens) - 1]->content;
  2700. array_splice($tokens, $match->index, sizeof($match->tokens),
  2701. [new MDReferencedImageNode($ref, $alt)]);
  2702. return true;
  2703. }
  2704. return false;
  2705. }
  2706. public function compareSubstituteOrdering(MDReader $other, int $pass): int {
  2707. if (get_class($other) === 'MDLinkReader' || get_class($other) === 'MDReferencedLinkReader') {
  2708. return -1;
  2709. }
  2710. return 0;
  2711. }
  2712. }
  2713. /**
  2714. * Converts line breaks within blocks into line breaks in the HTML. Not
  2715. * included in any of the default reader sets since most flavors ignore
  2716. * line breaks within blocks.
  2717. */
  2718. class MDLineBreakReader extends MDReader {
  2719. public function postProcess(MDState $state, array &$blocks) {
  2720. MDNode::replaceNodes($state, $blocks, function(MDNode $original) {
  2721. if (!($original instanceof MDTextNode)) return null;
  2722. $lines = explode("\n", $original->text);
  2723. if (sizeof($lines) == 1) return null;
  2724. $nodes = [];
  2725. foreach ($lines as $i => $line) {
  2726. if ($i > 0) {
  2727. array_push($nodes, new MDLineBreakNode());
  2728. }
  2729. array_push($nodes, new MDTextNode($line));
  2730. }
  2731. return new MDNode($nodes);
  2732. });
  2733. }
  2734. }
  2735. /**
  2736. * Reads a verbatim HTML tag, and if it passes validation by `MDState->$tagFilter`,
  2737. * will be rendered in the final HTML document. Disallowed tags will be rendered
  2738. * as plain text in the resulting document.
  2739. */
  2740. class MDHTMLTagReader extends MDReader {
  2741. public function readToken(MDState $state, string $line): ?MDToken {
  2742. $tag = MDHTMLTag::fromLineStart($line, $state);
  2743. if ($tag === null) return null;
  2744. if (!$state->root()->tagFilter->isValidTagName($tag->tagName)) return null;
  2745. $state->root()->tagFilter->scrubTag($tag);
  2746. return new MDToken($tag->original, MDTokenType::HTMLTag, $tag);
  2747. }
  2748. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2749. if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::HTMLTag ])) {
  2750. $tag = $match->tokens[0]->tag;
  2751. array_splice($tokens, $match->index, 1, [new MDHTMLTagNode($tag)]);
  2752. return true;
  2753. }
  2754. return false;
  2755. }
  2756. }
  2757. /**
  2758. * Reads tag modifiers. Consists of curly braces with one or more CSS classes,
  2759. * IDs, or custom attributes separated by spaces to apply to the preceding
  2760. * node. Validation is performed on modifiers and only acceptable values are
  2761. * applied.
  2762. */
  2763. class MDModifierReader extends MDReader {
  2764. public function readToken(MDState $state, string $line): ?MDToken {
  2765. $modifier = MDTagModifier::fromStart($line);
  2766. if ($modifier) return new MDToken($modifier->original, MDTokenType::Modifier, $modifier);
  2767. return null;
  2768. }
  2769. public function substituteTokens(MDState $state, int $pass, array &$tokens): bool {
  2770. // Modifiers are applied elsewhere, and if they're not it's fine if they're
  2771. // rendered as the original syntax.
  2772. return false;
  2773. }
  2774. }
  2775. // -- Nodes -----------------------------------------------------------------
  2776. /**
  2777. * Base class for nodes in the assembled document tree.
  2778. */
  class MDNode {
      /**
       * CSS classes to add to the node when rendered as HTML. Kept as a
       * sequential list.
       * @var string[]
       */
      public array $cssClasses = [];

      /**
       * Value for the rendered `id` attribute. Omitted from output when
       * `null` or empty.
       */
      public ?string $cssId = null;

      /**
       * Mapping of CSS property names to values, rendered into the `style`
       * attribute.
       * @var string[]
       */
      public array $cssStyles = [];

      /**
       * Mapping of arbitrary attributes and values to add to this node's
       * top-level tag when rendered as HTML. For `class`, `id`, and `style`
       * attributes, use `$cssClasses`, `$cssId`, and `$cssStyles` instead;
       * those three keys are skipped when rendering this mapping.
       * @var array
       */
      public array $attributes = [];

      /**
       * All child nodes in this node.
       * @var MDNode[]
       */
      public array $children = [];

      /**
       * @param MDNode|MDNode[] $children a single child or an array of children
       * @throws Error if an array element is not an `MDNode`
       */
      public function __construct(MDNode|array $children=[]) {
          // The parameter type already rejects anything that is neither an
          // MDNode nor an array, so only array elements need validating.
          $nodes = is_array($children) ? $children : [ $children ];
          foreach ($nodes as $elem) {
              if (!($elem instanceof MDNode)) {
                  $thisClassName = MDUtils::typename($this);
                  $elemClassName = MDUtils::typename($elem);
                  throw new Error("{$thisClassName} expects children of type " .
                      "MDNode[] or MDNode, got array with {$elemClassName} element");
              }
          }
          $this->children = $nodes;
      }

      /**
       * Debug representation: the class name followed by the string form of
       * each child, wrapped in angle brackets.
       */
      public function __toString(): string {
          $s = "<" . get_class($this);
          foreach ($this->children as $child) {
              $s .= " {$child}";
          }
          $s .= ">";
          return $s;
      }

      /**
       * Adds a CSS class. If already present it will not be duplicated.
       *
       * @param string $cssClass
       * @return bool `true` if the class was added, `false` if it was
       * already present
       */
      public function addClass(string $cssClass): bool {
          if (in_array($cssClass, $this->cssClasses, true)) return false;
          $this->cssClasses[] = $cssClass;
          return true;
      }

      /**
       * Removes a CSS class.
       *
       * @param string $cssClass
       * @return bool whether the class was present and removed
       */
      public function removeClass(string $cssClass): bool {
          // array_values() re-indexes the result so $cssClasses stays a
          // sequential list rather than keeping gaps in its keys.
          $remaining = array_values(array_filter(
              $this->cssClasses,
              fn($existing) => $existing !== $cssClass
          ));
          $wasRemoved = count($remaining) !== count($this->cssClasses);
          $this->cssClasses = $remaining;
          return $wasRemoved;
      }

      /**
       * Renders this node and any children as an HTML string. If the node has
       * no content an empty string should be returned. The base
       * implementation renders only the children.
       */
      public function toHTML(MDState $state): string {
          return MDNode::arrayToHTML($this->children, $state);
      }

      /**
       * Renders this node and any children as a plain text string. The
       * conversion should only render ordinary text, not attempt
       * markdown-like formatting (e.g. list items should not be prefixed with
       * asterisks, only have their content text returned). If the node has no
       * renderable content an empty string should be returned.
       */
      public function toPlaintext(MDState $state): string {
          return MDNode::arrayToPlaintext($this->children, $state);
      }

      /**
       * Protected helper method that renders an HTML fragment of the
       * attributes to apply to the root HTML tag representation of this node.
       *
       * Example result with a couple `$cssClasses`, a `$cssId`, and a custom
       * `$attributes` key-value pair:
       *
       * ```
       *  class="foo bar" id="baz" lang="en"
       * ```
       *
       * The value includes a leading space if it's non-empty so that it can
       * be concatenated directly after the tag name and before the closing
       * `>`.
       */
      protected function htmlAttributes(): string {
          $html = '';
          if (sizeof($this->cssClasses) > 0) {
              $classlist = MDUtils::escapeHTML(implode(' ', $this->cssClasses));
              $html .= " class=\"{$classlist}\"";
          }
          if ($this->cssId !== null && mb_strlen($this->cssId) > 0) {
              $html .= " id=\"" . MDUtils::escapeHTML($this->cssId) . "\"";
          }
          $styles = [];
          foreach ($this->cssStyles as $key => $value) {
              $styles[] = "{$key}: {$value};";
          }
          if (sizeof($styles) > 0) {
              $escaped = MDUtils::escapeHTML(implode(' ', $styles));
              $html .= " style=\"{$escaped}\"";
          }
          foreach ($this->attributes as $key => $value) {
              // These three are rendered from their dedicated properties above.
              if ($key === 'class' || $key === 'id' || $key === 'style') continue;
              $cleanKey = MDUtils::scrubAttributeName($key);
              // Skip attributes whose name scrubs down to nothing.
              if (mb_strlen($cleanKey) == 0) continue;
              $cleanValue = MDUtils::escapeHTML($value);
              $html .= " {$cleanKey}=\"{$cleanValue}\"";
          }
          return $html;
      }

      /**
       * Protected helper that renders and concatenates the HTML of all
       * children of this node. Mostly for use by subclasses in their `toHTML`
       * implementations.
       */
      protected function childHTML(MDState $state): string {
          return MDNode::arrayToHTML($this->children, $state);
      }

      /**
       * Protected helper that renders and concatenates the plaintext of all
       * children of this node.
       */
      protected function childPlaintext(MDState $state): string {
          return MDNode::arrayToPlaintext($this->children, $state);
      }

      /**
       * Protected helper for rendering nodes represented by simple paired
       * HTML tags. Custom CSS classes and attributes will be included in the
       * result, and child content will be rendered between the tags.
       *
       * A newline follows the opening tag when the first child is a block
       * node, precedes the closing tag when the last child is a block node,
       * and follows the closing tag when this node itself is a block node.
       */
      protected function simplePairedTagHTML(MDState $state, string $tagName): string {
          $openTagSuffix = ($this->children[0] ?? null) instanceof MDBlockNode ? "\n" : "";
          $closeTagPrefix = ($this->children[sizeof($this->children) - 1] ?? null) instanceof MDBlockNode ? "\n" : '';
          $closeTagSuffix = $this instanceof MDBlockNode ? "\n" : '';
          $attr = $this->htmlAttributes();
          $childHTML = $this->childHTML($state);
          return "<{$tagName}{$attr}>{$openTagSuffix}{$childHTML}{$closeTagPrefix}</{$tagName}>{$closeTagSuffix}";
      }

      /**
       * Calls the given callback function with every child node, recursively.
       * Nodes are visited depth-first: the callback receives a child before
       * that child's own descendants.
       */
      public function visitChildren(callable $fn) {
          foreach ($this->children as $child) {
              $fn($child);
              $child->visitChildren($fn);
          }
      }

      /**
       * Helper for rendering and concatenating HTML from an array of
       * `MDNode`s. A newline is appended after each block node's HTML.
       *
       * @param MDNode[] $nodes
       * @param MDState $state
       * @return string HTML string
       */
      public static function arrayToHTML(array $nodes, MDState $state): string {
          return implode('', array_map(fn($node) => $node->toHTML($state) . ($node instanceof MDBlockNode ? "\n" : ''), $nodes));
      }

      /**
       * Helper for rendering and concatenating plaintext from an array of
       * `MDNode`s.
       *
       * @param MDNode[] $nodes
       * @param MDState $state
       * @return string plaintext
       */
      public static function arrayToPlaintext(array $nodes, MDState $state): string {
          return implode('', array_map(fn($node) => $node->toPlaintext($state), $nodes));
      }

      /**
       * Recursively searches and replaces nodes in a tree. The given
       * `$replacer` is passed every node in the tree. If `$replacer` returns
       * a new `MDNode` the original will be replaced with it and the
       * replacement's subtree is not visited. If the function returns `null`
       * no change will be made to that node and its children are processed
       * in turn. Traversal is depth-first.
       *
       * @param MDState $state
       * @param MDNode[] $nodes modified in place
       * @param callable $replacer takes a node as an argument, returns either
       * a new node or `null` to leave it unchanged
       */
      public static function replaceNodes(MDState $state, array &$nodes, callable $replacer) {
          for ($i = 0; $i < sizeof($nodes); $i++) {
              $originalNode = $nodes[$i];
              $replacement = $replacer($originalNode);
              if ($replacement instanceof MDNode) {
                  array_splice($nodes, $i, 1, [$replacement]);
              } else {
                  self::replaceNodes($state, $originalNode->children, $replacer);
              }
          }
      }
  }
  2991. /**
  2992. * Marker subclass that indicates a node represents block syntax.
  2993. */
  class MDBlockNode extends MDNode {
      // Deliberately empty: adds no members. `MDNode` rendering helpers test
      // `instanceof MDBlockNode` to decide where newlines are emitted.
  }
  2995. /**
  2996. * Paragraph block.
  2997. */
  class MDParagraphNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<p>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'p';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3003. /**
  3004. * A heading block with a level from 1 to 6.
  3005. */
  class MDHeadingNode extends MDBlockNode {
      /** Heading level, 1 through 6. */
      public int $level;

      /**
       * @param int $level heading level from 1 to 6
       * @param MDNode|MDNode[] $children
       * @throws Error if the level is outside 1 to 6
       */
      public function __construct(int $level, MDNode|array $children) {
          parent::__construct($children);
          $isValidLevel = is_int($level) && $level >= 1 && $level <= 6;
          if (!$isValidLevel) {
              $thisClassName = MDUtils::typename($this);
              throw new Error("{$thisClassName} requires heading level 1 to 6");
          }
          $this->level = $level;
      }

      /**
       * Renders the children wrapped in an `<h1>` through `<h6>` element
       * according to `$level`.
       */
      public function toHTML(MDState $state): string {
          $tagName = "h{$this->level}";
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3024. /**
  3025. * A sub-text block with smaller, less prominent text.
  3026. */
  class MDSubtextNode extends MDBlockNode {
      /**
       * Renders the children in a `<div>` carrying the `subtext` CSS class.
       */
      public function toHTML(MDState $state): string {
          // addClass() does nothing when the class is already present, so
          // rendering more than once does not duplicate it.
          $this->addClass('subtext');
          $tagName = 'div';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3033. /**
  3034. * Node for a horizontal dividing line.
  3035. */
  class MDHorizontalRuleNode extends MDBlockNode {
      /**
       * Renders an `<hr>` tag with this node's attributes. Children are
       * not rendered.
       */
      public function toHTML(MDState $state): string {
          $attr = $this->htmlAttributes();
          return "<hr{$attr}>";
      }
  }
  3041. /**
  3042. * A block quote, usually rendered indented from other text.
  3043. */
  class MDBlockquoteNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<blockquote>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'blockquote';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3049. /**
  3050. * A bulleted list. Contains `MDListItemNode` children.
  3051. */
  class MDUnorderedListNode extends MDBlockNode {
      /** @var MDListItemNode[] $children */

      /**
       * Renders the list items wrapped in a `<ul>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'ul';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3058. /**
  3059. * A numbered list. Contains `MDListItemNode` children.
  3060. */
  class MDOrderedListNode extends MDBlockNode {
      /** @var MDListItemNode[] $children */

      /** Number the list starts at, or `null` when unspecified. */
      public ?int $startOrdinal;

      /**
       * @param MDListItemNode[] $children
       * @param ?int $startOrdinal
       */
      public function __construct(array $children, ?int $startOrdinal=null) {
          parent::__construct($children);
          $this->startOrdinal = $startOrdinal;
      }

      /**
       * Renders the list items wrapped in an `<ol>` element. A `start`
       * attribute is set when a start ordinal other than 1 was given.
       */
      public function toHTML(MDState $state): string {
          $first = $this->startOrdinal;
          $needsStartAttribute = $first !== null && $first != 1;
          if ($needsStartAttribute) {
              $this->attributes['start'] = strval($first);
          }
          return $this->simplePairedTagHTML($state, 'ol');
      }
  }
  3079. /**
  3080. * An item in a bulleted or numbered list.
  3081. */
  class MDListItemNode extends MDBlockNode {
      /** Number associated with this item, or `null` when none was given. */
      public ?int $ordinal;

      /**
       * @param MDNode|MDNode[] $children
       * @param ?int $ordinal
       */
      public function __construct(MDNode|array $children, ?int $ordinal=null) {
          parent::__construct($children);
          $this->ordinal = $ordinal;
      }

      /**
       * Renders the children wrapped in an `<li>` element. `$ordinal` is not
       * part of the output.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'li';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3096. /**
  3097. * A block of preformatted computer code. Inner markdown is ignored.
  3098. */
  class MDCodeBlockNode extends MDBlockNode {
      /** Verbatim code content. HTML-escaped when rendered. */
      public string $text;

      /**
       * The programming language of the content.
       */
      public ?string $language;

      /**
       * @param string $text code content
       * @param ?string $language language name, or `null` if unspecified
       */
      public function __construct(string $text, ?string $language=null) {
          parent::__construct([]);
          $this->text = $text;
          $this->language = $language;
      }

      /**
       * Renders `<pre><code>…</code></pre>` followed by a newline. When a
       * language is set the `<code>` tag gets a `language-…` CSS class.
       */
      public function toHTML(MDState $state): string {
          $languageModifier = '';
          if ($this->language !== null) {
              // The language name comes from the document, so escape it
              // before placing it inside a quoted attribute value.
              $escapedLanguage = MDUtils::escapeHTML($this->language);
              $languageModifier = " class=\"language-{$escapedLanguage}\"";
          }
          return "<pre" . $this->htmlAttributes() . "><code{$languageModifier}>" .
              MDUtils::escapeHTML($this->text) . "</code></pre>\n";
      }
  }
  3116. /**
  3117. * A table node with a single header row and any number of body rows.
  3118. */
  class MDTableNode extends MDBlockNode {
      /** @var MDTableRowNode[] $children */

      /**
       * First child, used as the header row, or `null` if there are no
       * children.
       */
      public function headerRow(): ?MDTableRowNode {
          return $this->children[0] ?? null;
      }

      /**
       * All children after the first, re-indexed from zero.
       */
      public function bodyRows(): array {
          return array_slice($this->children, 1);
      }

      /**
       * How to align each column. Columns beyond the length of the array or
       * with corresponding `null` elements will have no alignment set. Values
       * should be valid CSS `text-align` values.
       *
       * @var string[]
       */
      public array $columnAlignments = [];

      /**
       * @param MDTableRowNode $headerRow
       * @param MDTableRowNode[] $bodyRows
       */
      public function __construct(MDTableRowNode $headerRow, array $bodyRows) {
          $allRows = array_merge([ $headerRow ], $bodyRows);
          parent::__construct($allRows);
      }

      /**
       * Returns a given body cell.
       *
       * @param int $column zero-based column index
       * @param int $row zero-based index into the body rows
       * @return ?MDTableCellNode cell or `null` if out of bounds
       */
      public function bodyCellAt(int $column, int $row): ?MDTableCellNode {
          $rowNode = $this->bodyRows()[$row] ?? null;
          if ($rowNode === null) {
              return null;
          }
          return $rowNode->children[$column] ?? null;
      }

      /**
       * Applies `$columnAlignments` to the cells of every row, header
       * included.
       */
      public function applyAlignments() {
          foreach ($this->children as $rowNode) {
              $this->applyAlignmentsToRow($rowNode);
          }
      }

      /**
       * Applies the alignment for each cell's column index to the cells of
       * one row.
       */
      private function applyAlignmentsToRow(MDTableRowNode $row) {
          foreach ($row->children as $columnIndex => $cell) {
              $this->applyAlignmentToCell($cell, $this->columnAlignments[$columnIndex] ?? null);
          }
      }

      /**
       * Sets the cell's `text-align` style, or removes it when the alignment
       * is empty.
       */
      public function applyAlignmentToCell(MDTableCellNode $cell, ?string $alignment) {
          if (!$alignment) {
              unset($cell->cssStyles['text-align']);
              return;
          }
          $cell->cssStyles['text-align'] = $alignment;
      }

      /**
       * Renders a `<table>` with the header row in `<thead>` and the
       * remaining rows in `<tbody>`. Column alignments are applied to the
       * cells first.
       */
      public function toHTML(MDState $state): string {
          $this->applyAlignments();
          $attr = $this->htmlAttributes();
          $headerHTML = $this->headerRow()->toHTML($state);
          $bodyHTML = MDNode::arrayToHTML($this->bodyRows(), $state);
          return "<table{$attr}>\n" .
              "<thead>\n" .
              "{$headerHTML}\n" .
              "</thead>\n" .
              "<tbody>\n" .
              "{$bodyHTML}\n" .
              "</tbody>\n" .
              "</table>\n";
      }
  }
  3183. /**
  3184. * Node for one row (header or body) in a table.
  3185. */
  class MDTableRowNode extends MDBlockNode {
      /** @var MDTableCellNode[] $children */

      /**
       * Renders the cells wrapped in a `<tr>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'tr';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3192. /**
  3193. * Node for one cell in a table row.
  3194. */
  class MDTableCellNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<td>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'td';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3200. /**
  3201. * Node for a header cell in a header table row.
  3202. */
  class MDTableHeaderCellNode extends MDTableCellNode {
      /**
       * Renders the children wrapped in a `<th>` element instead of the
       * parent's `<td>`.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'th';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3208. /**
  3209. * Definition list with `MDDefinitionListTermNode` and
  3210. * `MDDefinitionListDefinitionNode` children.
  3211. */
  class MDDefinitionListNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<dl>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'dl';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3217. /**
  3218. * A word or term in a definition list.
  3219. */
  class MDDefinitionListTermNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<dt>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'dt';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3225. /**
  3226. * The definition of a word or term in a definition list. Should follow a
  3227. * definition term, or another definition to serve as an alternate.
  3228. */
  class MDDefinitionListDefinitionNode extends MDBlockNode {
      /**
       * Renders the children wrapped in a `<dd>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'dd';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3234. /**
  3235. * Block at the bottom of a document listing all the footnotes with their
  3236. * content.
  3237. */
  class MDFootnoteListNode extends MDBlockNode {
      /**
       * Looks up the numeric id recorded for a footnote symbol in the root
       * state's `footnoteIds` user info.
       *
       * @param MDState $state
       * @param string $symbol footnote symbol
       * @return ?int the id, or `null` when no id is recorded
       */
      private function footnoteId(MDState $state, string $symbol): ?int {
          $lookup = $state->root()->userInfo['footnoteIds'] ?? null;
          if (!$lookup) return null;
          return $lookup[$symbol] ?? null;
      }

      /**
       * Renders every footnote with content as an `<li>` inside
       * `<div class="footnotes"><ol>`, each followed by back-reference links
       * to the places it was cited. Returns an empty string when no footnotes
       * are recorded.
       */
      public function toHTML(MDState $state): string {
          $root = $state->root();
          // A missing key is treated the same as an empty footnote list.
          $footnotes = $root->userInfo['footnotes'] ?? [];
          if (sizeof($footnotes) == 0) return '';
          $footnoteUniques = $root->userInfo['footnoteInstances'] ?? [];
          $html = '';
          $html .= '<div class="footnotes">';
          $html .= '<ol>';
          foreach (array_keys($footnotes) as $symbolRaw) {
              // Numeric-looking array keys come back as ints; normalize to string.
              $symbol = "{$symbolRaw}";
              $content = $footnotes[$symbol];
              if (!$content) continue;
              $footnoteId = $this->footnoteId($state, $symbol);
              $contentHTML = MDNode::arrayToHTML($content, $state);
              $html .= "<li value=\"{$footnoteId}\" id=\"" .
                  "{$root->elementIdPrefix}footnote_{$footnoteId}\">{$contentHTML}";
              $uniques = $footnoteUniques[$symbol] ?? null;
              if ($uniques) {
                  foreach ($uniques as $unique) {
                      $html .= " <a href=\"#{$root->elementIdPrefix}footnoteref_{$unique}\"" .
                          " class=\"footnote-backref\">↩︎</a>";
                  }
              }
              $html .= "</li>\n";
          }
          $html .= '</ol>';
          $html .= '</div>';
          return $html;
      }

      /**
       * Renders every footnote with content as a `symbol. text` line.
       * Footnotes are read from the root state, matching `toHTML`. Returns an
       * empty string when no footnotes are recorded.
       */
      public function toPlaintext(MDState $state): string {
          $footnotes = $state->root()->userInfo['footnotes'] ?? [];
          if (sizeof($footnotes) == 0) return '';
          $text = '';
          foreach ($footnotes as $symbolRaw => $content) {
              if (!$content) continue;
              // Render the footnote's own content nodes, as toHTML does.
              $text .= "{$symbolRaw}. " . MDNode::arrayToPlaintext($content, $state) . "\n";
          }
          return trim($text);
      }
  }
  3287. /**
  3288. * Marker subclass that indicates a node represents inline syntax.
  3289. */
  class MDInlineNode extends MDNode {
      // Deliberately empty: adds no members; exists only so nodes can be
      // identified as inline by type.
  }
  3291. /**
  3292. * Contains plain text. Special HTML characters are escaped when rendered.
  3293. */
  class MDTextNode extends MDInlineNode {
      /** Raw, unescaped text content. */
      public string $text;

      /**
       * @param string $text raw text; the node is created with no children
       */
      public function __construct(string $text) {
          parent::__construct([]);
          $this->text = $text;
      }

      /**
       * Returns the text with special HTML characters escaped.
       */
      public function toHTML(MDState $state): string {
          $escaped = MDUtils::escapeHTML($this->text);
          return $escaped;
      }

      /**
       * Returns the text unchanged.
       */
      public function toPlaintext(MDState $state): string {
          return $this->text;
      }

      /**
       * Debug representation showing the raw text in double quotes.
       */
      public function __toString(): string {
          return '<MDTextNode "' . $this->text . '">';
      }
  }
  3310. /**
  3311. * Contains plain text which is rendered with HTML entities when rendered to
  3312. * be marginally more difficult for web scrapers to decipher. Used for
  3313. * semi-sensitive info like email addresses.
  3314. */
  class MDObfuscatedTextNode extends MDTextNode {
      /**
       * Returns the text encoded by `MDUtils::escapeObfuscated` instead of
       * the parent's plain HTML escaping.
       */
      public function toHTML(MDState $state): string {
          $obfuscated = MDUtils::escapeObfuscated($this->text);
          return $obfuscated;
      }
  }
  3320. /**
  3321. * Emphasized (italicized) content.
  3322. */
  class MDEmphasisNode extends MDInlineNode {
      /**
       * Renders the children wrapped in an `<em>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'em';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3328. /**
  3329. * Strong (bold) content.
  3330. */
  class MDStrongNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<strong>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'strong';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3336. /**
  3337. * Content rendered with a line through it.
  3338. */
  class MDStrikethroughNode extends MDInlineNode {
      /**
       * Renders the children wrapped in an `<s>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 's';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3344. /**
  3345. * Underlined content.
  3346. */
  class MDUnderlineNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<u>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'u';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3352. /**
  3353. * Highlighted content. Usually rendered with a bright colored background.
  3354. */
  class MDHighlightNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<mark>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'mark';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3360. /**
  3361. * Superscripted content.
  3362. */
  class MDSuperscriptNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<sup>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'sup';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3368. /**
  3369. * Subscripted content.
  3370. */
  class MDSubscriptNode extends MDInlineNode {
      /**
       * Renders the children wrapped in a `<sub>` element.
       */
      public function toHTML(MDState $state): string {
          $tagName = 'sub';
          return $this->simplePairedTagHTML($state, $tagName);
      }
  }
  3376. /**
  3377. * Inline plaintext indicating computer code.
  3378. */
  class MDCodeNode extends MDInlineNode {
      /** Verbatim code content. HTML-escaped when rendered. */
      public string $text;

      /**
       * @param string $text code content; the node is created with no children
       */
      public function __construct(string $text) {
          parent::__construct([]);
          $this->text = $text;
      }

      /**
       * Renders the escaped text inside a `<code>` element carrying this
       * node's attributes.
       */
      public function toHTML(MDState $state): string {
          $attr = $this->htmlAttributes();
          $escapedText = MDUtils::escapeHTML($this->text);
          return "<code{$attr}>{$escapedText}</code>";
      }
  }
  3389. /**
  3390. * A footnote symbol in a document. Denoted as a superscripted number that can
  3391. * be clicked to go to its content at the bottom of the document.
  3392. */
  class MDFootnoteNode extends MDInlineNode {
      /**
       * Symbol the author used to match up the footnote to its content
       * definition.
       */
      public string $symbol;

      /**
       * The superscript symbol rendered in HTML. May be the same or different
       * than `$symbol`. When `null`, `$symbol` is rendered instead.
       */
      public ?string $displaySymbol = null;

      /**
       * Unique ID for the footnote definition. While `null`, the node renders
       * as a placeholder comment rather than a link.
       */
      public ?int $footnoteId = null;

      /**
       * Unique number for backlinking to a footnote occurrence. Populated by
       * `MDFootnoteReader->postProcess()`.
       */
      public ?int $occurrenceId = null;

      /**
       * @param string $symbol footnote symbol
       * @param ?string $title stored as the `title` attribute when non-empty
       */
      public function __construct(string $symbol, ?string $title=null) {
          parent::__construct([]);
          $this->symbol = $symbol;
          if ($title) {
              $this->attributes['title'] = $title;
          }
      }

      /**
       * Renders a superscripted link to the footnote definition, or an HTML
       * comment placeholder containing the symbol when no footnote id has
       * been assigned.
       */
      public function toHTML(MDState $state): string {
          if ($this->footnoteId === null) {
              return "<!--FNREF:{{$this->symbol}}-->";
          }
          $openTag = "<sup class=\"footnote\" id=\"{$state->root()->elementIdPrefix}footnoteref_{$this->occurrenceId}\"" .
              $this->htmlAttributes() . ">";
          $anchorOpen = "<a href=\"#{$state->root()->elementIdPrefix}footnote_{$this->footnoteId}\">";
          $label = MDUtils::escapeHTML($this->displaySymbol ?? $this->symbol);
          return $openTag . $anchorOpen . $label . "</a></sup>";
      }
  }
  3427. /**
  3428. * A clickable hypertext link.
  3429. */
  class MDLinkNode extends MDInlineNode {
      /** Link target, rendered into the `href` attribute. */
      public string $href;

      /**
       * @param string $href
       * @param MDNode|MDNode[] $children link content
       * @param ?string $title stored as the `title` attribute when not `null`
       */
      public function __construct(string $href, MDNode|array $children, ?string $title=null) {
          parent::__construct($children);
          $this->href = $href;
          if ($title !== null) {
              $this->attributes['title'] = $title;
          }
      }

      /**
       * Renders an `<a>` element around the children. `mailto:` targets are
       * encoded with `MDUtils::escapeObfuscated`; all others are HTML-escaped.
       */
      public function toHTML(MDState $state): string {
          $isMailLink = str_starts_with($this->href, 'mailto:');
          $escapedLink = $isMailLink
              ? MDUtils::escapeObfuscated($this->href)
              : MDUtils::escapeHTML($this->href);
          $attr = $this->htmlAttributes();
          $inner = $this->childHTML($state);
          return "<a href=\"{$escapedLink}\"{$attr}>{$inner}</a>";
      }
  }
  3451. /**
  3452. * A clickable hypertext link where the URL is defined elsewhere by reference.
  3453. */
  class MDReferencedLinkNode extends MDLinkNode {
      /** Reference name used to look up the URL and title at render time. */
      public string $reference;

      /**
       * @param string $reference
       * @param MDNode|MDNode[] $children
       */
      public function __construct(string $reference, MDNode|array $children) {
          // The href starts empty and is resolved from the reference in toHTML.
          parent::__construct('', $children);
          $this->reference = $reference;
      }

      /**
       * Resolves the URL and title from the state while the href is still
       * empty, then renders as an ordinary link.
       */
      public function toHTML(MDState $state): string {
          if ($this->href !== '') {
              return parent::toHTML($state);
          }
          $resolvedURL = $state->urlForReference($this->reference);
          if ($resolvedURL) {
              $this->href = $resolvedURL;
          }
          $resolvedTitle = $state->urlTitleForReference($this->reference);
          if ($resolvedTitle) {
              $this->attributes['title'] = $resolvedTitle;
          }
          return parent::toHTML($state);
      }
  }
  3474. /**
  3475. * An inline image.
  3476. */
  class MDImageNode extends MDInlineNode {
      /** Image URL, rendered into the `src` attribute. */
      public string $src;

      /** Alternate text. The `alt` attribute is omitted when this is empty. */
      public ?string $alt;

      /**
       * @param string $src
       * @param ?string $alt
       */
      public function __construct(string $src, ?string $alt) {
          parent::__construct([]);
          $this->src = $src;
          $this->alt = $alt;
      }

      /**
       * Renders an `<img>` tag with escaped `src`, optional `alt`, and this
       * node's attributes.
       */
      public function toHTML(MDState $state): string {
          $escapedSrc = MDUtils::escapeHTML($this->src);
          $html = "<img src=\"{$escapedSrc}\"";
          if ($this->alt) {
              $escapedAlt = MDUtils::escapeHTML($this->alt);
              $html .= " alt=\"{$escapedAlt}\"";
          }
          return $html . $this->htmlAttributes() . ">";
      }
  }
  3492. /**
  3493. * An inline image where the URL is defined elsewhere by reference.
  3494. */
  class MDReferencedImageNode extends MDImageNode {
      /** Reference name used to look up the URL and title at render time. */
      public string $reference;

      /**
       * @param string $reference
       * @param ?string $alt
       */
      public function __construct(string $reference, ?string $alt=null) {
          // MDImageNode's constructor takes exactly ($src, $alt). The src
          // starts empty and is resolved from the reference in toHTML.
          parent::__construct('', $alt);
          $this->reference = $reference;
      }

      /**
       * Resolves the URL and title from the state while the src is still
       * empty, then renders as an ordinary image.
       */
      public function toHTML(MDState $state): string {
          if ($this->src === '') {
              $url = $state->urlForReference($this->reference);
              if ($url !== null) $this->src = $url;
              $title = $state->urlTitleForReference($this->reference);
              if ($title !== null) $this->attributes['title'] = $title;
          }
          return parent::toHTML($state);
      }
  }
  3511. /**
  3512. * An abbreviation that can be hovered over to see its full expansion.
  3513. */
  class MDAbbreviationNode extends MDInlineNode {
      /** The abbreviated text shown in the document. */
      public string $abbreviation;

      /**
       * @param string $abbreviation
       * @param string $definition stored as the `title` attribute
       */
      public function __construct(string $abbreviation, string $definition) {
          parent::__construct([]);
          $this->attributes['title'] = $definition;
          $this->abbreviation = $abbreviation;
      }

      /**
       * Renders the escaped abbreviation inside an `<abbr>` element carrying
       * this node's attributes.
       */
      public function toHTML(MDState $state): string {
          $attr = $this->htmlAttributes();
          $escapedText = MDUtils::escapeHTML($this->abbreviation);
          return "<abbr{$attr}>{$escapedText}</abbr>";
      }
  }
  3529. /**
  3530. * A line break that is preserved when rendered to HTML.
  3531. */
  class MDLineBreakNode extends MDInlineNode {
      /**
       * Always renders a bare `<br>` tag; node attributes are not included.
       */
      public function toHTML(MDState $state): string {
          $tag = '<br>';
          return $tag;
      }

      /**
       * Renders as a single newline character.
       */
      public function toPlaintext(MDState $state): string {
          $newline = "\n";
          return $newline;
      }
  }
  3540. /**
  3541. * A verbatim HTML tag. May be altered to strip out disallowed attributes or
  3542. * CSS values.
  3543. */
  class MDHTMLTagNode extends MDInlineNode {
      /** The tag to emit. */
      public MDHTMLTag $tag;

      /**
       * @param MDHTMLTag $tag the node is created with no children
       */
      public function __construct(MDHTMLTag $tag) {
          parent::__construct([]);
          $this->tag = $tag;
      }

      /**
       * Renders the tag's string form.
       */
      public function toHTML(MDState $state): string {
          return (string) $this->tag;
      }
  }
  3554. // -- Main class ------------------------------------------------------------
  3555. /**
  3556. * Markdown parser.
  3557. */
  3558. class Markdown {
  3559. /**
  3560. * Set of standard readers to handle common syntax.
  3561. * @return MDReader[]
  3562. */
  public static function standardReaders(): array {
      // Built once on first use, then the same array is returned.
      if (self::$sharedStandardReaders !== null) {
          return self::$sharedStandardReaders;
      }
      $readers = [
          new MDUnderlinedHeadingReader(),
          new MDHashHeadingReader(),
          new MDBlockQuoteReader(),
          new MDHorizontalRuleReader(),
          new MDUnorderedListReader(),
          new MDOrderedListReader(),
          new MDFencedCodeBlockReader(),
          new MDIndentedCodeBlockReader(),
          new MDParagraphReader(),
          new MDStrongReader(),
          new MDEmphasisReader(),
          new MDCodeSpanReader(),
          new MDImageReader(),
          new MDLinkReader(),
          new MDHTMLTagReader(),
      ];
      self::$sharedStandardReaders = $readers;
      return self::$sharedStandardReaders;
  }
  3585. private static ?array $sharedStandardReaders = null;
  3586. /**
  3587. * All supported readers except `MDLineBreakReader`.
  3588. * @return MDReader[]
  3589. */
  public static function allReaders(): array {
      // Built once on first use and cached in the static property. The
      // result must be stored in self::$sharedAllReaders; a plain local
      // variable would be discarded on return and never cache anything.
      if (self::$sharedAllReaders === null) {
          self::$sharedAllReaders = array_merge(self::standardReaders(), [
              new MDSubtextReader(),
              new MDTableReader(),
              new MDDefinitionListReader(),
              new MDFootnoteReader(),
              new MDAbbreviationReader(),
              new MDUnderlineReader(),
              new MDSubscriptReader(),
              new MDStrikethroughReader(),
              new MDHighlightReader(),
              new MDSuperscriptReader(),
              new MDReferencedImageReader(),
              new MDReferencedLinkReader(),
              new MDModifierReader(),
          ]);
      }
      return self::$sharedAllReaders;
  }
  3610. private static ?array $sharedAllReaders = null;
  3611. /**
  3612. * Shared instance of a parser with standard syntax.
  3613. */
  public static function standardParser(): Markdown {
      // Created on first call; later calls return the same instance.
      self::$sharedStandardMarkdown ??= new Markdown(self::standardReaders());
      return self::$sharedStandardMarkdown;
  }
  3620. private static ?Markdown $sharedStandardMarkdown = null;
  3621. /**
  3622. * Shared instance of a parser with all supported syntax.
  3623. */
  public static function completeParser(): Markdown {
      // Created on first call; later calls return the same instance.
      self::$sharedCompleteParser ??= new Markdown(self::allReaders());
      return self::$sharedCompleteParser;
  }
  3630. public static ?Markdown $sharedCompleteParser = null;
  3631. /**
  3632. * Filter for what non-markdown HTML is permitted. HTML generated as a
  3633. * result of markdown is unaffected.
  3634. */
  3635. public MDHTMLFilter $tagFilter;
  3636. /**
  3637. * If an exception occurs, attempts to narrow down the portion of the
  3638. * markdown that triggered the error and outputs it to the console. For
  3639. * debugging. Investigation mode can be slow.
  3640. */
  3641. public bool $investigateErrors = false;
  3642. /** @var MDReader[] */
  3643. private array $readers;
  3644. /** @var MDReader[] */
  3645. private array $readersByBlockPriority;
  3646. /** @var MDReader[] */
  3647. private array $readersByTokenPriority;
  3648. private array $readersBySubstitutePriority;
  3649. /**
  3650. * Creates a Markdown parser with the given syntax readers.
  3651. *
  3652. * @param MDReader[] $readers
  3653. */
  public function __construct(?array $readers=null) {
      // With no readers given, fall back to the full reader set.
      $readers ??= self::allReaders();
      $this->readers = $readers;
      // Pre-sort the readers once for each parsing phase.
      $this->readersByBlockPriority = MDReader::sortReaderForBlocks($readers);
      $this->readersByTokenPriority = MDReader::sortReadersForTokenizing($readers);
      $this->readersBySubstitutePriority = MDReader::sortReadersForSubstitution($readers);
      $this->tagFilter = new MDHTMLFilter();
  }
  3661. /**
  3662. * Converts a markdown string to an HTML string.
  3663. *
  3664. * @param string $markdown
  3665. * @param string $elementIdPrefix Optional prefix for generated element
  3666. * `id`s and links to them. For differentiating multiple markdown docs in
  3667. * the same HTML page.
  3668. * @return string HTML
  3669. */
  public function toHTML(string $markdown, string $elementIdPrefix='') {
      // Split into lines on any newline convention. "\r\n" has to be the
      // first alternative: alternation takes the first branch that matches,
      // so with "\r" or "\n" listed first a CRLF pair is split twice and
      // yields a spurious empty line after every real line.
      $lines = mb_split('(?:\\r\\n|\\n|\\r)', $markdown);
      try {
          return $this->parse($lines, $elementIdPrefix);
      } catch (Error $e) {
          if ($this->investigateErrors) {
              // Debugging aid only; the original error is still rethrown.
              $this->investigateException($lines, $elementIdPrefix);
          }
          throw $e;
      }
  }
  /**
   * Runs one complete parse over already-split markdown lines.
   *
   * A fresh MDState is created for the lines and given this parser's id
   * prefix, tag filter, and the three reader orderings. Every reader then
   * pre-processes the state, the block nodes are read, every reader
   * post-processes those nodes, and the nodes are rendered through
   * `MDNode::arrayToHTML()`.
   *
   * @param string[] $lines
   * @param string $elementIdPrefix
   * @return string HTML as produced by `MDNode::arrayToHTML()` (assumed to
   * be a string; `toHTML()` returns this value unchanged)
   */
  private function parse(array $lines, string $elementIdPrefix) {
      $parseState = new MDState($lines);
      $parseState->elementIdPrefix = $elementIdPrefix;
      $parseState->tagFilter = $this->tagFilter;
      $parseState->readersByBlockPriority = $this->readersByBlockPriority;
      $parseState->readersByTokenPriority = $this->readersByTokenPriority;
      $parseState->readersBySubstitutePriority = $this->readersBySubstitutePriority;

      // Pre-processing pass: each reader sees the state before any block is read.
      foreach ($this->readers as $syntaxReader) {
          $syntaxReader->preProcess($parseState);
      }

      $blockNodes = $parseState->readBlocks();

      // Post-processing pass: each reader sees the finished node list.
      foreach ($this->readers as $syntaxReader) {
          $syntaxReader->postProcess($parseState, $blockNodes);
      }

      $html = MDNode::arrayToHTML($blockNodes, $parseState);
      return $html;
  }
  /**
   * Narrows down which lines of markdown trigger a parsing error and prints
   * the smallest failing snippet found.
   *
   * Works in two passes. First, leading lines are dropped one at a time
   * until parsing stops throwing; the last start that still failed is kept.
   * Then, with that start fixed, trailing lines are dropped one at a time
   * until parsing stops throwing; the last end that still failed is kept.
   * Each step re-parses the remaining lines, so this can be slow.
   *
   * Output goes to standard output via `print`.
   *
   * @param string[] $lines
   * @param string $elementIdPrefix
   */
  private function investigateException(array $lines, string $elementIdPrefix): void {
      print("Investigating error...\n");
      $lineCount = count($lines);
      $startIndex = 0;
      $endIndex = $lineCount; // exclusive end of the failing range
      // Keep stripping away first line until an exception stops being thrown
      for ($i = 0; $i < $lineCount; $i++) {
          try {
              $this->parse(array_slice($lines, $i), $elementIdPrefix);
              break;
          } catch (Error $e0) {
              $startIndex = $i;
          }
      }
      // Keep stripping away last line until an exception stops being thrown.
      // array_slice() takes a length, not an end index, so the exclusive end
      // $i has to be converted to ($i - $startIndex).
      for ($i = $lineCount; $i > $startIndex; $i--) {
          try {
              $this->parse(array_slice($lines, $startIndex, $i - $startIndex), $elementIdPrefix);
              break;
          } catch (Error $e0) {
              $endIndex = $i;
          }
      }
      $problematicMarkdown = implode("\n", array_slice($lines, $startIndex, $endIndex - $startIndex));
      print("This portion of markdown caused an unexpected exception:\n{$problematicMarkdown}\n");
  }
  3733. }
  3734. ?>