PHP and Javascript implementations of a simple markdown parser
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

markdown.js 116KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201
  /**
   * Static utilities: HTML escaping, indent handling, deep equality and regex
   * escaping helpers.
   */
  class MDUtils {
    // Modified from https://urlregex.com/ to remove capture groups. Matches fully qualified URLs only.
    static baseURLRegex = /(?:(?:(?:[a-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[a-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[a-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/i;
    // Modified from https://emailregex.com/ to remove capture groups.
    static baseEmailRegex = /(?:(?:[^<>()\[\]\\.,;:\s@"]+(?:\.[^<>()\[\]\\.,;:\s@"]+)*)|(?:".+"))@(?:(?:\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(?:(?:[a-z\-0-9]+\.)+[a-z]{2,}))/i;

    /**
     * Escapes special HTML characters (`&`, `<`, `>` and `"`).
     *
     * @param {string} str - string to escape
     * @param {boolean} encodeNewlinesAsBreaks - whether to convert newline characters to `<br>` tags
     * @returns {string} escaped HTML, or an empty string if `str` is not a string
     */
    static escapeHTML(str, encodeNewlinesAsBreaks=false) {
      if (typeof str !== 'string') return '';
      // `&` must be replaced first so the entities added below are not re-escaped.
      let html = str.replace(/&/g, '&amp;').replace(/</g, '&lt;')
        .replace(/>/g, '&gt;').replace(/"/g, '&quot;');
      if (encodeNewlinesAsBreaks) {
        html = html.replace(/\n/g, "<br>\n");
      }
      return html;
    }

    /**
     * Converts HTML entities to characters. HTML tags are stripped. Relies on
     * a global `DOMParser` being available.
     *
     * @param {string} html
     * @param {boolean} decodeBRsAsNewlines - whether to convert `<br>` / `<br/>`
     *   tags (and one newline directly after them) to newline characters before
     *   the tags are stripped
     * @returns {string} plain text
     */
    static unescapeHTML(html, decodeBRsAsNewlines=false) {
      if (decodeBRsAsNewlines) {
        html = html.replace(/<br[\/]?>\n?/g, "\n");
      }
      const doc = (new DOMParser()).parseFromString(html, "text/html");
      return doc.documentElement.textContent;
    }

    /**
     * Encodes characters as HTML numeric entities to make it marginally more
     * difficult for web scrapers to grab sensitive info. If `text` starts with
     * `mailto:` only the email address following it will be obfuscated.
     *
     * @param {string} text - text to escape
     * @returns {string} escaped HTML
     */
    static escapeObfuscated(text) {
      if (text.startsWith('mailto:')) {
        return 'mailto:' + this.escapeObfuscated(text.substring(7));
      }
      let html = '';
      // Iterate by code point rather than by UTF-16 code unit so a character
      // outside the BMP produces exactly one entity instead of a valid entity
      // followed by a stray low-surrogate entity.
      for (const ch of text) {
        html += `&#${ch.codePointAt(0)};`;
      }
      return html;
    }

    /**
     * Removes illegal characters from an HTML attribute name.
     *
     * @param {string} name
     * @returns {string} `name` without whitespace, `/`, `>`, quotes or `=`
     */
    static scrubAttributeName(name) {
      // Global flag: every run of illegal characters is removed, not just the first.
      return name.replace(/[\t\n\f \/>"'=]+/g, '');
    }

    /**
     * Strips one or more leading indents from a line or lines of markdown. An
     * indent is defined as 4 spaces or one tab. Incomplete indents (i.e. 1-3
     * spaces) are treated like one indent level.
     *
     * @param {string|string[]} line - string or strings to strip
     * @param {number} levels - how many indent levels to strip
     * @returns {string|string[]} stripped lines
     */
    static stripIndent(line, levels=1) {
      const regex = new RegExp(`^(?: {1,4}|\t){${levels}}`);
      return (line instanceof Array) ? line.map((l) => l.replace(regex, '')) : line.replace(regex, '');
    }

    /**
     * Counts the number of indent levels in a line of text. Partial indents
     * (1 to 3 spaces) are counted as one indent level unless `fullIndentsOnly`
     * is `true`.
     *
     * @param {string} line - line of markdown
     * @param {boolean} fullIndentsOnly - whether to only count full indent levels (4 spaces or a tab)
     * @returns {number} number of indent levels found
     */
    static countIndents(line, fullIndentsOnly=false) {
      // normalize indents to tabs
      return line.replace(fullIndentsOnly
        ? /(?: {4}|\t)/g
        : /(?: {1,4}|\t)/g,
        "\t")
        // remove content after indent
        .replace(/^(\t*)(.*?)$/, '$1')
        // count tabs
        .length;
    }

    /**
     * Returns a copy of an array without any whitespace-only lines at the end.
     *
     * @param {String[]} lines - text lines
     * @returns {String[]} - text lines without trailing blank lines
     */
    static withoutTrailingBlankLines(lines) {
      const stripped = lines.slice();
      while (stripped.length > 0 && stripped[stripped.length - 1].trim().length === 0) {
        stripped.pop();
      }
      return stripped;
    }

    /**
     * Tests if an array of lines contains at least one blank. A blank line
     * can contain whitespace.
     *
     * @param {String[]} lines
     * @returns {boolean} whether `lines` contains any whitespace-only lines
     */
    static containsBlankLine(lines) {
      for (const line of lines) {
        if (line.trim().length === 0) return true;
      }
      return false;
    }

    /**
     * Describes the type of a variable for debugging.
     *
     * @param {any} value - value
     * @returns {String} `'null'`, the constructor name for objects, or the
     *   `typeof` result for everything else
     */
    static typename(value) {
      if (value === null) return 'null';
      if (value instanceof Object) {
        return value.constructor.name;
      }
      return typeof value;
    }

    /**
     * Element-wise array comparison using `equal` on each pair of elements.
     *
     * @param {any} a
     * @param {any} b
     * @returns {boolean}
     */
    static #equalArrays(a, b) {
      if (a === b) return true;
      // `instanceof` is false for null/undefined, so no separate null check is needed.
      if (!(a instanceof Array) || !(b instanceof Array)) return false;
      if (a.length !== b.length) return false;
      for (let i = 0; i < a.length; i++) {
        if (!this.equal(a[i], b[i])) return false;
      }
      return true;
    }

    /**
     * Object comparison. Defers to `a.equals(b)` when `a` implements `equals`;
     * otherwise compares the values under every own enumerable key of both
     * objects using `equal`.
     *
     * @param {any} a
     * @param {any} b
     * @returns {boolean}
     */
    static #equalObjects(a, b) {
      if (a === b) return true;
      // `instanceof` is false for null/undefined, so no separate null check is needed.
      if (!(a instanceof Object) || !(b instanceof Object)) return false;
      if (a.equals !== undefined) {
        return a.equals(b);
      }
      for (const key of Object.keys(a)) {
        if (!this.equal(a[key], b[key])) return false;
      }
      // Second pass catches keys that exist only on `b`.
      for (const key of Object.keys(b)) {
        if (!this.equal(a[key], b[key])) return false;
      }
      return true;
    }

    /**
     * Tests for equality on lots of different kinds of values including objects
     * and arrays. Will use `.equals` on objects that implement it.
     *
     * @param {any} a
     * @param {any} b
     * @param {number} floatDifferencePercent - maximum allowed difference between
     *   two numbers, expressed as a fraction of `a` (`(b - a) / a`). Only applied
     *   when `a` and `b` are themselves numbers; it is not passed down to nested
     *   array elements or object values.
     * @returns {boolean}
     */
    static equal(a, b, floatDifferencePercent=0.0) {
      if (a instanceof Array && b instanceof Array) {
        return this.#equalArrays(a, b);
      }
      if (a instanceof Object && b instanceof Object) {
        return this.#equalObjects(a, b);
      }
      if (typeof a === 'number' && typeof b === 'number') {
        if (a === b) return true;
        const delta = b - a;
        const ratio = delta / a;
        return Math.abs(ratio) <= floatDifferencePercent;
      }
      // Loose comparison kept as in the original implementation.
      return a == b;
    }

    /**
     * Escapes special characters in a string for inclusion as a literal in a
     * regular expression.
     *
     * @param {string} text
     * @returns {string} regular expression source that matches `text` literally
     */
    static escapeRegex(text) {
      // Partially following escaping scheme from not-yet-widely-supported RegExp.escape().
      // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/escape
      // Only ever called with ASCII characters below, so two hex digits suffice.
      const escapeHex = (ch) => `\\x${ch.codePointAt(0).toString(16).padStart(2, '0')}`;
      let escaped = '';
      const l = text.length;
      for (let i = 0; i < l; i++) {
        const ch = text.substring(i, i + 1);
        if (i === 0 && /[a-zA-Z0-9]/.test(ch)) {
          // A leading alphanumeric is hex-escaped, as RegExp.escape() does.
          escaped += escapeHex(ch);
        } else if ("^$\\.*+?()[]{}|/".includes(ch)) {
          escaped += `\\${ch}`;
        } else if (",-=<>#&!%:;@~'`\"".includes(ch)) {
          escaped += escapeHex(ch);
        } else if (ch === '\f') {
          escaped += "\\f";
        } else if (ch === '\n') {
          escaped += "\\n";
        } else if (ch === '\r') {
          escaped += "\\r";
        } else if (ch === '\t') {
          escaped += "\\t";
        } else if (ch === '\v') {
          escaped += "\\v";
        } else {
          escaped += ch;
        }
      }
      return escaped;
    }
  }
  /**
   * Enumeration of the kinds of `MDToken`. Each member is a singleton instance
   * of this class and is compared by identity.
   */
  class MDTokenType {
    static Text = new MDTokenType('Text');
    /**
     * Marks only the whitespace at the start or end of a run of text, not
     * each individual whitespace character.
     */
    static Whitespace = new MDTokenType('Whitespace');

    static Underscore = new MDTokenType('Underscore');
    static Asterisk = new MDTokenType('Asterisk');
    static Slash = new MDTokenType('Slash');
    static Tilde = new MDTokenType('Tilde');
    static Bang = new MDTokenType('Bang');
    static Backtick = new MDTokenType('Backtick');
    static Equal = new MDTokenType('Equal');
    static Caret = new MDTokenType('Caret');

    static Label = new MDTokenType('Label'); // content=label
    static URL = new MDTokenType('URL'); // content=URL, extra=title
    static Email = new MDTokenType('Email'); // content=email address, extra=title
    static SimpleLink = new MDTokenType('SimpleLink'); // content=URL
    static SimpleEmail = new MDTokenType('SimpleEmail'); // content=email address
    static Footnote = new MDTokenType('Footnote'); // content=symbol
    static Modifier = new MDTokenType('Modifier'); // modifier=MDTagModifier
    static HTMLTag = new MDTokenType('HTMLTag'); // tag=MDHTMLTag

    /** Wildcard accepted by `MDToken.findFirstTokens` */
    static META_AnyNonWhitespace = new MDTokenType('META_AnyNonWhitespace');
    /** Wildcard accepted by `MDToken.findFirstTokens` */
    static META_OptionalWhitespace = new MDTokenType('META_OptionalWhitespace');

    /**
     * Name of this enum member.
     * @type {string}
     */
    name;

    /**
     * @param {string} name - name of the enum member
     */
    constructor(name) {
      this.name = name;
    }

    /**
     * @returns {string} class name and member name joined by a dot
     */
    toString() {
      return ''.concat(this.constructor.name, '.', this.name);
    }
  }
  /**
   * Result object returned by `MDToken.findFirstTokens`.
   */
  class MDTokenMatch {
    /**
     * The elements that matched the search pattern.
     * @type {Array<MDToken|MDNode>}
     */
    tokens;
    /**
     * Index in the searched array at which the match begins.
     * @type {number}
     */
    index;

    /**
     * @param {Array<MDToken|MDNode>} tokens - matched elements
     * @param {number} index - index where the match begins
     */
    constructor(tokens, index) {
      Object.assign(this, { tokens, index });
    }
  }
  /**
   * Result object returned by `MDToken.findPairedTokens`.
   */
  class MDPairedTokenMatch {
    /** @type {MDToken[]} */
    startTokens;
    /** @type {MDToken[]} */
    contentTokens;
    /** @type {MDToken[]} */
    endTokens;
    /** @type {number} */
    startIndex;
    /** @type {number} */
    contentIndex;
    /** @type {number} */
    endIndex;
    /** @type {number} */
    totalLength;

    /**
     * Stores each argument in the field of the same name.
     *
     * @param {MDToken[]} startTokens
     * @param {MDToken[]} contentTokens
     * @param {MDToken[]} endTokens
     * @param {number} startIndex
     * @param {number} contentIndex
     * @param {number} endIndex
     * @param {number} totalLength
     */
    constructor(startTokens, contentTokens, endTokens, startIndex, contentIndex, endIndex, totalLength) {
      Object.assign(this, {
        startTokens,
        contentTokens,
        endTokens,
        startIndex,
        contentIndex,
        endIndex,
        totalLength,
      });
    }
  }
  311. /**
  312. * One lexical unit in inline markdown syntax parsing.
  313. */
  314. class MDToken {
  315. /**
  316. * The original verbatim token string. Required as a plaintext fallback if
  317. * the token remains unresolved.
  318. * @type {string}
  319. */
  320. original;
  321. /** @type {MDTokenType} */
  322. type;
  323. /** @type {string|null} */
  324. content = null;
  325. /** @type {string|null} */
  326. extra = null;
  327. /** @type {MDHTMLTag|null} */
  328. tag = null;
  329. /** @type {MDTagModifier|null} */
  330. modifier = null;
  331. /**
  332. * Creates a token.
  333. *
  334. * @param {string} original - verbatim token string
  335. * @param {MDTokenType} type - token type
  336. * @param {string|MDTagModifier|MDHTMLTag|null} content - primary content of the token
  337. * @param {string|null} extra - additional content
  338. */
  339. constructor(original, type, content=null, extra=null) {
  340. this.original = original;
  341. this.type = type;
  342. if (content instanceof MDTagModifier) {
  343. this.modifier = content;
  344. } else if (content instanceof MDHTMLTag) {
  345. this.tag = content;
  346. } else {
  347. this.content = content;
  348. }
  349. this.extra = extra;
  350. }
  351. toString() {
  352. return `(${this.constructor.name} type=${this.type.toString()} content=${this.content})`;
  353. }
  354. /**
  355. * Attempts to parse a label token from the beginning of `line`. A label is
  356. * of the form `[content]`. If found, returns an array:
  357. * - `0`: the entire label including brackets
  358. * - `1`: the content of the label
  359. *
  360. * @param {string} line
  361. * @returns {string[]|null} match groups or null if not found
  362. */
  363. static tokenizeLabel(line) {
  364. if (!line.startsWith('[')) return null;
  365. var parenCount = 0;
  366. var bracketCount = 0;
  367. for (var p = 1; p < line.length; p++) {
  368. let ch = line.substring(p, p + 1);
  369. if (ch == '\\') {
  370. p++;
  371. } else if (ch == '(') {
  372. parenCount++;
  373. } else if (ch == ')') {
  374. parenCount--;
  375. if (parenCount < 0) return null;
  376. } else if (ch == '[') {
  377. bracketCount++;
  378. } else if (ch == ']') {
  379. if (bracketCount > 0) {
  380. bracketCount--;
  381. } else {
  382. return [ line.substring(0, p + 1), line.substring(1, p) ];
  383. }
  384. }
  385. }
  386. return null;
  387. }
  388. static #urlWithTitleRegex = /^\((\S+?)\s+"(.*?)"\)/i; // 1=URL, 2=title
  389. static #urlRegex = /^\((\S+?)\)/i; // 1=URL
  390. /**
  391. * Attempts to parse a URL token from the beginning of `line`. A URL token
  392. * is of the form `(url)` or `(url "title")`. If found, returns an array:
  393. * - `0`: the entire URL token including parentheses
  394. * - `1`: the URL
  395. * - `2`: the optional title, or `null`
  396. *
  397. * @param {string} line
  398. * @returns {string[]} token tuple
  399. */
  400. static tokenizeURL(line) {
  401. var groups;
  402. if (groups = this.#urlWithTitleRegex.exec(line)) {
  403. if (this.tokenizeEmail(line)) return null; // make sure it's not better described as an email address
  404. return groups;
  405. }
  406. if (groups = this.#urlRegex.exec(line)) {
  407. if (this.tokenizeEmail(line)) return null;
  408. return [...groups, null];
  409. }
  410. return null;
  411. }
  412. static #emailWithTitleRegex = new RegExp("^\\(\\s*(" + MDUtils.baseEmailRegex.source + ")\\s+\"(.*?)\"\\s*\\)", "i"); // 1=email, 2=title
  413. static #emailRegex = new RegExp("^\\(\\s*(" + MDUtils.baseEmailRegex.source + ")\\s*\\)", "i"); // 1=email
  414. /**
  415. * Attempts to parse an email address from the beginning of `line`. An
  416. * email address is of the form `(user@example.com)` or
  417. * `(user@example.com "link title")`. If found, returns an array:
  418. * - `0`: the entire token including parentheses
  419. * - `1`: the email address
  420. * - `2`: the optional link title, or `null`
  421. *
  422. * @param {string} line
  423. * @returns {string[]} token tuple
  424. */
  425. static tokenizeEmail(line) {
  426. var groups;
  427. if (groups = this.#emailWithTitleRegex.exec(line)) {
  428. return groups;
  429. }
  430. if (groups = this.#emailRegex.exec(line)) {
  431. return [...groups, null];
  432. }
  433. return null;
  434. }
  435. /**
  436. * Searches an array of `MDToken` for the given pattern of `MDTokenType`s.
  437. * If found, returns a `MDTokenMatch`, otherwise `null`.
  438. *
  439. * Special token types `META_AnyNonWhitespace` and `META_OptionalWhitespace`
  440. * are special supported token types. Note that `META_OptionalWhitespace`
  441. * may give a result with a variable number of tokens.
  442. *
  443. * @param {MDToken[]|MDNode[]} tokensToSearch - mixed array of `MDToken` and
  444. * `MDNode` elements
  445. * @param {MDTokenType[]} pattern - contiguous run of token types to find
  446. * @param {number} startIndex - token index to begin searching (defaults to 0)
  447. * @returns {MDTokenMatch|null} match object, or `null` if not found
  448. */
  449. static findFirstTokens(tokensToSearch, pattern, startIndex=0) {
  450. var matched = [];
  451. for (var t = startIndex; t < tokensToSearch.length; t++) {
  452. var matchedAll = true;
  453. matched = [];
  454. var patternOffset = 0;
  455. for (var p = 0; p < pattern.length; p++) {
  456. var t0 = t + p + patternOffset;
  457. if (t0 >= tokensToSearch.length) return null;
  458. let token = tokensToSearch[t0];
  459. let elem = pattern[p];
  460. if (elem == MDTokenType.META_OptionalWhitespace) {
  461. if (token instanceof MDToken && token.type == MDTokenType.Whitespace) {
  462. matched.push(token);
  463. } else {
  464. patternOffset--;
  465. }
  466. } else if (elem == MDTokenType.META_AnyNonWhitespace) {
  467. if (token instanceof MDToken && token.type == MDTokenType.Whitespace) {
  468. matchedAll = false;
  469. break;
  470. }
  471. matched.push(token);
  472. } else {
  473. if (!(token instanceof MDToken) || token.type != elem) {
  474. matchedAll = false;
  475. break;
  476. }
  477. matched.push(token);
  478. }
  479. }
  480. if (matchedAll) {
  481. return new MDTokenMatch(matched, t);
  482. }
  483. }
  484. return null;
  485. }
  486. /**
  487. * Searches an array of MDToken for a given starting pattern and ending
  488. * pattern and returns match info about both and the tokens in between.
  489. *
  490. * If `contentValidator` is specified, it will be called with the content
  491. * tokens of a potential match. If the validator returns `true`, the result
  492. * will be accepted and returned by this method. If the validator returns
  493. * `false`, this method will keep looking for another matching pair. If no
  494. * validator is given the first match will be returned regardless of content.
  495. *
  496. * If a match is found, a `MDPairedTokenMatch` is returned with details
  497. * of the opening tokens, closing tokens, and content tokens between. Otherwise
  498. * `null` is returned.
  499. *
  500. * @param {MDToken[]} tokensToSearch - array of `MDToken` to search in
  501. * @param {MDTokenType[]} startPattern - array of `MDTokenType` to find first
  502. * @param {MDTokenType[]} endPattern - array of `MDTokenType` to find positioned after `startPattern`
  503. * @param {function|null} contentValidator - optional validator function. If provided, will be passed an array of inner `MDToken`, and the function can return `true` to accept the contents or `false` to keep searching
  504. * @param {number} startIndex - token index where searching should begin
  505. * @returns {MDPairedTokenMatch|null} match, or `null`
  506. */
  507. static findPairedTokens(tokensToSearch, startPattern, endPattern, contentValidator=null, startIndex=0) {
  508. for (var s = startIndex; s < tokensToSearch.length; s++) {
  509. var startMatch = this.findFirstTokens(tokensToSearch, startPattern, s);
  510. if (startMatch === null) return null;
  511. var endStart = startMatch.index + startMatch.tokens.length;
  512. while (endStart < tokensToSearch.length) {
  513. var endMatch = this.findFirstTokens(tokensToSearch, endPattern, endStart);
  514. if (endMatch === null) break;
  515. var contents = tokensToSearch.slice(startMatch.index + startMatch.tokens.length, endMatch.index);
  516. if (contents.length > 0 && (contentValidator === null || contentValidator(contents))) {
  517. return new MDPairedTokenMatch(startMatch.tokens,
  518. contents,
  519. endMatch.tokens,
  520. startMatch.index,
  521. startMatch.index + startMatch.tokens.length,
  522. endMatch.index,
  523. endMatch.index + endMatch.tokens.length - startMatch.index);
  524. } else {
  525. // Contents rejected. Try next end match.
  526. endStart = endMatch.index + 1;
  527. }
  528. }
  529. // No end matches. Increment start match.
  530. s = startMatch.index;
  531. }
  532. return null;
  533. }
  534. equals(other) {
  535. if (!(other instanceof MDToken)) return false;
  536. if (other.original !== this.original) return false;
  537. if (!other.type.equals(this.type)) return false;
  538. if (other.content !== this.content) return false;
  539. if (other.extra !== this.extra) return false;
  540. if (!MDUtils.equal(other.tag, this.tag)) return false;
  541. if (!MDUtils.equals(other.modifier, this.modifier)) return false;
  542. return true
  543. }
  544. }
  545. /**
  546. * Parsing and rendering state. Passed around throughout the parsing process.
  547. *
  548. * States are hierarchical. A sub-state can be created by calling `.copy()` with
  549. * a new array of lines. The sub-state points back to its parent state. This
  550. * is done to parse inner content of a syntax as its own standalone document.
  551. *
  552. * If a custom `MDReader` implementation wants to store data in this object,
  553. * always do so on `state.root` to ensure it's stored on the original state,
  554. * not a child state. Otherwise data may be lost when the sub-state is discarded.
  555. */
  class MDState {
    /**
     * Ascends the parent chain to the root `MDState` instance. This should be
     * used when referencing most stored fields except `lines` and `p`.
     *
     * @type {MDState}
     */
    get root() { return this.#parent ? this.#parent.root : this; }

    /**
     * Lines of the markdown document. The current line index is pointed to by `p`.
     *
     * @type {string[]}
     */
    lines;

    /**
     * The current line in `lines`.
     *
     * @returns {string|null} current line or `null` if out of content
     */
    get currentLine() { return (this.p < this.lines.length) ? this.lines[this.p] : null; }

    /**
     * Current line pointer into array `lines`.
     *
     * @type {number} line pointer
     */
    p = 0;

    /** @type {MDState|null} */
    #parent = null;

    /**
     * Array of `MDReader`s sorted by block reading priority.
     * @type {MDReader[]}
     */
    readersByBlockPriority = [];

    /**
     * Array of `MDReader`s sorted by tokenization priority.
     * @type {MDReader[]}
     */
    readersByTokenPriority = [];

    /**
     * Array of tuples of `pass:number` and `MDReader` sorted by substitution
     * priority.
     * @type {Array}
     */
    readersBySubstitutePriority = [];

    /**
     * Prefix to include in any generated `id` attributes on HTML elements.
     * Useful for keeping elements unique in multiple parsed documents in the
     * same HTML page.
     *
     * @type {string}
     */
    elementIdPrefix = '';

    /**
     * Filter for removing unapproved HTML tags, attributes, and values.
     * @type {MDHTMLFilter}
     */
    tagFilter;

    static #textWhitespaceRegex = /^(\s*)(?:(\S|\S.*\S)(\s*?))?$/; // 1=leading WS, 2=text, 3=trailing WS

    /**
     * @param {string[]} lines - lines of markdown text
     */
    constructor(lines) {
      this.lines = lines;
    }

    /**
     * Creates a copy of this state with new lines. Useful for parsing nested
     * content.
     *
     * @param {string[]} lines
     * @returns {MDState} copied sub-state
     */
    copy(lines) {
      const cp = new MDState(lines);
      cp.#parent = this;
      return cp;
    }

    /**
     * Tests if there are at least `minCount` lines available to read. If `p`
     * is not provided it will be relative to `this.p`.
     *
     * @param {number} minCount - minimum number of lines
     * @param {number|null} p - line pointer, or `null` to use `this.p`
     * @returns {boolean} whether at least the given number of lines is available
     */
    hasLines(minCount, p=null) {
      const relativeTo = (p === null) ? this.p : p;
      return relativeTo + minCount <= this.lines.length;
    }

    /**
     * Reads and returns an array of blocks from the current line pointer.
     *
     * @returns {MDBlockNode[]} parsed blocks
     */
    readBlocks() {
      const blocks = [];
      while (this.hasLines(1)) {
        const block = this.#readNextBlock();
        if (!block) break;
        blocks.push(block);
      }
      return blocks;
    }

    /**
     * Creates a simple `MDBlockNode` if no other registered blocks match.
     * Consumes every remaining line.
     *
     * @returns {MDBlockNode|null} fallback block
     */
    #readFallbackBlock() {
      if (this.p >= this.lines.length) return null;
      const lines = MDUtils.withoutTrailingBlankLines(this.lines.slice(this.p));
      if (lines.length == 0) return null;
      this.p = this.lines.length;
      return this.inlineMarkdownToNode(lines.join("\n"));
    }

    /**
     * Attempts to read one block from the current line pointer. The pointer
     * will be positioned just after the end of the block.
     *
     * @returns {MDBlockNode|null}
     * @throws {Error} if a reader returns a block without advancing `p`
     */
    #readNextBlock() {
      // Skip blank lines between blocks.
      while (this.hasLines(1) && this.lines[this.p].trim().length == 0) {
        this.p++;
      }
      if (!this.hasLines(1)) return null;
      for (const reader of this.root.readersByBlockPriority) {
        const startP = this.p;
        const block = reader.readBlock(this);
        if (block) {
          if (this.p == startP) {
            throw new Error(`${reader.constructor.name} returned an ` +
              `${block.constructor.name} without incrementing MDState.p. ` +
              `This could lead to an infinite loop.`);
          }
          return block;
        }
      }
      return this.#readFallbackBlock();
    }

    /**
     * Splits a string of inline markdown into tokens using the root state's
     * token readers. Always runs on the root state.
     *
     * @param {string} line
     * @returns {MDToken[]}
     * @throws {Error} if a reader returns a token with an empty `original`
     */
    #inlineMarkdownToTokens(line) {
      if (this.#parent) return this.#parent.#inlineMarkdownToTokens(line);
      const tokens = [];
      let text = '';
      let expectLiteral = false;
      /**
       * Flushes accumulated content in `text` to `tokens`, splitting leading
       * and trailing whitespace into their own tokens.
       */
      const endText = () => {
        if (text.length == 0) return;
        const textGroups = MDState.#textWhitespaceRegex.exec(text);
        if (textGroups !== null) {
          if (textGroups[1].length > 0) {
            tokens.push(new MDToken(textGroups[1], MDTokenType.Whitespace, textGroups[1]));
          }
          if (textGroups[2] !== undefined && textGroups[2].length > 0) {
            tokens.push(new MDToken(textGroups[2], MDTokenType.Text, textGroups[2]));
          }
          if (textGroups[3] !== undefined && textGroups[3].length > 0) {
            tokens.push(new MDToken(textGroups[3], MDTokenType.Whitespace, textGroups[3]));
          }
        } else {
          tokens.push(new MDToken(text, MDTokenType.Text, text));
        }
        text = '';
      };
      for (let p = 0; p < line.length; p++) {
        const ch = line.substring(p, p + 1);
        const remainder = line.substring(p);
        if (expectLiteral) {
          // Character following a backslash is taken verbatim.
          text += ch;
          expectLiteral = false;
          continue;
        }
        if (ch == '\\') {
          expectLiteral = true;
          continue;
        }
        let found = false;
        for (const reader of this.root.readersByTokenPriority) {
          const token = reader.readToken(this, remainder);
          if (token === null) continue;
          if (token === undefined) {
            // A reader that forgot to return a value is treated as "no
            // token" after warning; using the value would throw a TypeError.
            console.warn(`${reader.constructor.name}.readToken returned undefined instead of null`);
            continue;
          }
          if (token.original == null || token.original.length == 0) {
            throw new Error(`${reader.constructor.name} returned a token with an empty .original. This would cause an infinite loop.`);
          }
          endText();
          tokens.push(token);
          // Skip past the consumed text (the loop adds the final +1).
          p += token.original.length - 1;
          found = true;
          break;
        }
        if (!found) {
          text += ch;
        }
      }
      endText();
      return tokens;
    }

    /**
     * Converts a line of markdown to an `MDInlineNode`.
     *
     * @param {string|string[]} line
     * @returns {MDInlineNode}
     */
    inlineMarkdownToNode(line) {
      const nodes = this.inlineMarkdownToNodes(line);
      return (nodes.length == 1) ? nodes[0] : new MDInlineNode(nodes);
    }

    /**
     * Converts a line of markdown to an array of `MDInlineNode`s.
     *
     * @param {string|string[]} line
     * @returns {MDInlineNode[]}
     */
    inlineMarkdownToNodes(line) {
      const tokens = this.#inlineMarkdownToTokens((line instanceof Array) ? line.join('\n') : line);
      return this.tokensToNodes(tokens);
    }

    /**
     * Converts a mixed array of `MDToken` and `MDInlineNode` elements into an array
     * of only `MDInlineNode` via repeated `MDReader` substitution.
     *
     * @param {MDToken[]|MDInlineNode[]} tokens
     * @returns {MDInlineNode[]}
     * @throws {Error} if an element is neither an `MDToken` nor an `MDNode`
     */
    tokensToNodes(tokens) {
      const nodes = tokens.slice();
      // Perform repeated substitutions, converting sequences of tokens into
      // nodes, until no more substitutions can be made. After any change the
      // scan restarts from the highest-priority reader.
      let anyChanges;
      do {
        anyChanges = false;
        for (const readerTuple of this.root.readersBySubstitutePriority) {
          /** @type {number} */
          const pass = readerTuple[0];
          /** @type {MDReader} */
          const reader = readerTuple[1];
          if (!reader.substituteTokens(this, pass, nodes)) continue;
          anyChanges = true;
          break;
        }
      } while (anyChanges);
      // Convert any remaining tokens to text nodes. Also apply any inline
      // CSS modifiers to the non-text node immediately preceding them.
      let lastNode = null;
      return nodes.map((node) => {
        if (node instanceof MDToken) {
          /** @type {MDToken} */
          const token = node;
          if (token.type == MDTokenType.Modifier && lastNode) {
            // The tag name is not known here, so pass `null` explicitly to
            // restrict the modifier to globally allowed attributes.
            this.root.tagFilter.scrubModifier(token.modifier, null);
            token.modifier.applyTo(lastNode);
            lastNode = null;
            return new MDTextNode('');
          }
          lastNode = null;
          return new MDTextNode(token.original);
        } else if (node instanceof MDNode) {
          lastNode = (node instanceof MDTextNode) ? null : node;
          return node;
        } else {
          throw new Error(`Unexpected node type ${node?.constructor?.name ?? node}`);
        }
      });
    }

    /**
     * Mapping of reference symbols to URLs. Used by `MDReferencedLinkReader`
     * and `MDReferencedImageReader`. A `Map` is used so that symbols such as
     * `constructor` or `__proto__` behave like any other key.
     * @type {Map<string, string>} symbol -> URL
     */
    #referenceToURL = new Map();

    /**
     * Mapping of reference symbols to titles. Used by `MDReferencedLinkReader`
     * and `MDReferencedImageReader`.
     * @type {Map<string, string>} symbol -> title string
     */
    #referenceToTitle = new Map();

    /**
     * Defines a URL by reference symbol. Always stored on the root state.
     *
     * @param {string} reference - case-insensitive reference symbol
     * @param {string} url - URL to map the symbol to
     * @param {string|null} title - optional link title
     */
    defineURL(reference, url, title=null) {
      const key = reference.toLowerCase();
      const root = this.root;
      root.#referenceToURL.set(key, url);
      if (title !== null) root.#referenceToTitle.set(key, title);
    }

    /**
     * Returns the URL associated with a reference symbol.
     *
     * @param {string} reference - case-insensitive reference symbol
     * @returns {string|null} URL for the given reference, or `null` if not defined
     */
    urlForReference(reference) {
      return this.root.#referenceToURL.get(reference.toLowerCase()) ?? null;
    }

    /**
     * Returns the link title associated with a reference symbol.
     *
     * @param {string} reference - case-insensitive reference symbol
     * @returns {string|null} link title for the given reference, or `null` if not defined
     */
    urlTitleForReference(reference) {
      return this.root.#referenceToTitle.get(reference.toLowerCase()) ?? null;
    }
  }
  877. /**
  878. * Defines a set of allowable HTML tags, attributes, and CSS.
  879. */
  class MDHTMLFilter {
    /**
     * Mapping of permitted lowercase tag names to objects containing allowable
     * attributes for those tags. Does not need to include those attributes
     * defined in `allowableGlobalAttributes`.
     *
     * Values are objects with allowable lowercase attribute names mapped to
     * allowable value patterns. A `*` means any value is acceptable. Multiple
     * allowable values can be joined together with `|`. These special symbols
     * represent certain kinds of values and can be used in combination or in
     * place of literal values.
     *
     * - `{classlist}`: A list of legal CSS classnames, separated by spaces
     * - `{int}`: An integer
     * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
     * - `{style}`: One or more CSS declarations, separated by semicolons (simple
     *   `key: value;` syntax only)
     * - `{url}`: A URL
     * @type {object}
     */
    allowableTags = {
      'address': {
        'cite': '{url}',
      },
      'h1': {},
      'h2': {},
      'h3': {},
      'h4': {},
      'h5': {},
      'h6': {},
      'blockquote': {},
      'dl': {},
      'dt': {},
      'dd': {},
      'div': {},
      'hr': {},
      'ul': {},
      'ol': {
        'start': '{int}',
        'type': 'a|A|i|I|1',
      },
      'li': {
        'value': '{int}',
      },
      'p': {},
      'pre': {},
      'table': {},
      'thead': {},
      'tbody': {},
      'tfoot': {},
      'tr': {},
      'td': {},
      'th': {},
      'a': {
        'href': '{url}',
        'target': '*',
      },
      'abbr': {},
      'b': {},
      'br': {},
      'cite': {},
      'code': {},
      'data': {
        'value': '*',
      },
      'dfn': {},
      'em': {},
      'i': {},
      'kbd': {},
      'mark': {},
      'q': {
        'cite': '{url}',
      },
      's': {},
      'samp': {},
      'small': {},
      'span': {},
      'strong': {},
      'sub': {},
      'sup': {},
      'time': {
        'datetime': '*',
      },
      'u': {},
      'var': {},
      'wbr': {},
      'img': {
        'alt': '*',
        // `href` is not an <img> attribute; it is kept only for backward
        // compatibility. `src` is what actually points at the image.
        'href': '{url}',
        'src': '{url}',
      },
      'figure': {},
      'figcaption': {},
      'del': {},
      'ins': {},
      'details': {},
      'summary': {},
    };

    /**
     * Mapping of allowable lowercase global attributes to their permitted
     * values. Uses same value pattern syntax as described in `allowableTags`.
     * A name ending in `*` permits any attribute starting with that prefix.
     * @type {object}
     */
    allowableGlobalAttributes = {
      'class': '{classlist}',
      'data-*': '*',
      'dir': 'ltr|rtl|auto',
      'id': '*',
      'lang': '*',
      'style': '{style}',
      'title': '*',
      'translate': 'yes|no|{none}',
    };

    /**
     * Mapping of allowable CSS style names to their allowable value patterns.
     * Multiple values can be delimited with `|` characters. Limited support
     * so far.
     *
     * Recognized special values:
     * - `{color}`: A hex or named color
     *
     * @type {object}
     */
    allowableStyleKeys = {
      'background-color': '{color}',
      'color': '{color}',
    };

    /**
     * Looks up `key` in a whitelist table, ignoring anything inherited from
     * `Object.prototype`. Without this, names such as `constructor` would
     * appear to be whitelisted because every plain object inherits them.
     *
     * @param {object} table
     * @param {string} key
     * @returns {*} the table's own value for `key`, or `undefined`
     */
    static #own(table, key) {
      return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    }

    /**
     * Scrubs all forbidden attributes from an HTML tag. Assumes the tag name
     * itself has already been whitelisted.
     *
     * @param {MDHTMLTag} tag - HTML tag
     */
    scrubTag(tag) {
      for (const name of Object.keys(tag.attributes)) {
        const allowed = this.isValidAttributeName(tag.tagName, name) &&
          this.isValidAttributeValue(tag.tagName, name, tag.attributes[name]);
        if (!allowed) {
          delete tag.attributes[name];
        }
      }
    }

    /**
     * Scrubs all forbidden attributes from an HTML modifier.
     *
     * @param {MDTagModifier} modifier
     * @param {string|null} tagName - HTML tag name, if known, otherwise only
     * global attributes will be permitted
     */
    scrubModifier(modifier, tagName=null) {
      if (modifier.cssClasses.length > 0) {
        const classList = modifier.cssClasses.join(' ');
        if (!this.isValidAttributeValue(tagName, 'class', classList)) {
          modifier.cssClasses = [];
        }
      }
      if (modifier.cssId !== null) {
        if (!this.isValidAttributeValue(tagName, 'id', modifier.cssId)) {
          modifier.cssId = null;
        }
      }
      if (!this.isValidAttributeName(tagName, 'style')) {
        modifier.cssStyles = {};
      } else {
        for (const key of Object.keys(modifier.cssStyles)) {
          if (!this.isValidStyleValue(key, modifier.cssStyles[key])) {
            delete modifier.cssStyles[key];
          }
        }
      }
      for (const key of Object.keys(modifier.attributes)) {
        if (!this.isValidAttributeValue(tagName, key, modifier.attributes[key])) {
          delete modifier.attributes[key];
        }
      }
    }

    /**
     * Tests if an HTML tag name is permitted.
     *
     * @param {string} tagName
     * @returns {boolean}
     */
    isValidTagName(tagName) {
      return MDHTMLFilter.#own(this.allowableTags, tagName.toLowerCase()) !== undefined;
    }

    /**
     * Tests if an HTML attribute name is permitted.
     *
     * @param {string|null} tagName - HTML tag name or null to only check global
     * attributes
     * @param {string} attributeName - attribute name
     * @returns {boolean}
     */
    isValidAttributeName(tagName, attributeName) {
      const lcAttributeName = attributeName.toLowerCase();
      if (MDHTMLFilter.#own(this.allowableGlobalAttributes, lcAttributeName) !== undefined) {
        return true;
      }
      // Wildcard global attributes such as `data-*`.
      for (const pattern in this.allowableGlobalAttributes) {
        if (pattern.endsWith('*') && lcAttributeName.startsWith(pattern.substring(0, pattern.length - 1))) {
          return true;
        }
      }
      // `== null` also covers callers that omit the tag name entirely.
      if (tagName == null) return false;
      const tagAttributes = MDHTMLFilter.#own(this.allowableTags, tagName.toLowerCase());
      if (!tagAttributes) return false;
      return MDHTMLFilter.#own(tagAttributes, lcAttributeName) !== undefined;
    }

    /**
     * Tests if an attribute value is allowable.
     *
     * @param {string|null} tagName
     * @param {string} attributeName
     * @param {string|boolean} attributeValue - value string, or `true` for an
     * attribute given without a value
     * @returns {boolean}
     */
    isValidAttributeValue(tagName, attributeName, attributeValue) {
      const lcAttributeName = attributeName.toLowerCase();
      const globalPattern = MDHTMLFilter.#own(this.allowableGlobalAttributes, lcAttributeName);
      if (globalPattern !== undefined) {
        return this.#attributeValueMatchesPattern(attributeValue, globalPattern);
      }
      for (const namePattern in this.allowableGlobalAttributes) {
        if (namePattern.endsWith('*') && lcAttributeName.startsWith(namePattern.substring(0, namePattern.length - 1))) {
          return this.#attributeValueMatchesPattern(attributeValue, this.allowableGlobalAttributes[namePattern]);
        }
      }
      if (tagName == null) return false;
      const tagAttributes = MDHTMLFilter.#own(this.allowableTags, tagName.toLowerCase());
      if (tagAttributes === undefined) return false;
      const valuePattern = MDHTMLFilter.#own(tagAttributes, lcAttributeName);
      if (valuePattern === undefined) return false;
      return this.#attributeValueMatchesPattern(attributeValue, valuePattern);
    }

    // NOTE(review): deliberately permissive; any run of non-whitespace passes,
    // so URL schemes (e.g. `javascript:`) are not restricted here.
    static #permissiveURLRegex = /^\S+$/;
    static #integerRegex = /^[\-]?\d+$/;
    static #classListRegex = /^-?[_a-zA-Z]+[_a-zA-Z0-9-]*(?:\s+-?[_a-zA-Z]+[_a-zA-Z0-9-]*)*$/;

    /**
     * Tests a value against a `|`-delimited pattern of literal values and
     * special `{...}` symbols. The value passes if any option accepts it.
     *
     * @param {string|boolean} value
     * @param {string} pattern
     * @returns {boolean}
     */
    #attributeValueMatchesPattern(value, pattern) {
      for (const option of pattern.split('|')) {
        switch (option) {
          case '*':
            return true;
          case '{classlist}':
            if (MDHTMLFilter.#classListRegex.exec(value)) return true;
            break;
          case '{int}':
            if (MDHTMLFilter.#integerRegex.exec(value)) return true;
            break;
          case '{none}':
            if (value === true) return true;
            break;
          case '{style}':
            if (this.isValidStyleDeclaration(value)) return true;
            break;
          case '{url}':
            if (MDHTMLFilter.#permissiveURLRegex.exec(value)) return true;
            break;
          default:
            if (value === option) return true;
            break;
        }
      }
      return false;
    }

    /**
     * Tests if a string of one or more style `key: value;` declarations is
     * fully allowable.
     *
     * @param {string} styles
     * @returns {boolean} `false` for any non-string input, such as the `true`
     * stored for a `style` attribute written without a value
     */
    isValidStyleDeclaration(styles) {
      if (typeof styles !== 'string') return false;
      for (const setting of styles.split(';')) {
        if (setting.trim().length == 0) continue;
        const parts = setting.split(':');
        if (parts.length != 2) return false;
        const name = parts[0].trim();
        if (!this.isValidStyleKey(name)) return false;
        const value = parts[1].trim();
        if (!this.isValidStyleValue(name, value)) return false;
      }
      return true;
    }

    /**
     * Tests if a CSS style key is allowable.
     *
     * @param {string} key - CSS key
     * @returns {boolean}
     */
    isValidStyleKey(key) {
      return MDHTMLFilter.#own(this.allowableStyleKeys, key) !== undefined;
    }

    /**
     * Tests if a CSS style value is allowable.
     *
     * @param {string} key
     * @param {string} value
     * @returns {boolean}
     */
    isValidStyleValue(key, value) {
      const pattern = MDHTMLFilter.#own(this.allowableStyleKeys, key);
      if (pattern === undefined) return false;
      for (const option of pattern.split('|')) {
        switch (option) {
          case '{color}':
            if (this.#isValidCSSColor(value)) return true;
            // Must not fall through: otherwise the literal text `{color}`
            // would be accepted as a value.
            break;
          default:
            if (value === option) return true;
            break;
        }
      }
      return false;
    }

    static #styleColorRegex = /^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$/i;

    /**
     * @param {string} value
     * @returns {boolean} whether `value` is a 3- or 6-digit hex color or a
     * purely alphabetic word
     */
    #isValidCSSColor(value) {
      return MDHTMLFilter.#styleColorRegex.exec(value) !== null;
    }
  }
  1211. /**
  1212. * Represents a single HTML tag. Paired tags are represented separately.
  1213. */
  class MDHTMLTag {
    /**
     * Verbatim string of the original parsed tag. Not modified. Should be
     * considered unsafe for inclusion in the final document. Use `toString()`
     * instead.
     * @type {string}
     */
    original;

    /** @type {string} */
    tagName;

    /** @type {boolean} */
    isCloser;

    /**
     * Map of attribute names to value strings. An attribute written without
     * a value is stored as `true`.
     *
     * @type {object}
     */
    attributes;

    /**
     * @param {string} original
     * @param {string} tagName
     * @param {boolean} isCloser
     * @param {object} attributes
     */
    constructor(original, tagName, isCloser, attributes) {
      this.original = original;
      this.tagName = tagName;
      this.isCloser = isCloser;
      this.attributes = attributes;
    }

    /**
     * Rebuilds the tag as HTML from its parsed parts. Closing tags carry no
     * attributes; attribute names are scrubbed and values HTML-escaped.
     *
     * @returns {string}
     */
    toString() {
      if (this.isCloser) {
        return `</${this.tagName}>`;
      }
      let html = '<';
      html += this.tagName;
      for (const key in this.attributes) {
        const safeName = MDUtils.scrubAttributeName(key);
        const value = this.attributes[key];
        if (value === true) {
          html += ` ${safeName}`;
        } else {
          const escapedValue = MDUtils.escapeHTML(`${value}`);
          html += ` ${safeName}="${escapedValue}"`;
        }
      }
      html += '>';
      return html;
    }

    /**
     * Compares tag name, closer flag, and attributes. `original` is ignored.
     *
     * @param {*} other
     * @returns {boolean}
     */
    equals(other) {
      if (!(other instanceof MDHTMLTag)) return false;
      if (other.tagName != this.tagName) return false;
      if (other.isCloser != this.isCloser) return false;
      return MDUtils.equal(other.attributes, this.attributes);
    }

    static #htmlTagNameFirstRegex = /[a-z]/i;
    static #htmlTagNameMedialRegex = /[a-z0-9]/i;
    static #htmlAttributeNameFirstRegex = /[a-z]/i;
    static #htmlAttributeNameMedialRegex = /[a-z0-9-]/i;
    static #whitespaceCharRegex = /\s/;

    /**
     * Checks the start of the given string for presence of an HTML tag.
     *
     * Implemented as a character-by-character state machine. A state that
     * needs to re-examine the current character under a new state decrements
     * `p` before breaking.
     *
     * @param {string} line
     * @returns {MDHTMLTag|null} HTML tag if found, `null` otherwise
     */
    static fromLineStart(line) {
      // Parser states.
      const expectOpenBracket = 0;
      const expectCloserOrName = 1;
      const expectName = 2;
      const expectAttributeNameOrEnd = 3;
      const expectEqualsOrAttributeOrEnd = 4;
      const expectAttributeValue = 5;
      const expectCloseBracket = 6;

      let isCloser = false;
      let tagName = '';
      let attributeName = '';
      let attributeValue = '';
      // null = no value started, '' = unquoted value, otherwise the quote char
      let attributeQuote = null;
      const attributes = {};
      let fullTag = null;

      // Commits the attribute being built (if any) and resets the scratch
      // variables for the next one.
      // NOTE(review): only the empty-quoted-value call site requests
      // unescaping, so non-empty quoted values are stored still escaped.
      // Confirm whether that is intended, since toString() escapes again.
      const endAttribute = (unescape=false) => {
        if (attributeName.length > 0) {
          if (attributeValue.length > 0 || attributeQuote) {
            attributes[attributeName] = unescape ? MDUtils.unescapeHTML(attributeValue) : attributeValue;
          } else {
            attributes[attributeName] = true;
          }
        }
        attributeName = '';
        attributeValue = '';
        attributeQuote = null;
      };

      let expect = expectOpenBracket;
      for (let p = 0; p < line.length && fullTag === null; p++) {
        const ch = line.substring(p, p + 1);
        const isWhitespace = MDHTMLTag.#whitespaceCharRegex.exec(ch) !== null;
        switch (expect) {
          case expectOpenBracket:
            if (ch != '<') return null;
            expect = expectCloserOrName;
            break;
          case expectCloserOrName:
            if (ch == '/') {
              isCloser = true;
            } else {
              p--;
            }
            expect = expectName;
            break;
          case expectName:
            if (tagName.length == 0) {
              if (MDHTMLTag.#htmlTagNameFirstRegex.exec(ch) === null) return null;
              tagName += ch;
            } else if (MDHTMLTag.#htmlTagNameMedialRegex.exec(ch)) {
              tagName += ch;
            } else {
              p--;
              expect = (isCloser) ? expectCloseBracket : expectAttributeNameOrEnd;
            }
            break;
          case expectAttributeNameOrEnd:
            if (attributeName.length == 0) {
              if (isWhitespace) {
                // skip whitespace
              } else if (ch == '/') {
                expect = expectCloseBracket;
              } else if (ch == '>') {
                fullTag = line.substring(0, p + 1);
              } else if (MDHTMLTag.#htmlAttributeNameFirstRegex.exec(ch)) {
                attributeName += ch;
              } else {
                return null;
              }
            } else if (isWhitespace) {
              expect = expectEqualsOrAttributeOrEnd;
            } else if (ch == '/') {
              endAttribute();
              expect = expectCloseBracket;
            } else if (ch == '>') {
              endAttribute();
              fullTag = line.substring(0, p + 1);
            } else if (ch == '=') {
              expect = expectAttributeValue;
            } else if (MDHTMLTag.#htmlAttributeNameMedialRegex.exec(ch)) {
              attributeName += ch;
            } else {
              return null;
            }
            break;
          case expectEqualsOrAttributeOrEnd:
            if (ch == '=') {
              expect = expectAttributeValue;
            } else if (isWhitespace) {
              // skip whitespace
            } else if (ch == '/') {
              expect = expectCloseBracket;
            } else if (ch == '>') {
              fullTag = line.substring(0, p + 1);
            } else if (MDHTMLTag.#htmlAttributeNameFirstRegex.exec(ch)) {
              // Previous attribute had no value; a new attribute starts here.
              endAttribute();
              expect = expectAttributeNameOrEnd;
              p--;
            }
            break;
          case expectAttributeValue:
            if (attributeValue.length == 0) {
              if (attributeQuote === null) {
                if (isWhitespace) {
                  // skip whitespace
                } else if (ch == '"' || ch == "'") {
                  attributeQuote = ch;
                } else {
                  attributeQuote = ''; // explicitly unquoted
                  p--;
                }
              } else if (ch === attributeQuote) {
                // Empty string
                endAttribute(attributeQuote != '');
                expect = expectAttributeNameOrEnd;
              } else if (attributeQuote === '' && (ch == '/' || ch == '>')) {
                // `=` followed directly by the end of the tag
                return null;
              } else {
                attributeValue += ch;
              }
            } else if (ch === attributeQuote) {
              endAttribute();
              expect = expectAttributeNameOrEnd;
            } else if (attributeQuote === '' && isWhitespace) {
              endAttribute();
              expect = expectAttributeNameOrEnd;
            } else if (attributeQuote === '' && ch == '>') {
              // An unquoted value ends at the closing bracket, as in
              // `<a href=foo>`. Previously the `>` was absorbed into the
              // value and the tag was never recognized.
              endAttribute();
              fullTag = line.substring(0, p + 1);
            } else {
              attributeValue += ch;
            }
            break;
          case expectCloseBracket:
            if (isWhitespace) {
              // ignore whitespace
            } else if (ch == '>') {
              fullTag = line.substring(0, p + 1);
            }
            break;
        }
      }
      if (fullTag === null) return null;
      // Commit a trailing valueless attribute, e.g. `<input disabled >`.
      endAttribute();
      return new MDHTMLTag(fullTag, tagName, isCloser, attributes);
    }
  }
  1433. /**
  1434. * Represents HTML modifications to a node, such as CSS classes to add or
  1435. * additional attributes. See `MDHTMLFilter.scrubModifier()` to remove disallowed
  1436. * values.
  1437. */
  1438. class MDTagModifier {
  1439. /**
  1440. * Verbatim markdown syntax. Unmodified by changes to other properties.
  1441. * @type {string}
  1442. */
  1443. original;
  1444. /** @type {string[]} */
  1445. cssClasses = [];
  1446. /** @type {string|null} */
  1447. cssId = null;
  1448. /** @type {object} */
  1449. cssStyles = {};
  1450. /** @type {object} */
  1451. attributes = {};
  1452. static #baseClassRegex = /\.([a-z_\-][a-z0-9_\-]*?)/i;
  1453. static #baseIdRegex = /#([a-z_\-][a-z0-9_\-]*?)/i;
  1454. static #baseAttributeRegex = /([a-z0-9]+?)=([^\s\}]+?)/i;
  1455. static #baseRegex = /\{([^}]+?)}/i;
  1456. static #leadingClassRegex = new RegExp('^' + this.#baseRegex.source, 'i');
  1457. static #trailingClassRegex = new RegExp('^(.*?)\\s*' + this.#baseRegex.source + '\\s*$', 'i');
  1458. static #classRegex = new RegExp('^' + this.#baseClassRegex.source + '$', 'i'); // 1=classname
  1459. static #idRegex = new RegExp('^' + this.#baseIdRegex.source + '$', 'i'); // 1=id
  1460. static #attributeRegex = new RegExp('^' + this.#baseAttributeRegex.source + '$', 'i'); // 1=attribute name, 2=attribute value
  1461. /**
  1462. * @param {MDNode} node
  1463. */
  1464. applyTo(node) {
  1465. if (node instanceof MDNode) {
  1466. node.cssClasses = node.cssClasses.concat(this.cssClasses);
  1467. if (this.cssId) node.cssId = this.cssId;
  1468. for (const name in this.attributes) {
  1469. node.attributes[name] = this.attributes[name];
  1470. }
  1471. for (const name in this.cssStyles) {
  1472. node.cssStyles[name] = this.cssStyles[name];
  1473. }
  1474. }
  1475. }
  1476. /**
  1477. * Adds a CSS class. If already present it will not be duplicated.
  1478. *
  1479. * @param {string} cssClass
  1480. * @returns {boolean} whether the class was added
  1481. */
  1482. addClass(cssClass) {
  1483. if (this.cssClasses.contains(cssClass)) return false;
  1484. this.cssClasses.push(cssClass);
  1485. return true;
  1486. }
  1487. /**
  1488. * Removes a CSS class.
  1489. *
  1490. * @param {string} cssClass
  1491. * @returns {boolean} whether the class was present and removed
  1492. */
  1493. removeClass(cssClass) {
  1494. const beforeLength = this.cssClasses.length;
  1495. this.cssClasses = this.cssClasses.filter((val) => val !== cssClass);
  1496. return this.cssClasses.length != beforeLength;
  1497. }
  1498. equals(other) {
  1499. if (!(other instanceof MDTagModifier)) return false;
  1500. if (!MDUtils.equal(other.cssClasses, this.cssClasses)) return false;
  1501. if (other.cssId !== this.cssId) return false;
  1502. if (!MDUtils.equal(other.attributes, this.attributes)) return false;
  1503. return true;
  1504. }
  1505. toString() {
  1506. return this.original;
  1507. }
  1508. static #styleToObject(styleValue) {
  1509. const pairs = styleValue.split(';');
  1510. var styles = {};
  1511. for (const pair of pairs) {
  1512. const keyAndValue = pair.split(':');
  1513. if (keyAndValue.length != 2) continue;
  1514. styles[keyAndValue[0]] = keyAndValue[1];
  1515. }
  1516. return styles;
  1517. }
  1518. static #fromContents(contents) {
  1519. let modifierTokens = contents.split(/\s+/);
  1520. let mod = new MDTagModifier();
  1521. mod.original = `{${contents}}`;
  1522. var groups;
  1523. for (const token of modifierTokens) {
  1524. if (token.trim() == '') continue;
  1525. if (groups = this.#classRegex.exec(token)) {
  1526. mod.cssClasses.push(groups[1]);
  1527. } else if (groups = this.#idRegex.exec(token)) {
  1528. mod.cssId = groups[1];
  1529. } else if (groups = this.#attributeRegex.exec(token)) {
  1530. if (groups[1] == 'style') {
  1531. mod.cssStyles = this.#styleToObject(groups[2]);
  1532. } else {
  1533. mod.attributes[groups[1]] = groups[2];
  1534. }
  1535. } else {
  1536. return null;
  1537. }
  1538. }
  1539. return mod;
  1540. }
  1541. /**
  1542. * Extracts block modifier from end of a line. Always returns a 2-element
  1543. * tuple array:
  1544. * - `0`: the line without the modifier
  1545. * - `1`: an `MDTagModifier` if found or `null` if not
  1546. *
  1547. * @param {string} line
  1548. * @param {MDState} state
  1549. * @returns {Array} tuple with remaining line and `MDTagModifier` or `null`
  1550. */
  1551. static fromLine(line, state) {
  1552. if (state) {
  1553. var found = false;
  1554. for (const reader of state.root.readersByBlockPriority) {
  1555. if (reader instanceof MDModifierReader) {
  1556. found = true;
  1557. break;
  1558. }
  1559. }
  1560. if (!found) return [ line, null ];
  1561. }
  1562. let groups = this.#trailingClassRegex.exec(line);
  1563. if (groups === null) return [ line, null ];
  1564. let bareLine = groups[1];
  1565. let mod = this.#fromContents(groups[2]);
  1566. return [ bareLine, mod ];
  1567. }
  1568. /**
  1569. * Attempts to extract modifier from head of string.
  1570. *
  1571. * @param {string} line
  1572. * @returns {MDTagModifier|null}
  1573. */
  1574. static fromStart(line) {
  1575. let groups = this.#leadingClassRegex.exec(line);
  1576. if (groups === null) return null;
  1577. return this.#fromContents(groups[1]);
  1578. }
  1579. /**
  1580. * Discards any modifiers from a line and returns what remains.
  1581. *
  1582. * @param {string} line
  1583. * @returns {string}
  1584. */
  1585. static strip(line) {
  1586. let groups = this.#trailingClassRegex.exec(line);
  1587. if (groups === null) return line;
  1588. return groups[1];
  1589. }
  1590. }
  1591. // -- Readers ---------------------------------------------------------------
  1592. /**
  1593. * Base class for readers of various markdown syntax. A `Markdown` instance can
  1594. * be created with any combination of subclasses of these to customize the
  1595. * flavor of markdown parsed.
  1596. *
  1597. * @see {@link custom.md} for details on subclassing
  1598. */
  1599. class MDReader {
  1600. /**
  1601. * Called before processing begins. `state.lines` is populated and the
  1602. * line pointer `state.p` will be at `0`.
  1603. *
  1604. * Default implementation does nothing.
  1605. *
  1606. * @param {MDState} state
  1607. */
  1608. preProcess(state) {}
  1609. /**
  1610. * Attempts to read an `MDBlockNode` subclass at the current line pointer
  1611. * `state.p`. Only matches if the block pattern starts at the line pointer,
  1612. * not elsewhere in the `state.lines` array. If a block is found, `state.p`
  1613. * should be incremented to the next line _after_ the block structure and
  1614. * a `MDBlockNode` subclass instance is returned. If no block is found,
  1615. * returns `null`.
  1616. *
  1617. * Default implementation always returns `null`.
  1618. *
  1619. * @param {MDState} state
  1620. * @returns {MDBlockNode|null} found block, or `null` if not found
  1621. */
  1622. readBlock(state) { return null; }
  1623. /**
  1624. * Attempts to read an inline token from the beginning of `line`. Only the
  1625. * start of the given `line` is considered. If a matching token is found, an
  1626. * `MDToken` is returned. Otherwise `null` is returned.
  1627. *
  1628. * Default implementation always returns `null`.
  1629. *
  1630. * @param {MDState} state
  1631. * @param {string} line - string to check for a leading token
  1632. * @returns {MDToken|null} found token, or `null` if not found
  1633. */
  1634. readToken(state, line) { return null; }
  1635. /**
  1636. * Attempts to find a pattern anywhere in `tokens` and perform a _single_
  1637. * in-place substitution with one or more `MDNode` subclass instances.
  1638. * If a substitution is performed, must return `true`, otherwise `false`.
  1639. *
  1640. * Default implementation always returns `false`.
  1641. *
  1642. * @param {MDState} state
  1643. * @param {number} pass - what substitution pass this is, starting with 1
  1644. * @param {Array} tokens - mixed array of `MDToken` and `MDInlineNode` elements
  1645. * @returns {boolean} `true` if a substitution was performed, `false` if not
  1646. */
  1647. substituteTokens(state, pass, tokens) { return false; }
  1648. /**
  1649. * Called after all parsing has completed. An array `blocks` is passed of
  1650. * all the top-level `MDBlockNode` elements in the document which this
  1651. * method can traverse or alter in-place via `.splice` operations if
  1652. * necessary.
  1653. *
  1654. * `MDNode.visitChildren` is useful for recursively looking for certain
  1655. * `MDNode` instances. `MDNode.replaceNodes` is useful for swapping in
  1656. * replacements.
  1657. *
  1658. * Default implementation does nothing.
  1659. *
  1660. * @param {MDState} state
  1661. * @param {MDBlockNode[]} blocks
  1662. */
  1663. postProcess(state, blocks) {}
  1664. /**
  1665. * Can be overridden to influence ordering of this reader with respect to
  1666. * another during the block parsing phase. Return `-1` to be ordered before
  1667. * the given reader, `1` to be ordered after it, or `0` for no preference.
  1668. * Only return non-`0` values to resolve specific conflicts.
  1669. *
  1670. * Default implementation always returns `0` (no preference).
  1671. *
  1672. * @param {MDReader} other
  1673. * @returns {number} a negative, positive, or 0 value to be ordered before,
  1674. * after, or anwhere relative to `other`, respectively
  1675. */
  1676. compareBlockOrdering(other) {
  1677. return 0;
  1678. }
  1679. /**
  1680. * Can be overridden to influence ordering of this reader with respect to
  1681. * another during the tokenizing phase. Return `-1` to be ordered before
  1682. * the given reader, `1` to be ordered after it, or `0` for no preference.
  1683. * Only return non-`0` values to resolve specific conflicts.
  1684. *
  1685. * Default implementation always returns `0` (no preference).
  1686. *
  1687. * @param {MDReader} other
  1688. * @returns {number} a negative, positive, or 0 value to be ordered before,
  1689. * after, or anwhere relative to `other`, respectively
  1690. */
  1691. compareTokenizeOrdering(other) {
  1692. return 0;
  1693. }
  1694. /**
  1695. * Can be overridden to influence ordering of this reader with respect to
  1696. * another during the substitution phase. Return `-1` to be ordered before
  1697. * the given reader, `1` to be ordered after it, or `0` for no preference.
  1698. * Only return non-`0` values to resolve specific conflicts.
  1699. *
  1700. * Readers are sorted within each substitution pass. All pass 1 readers are
  1701. * processed first, then all pass 2 readers, etc. The number of passes this
  1702. * reader participates in is dictated by `substitionPassCount`.
  1703. *
  1704. * Default implementation always returns `0` (no preference).
  1705. *
  1706. * @param {MDReader} other
  1707. * @param {number} pass - substitution pass, with numbering starting at `1`
  1708. * @returns {number} a negative, positive, or 0 value to be ordered before,
  1709. * after, or anwhere relative to `other`, respectively
  1710. */
  1711. compareSubstituteOrdering(other, pass) {
  1712. return 0;
  1713. }
  1714. /**
  1715. * How many substitution passes this reader requires. Substitution allows
  1716. * all pass 1 readers to process first, then all pass 2 readers, etc.
  1717. */
  1718. get substitutionPassCount() { return 1; }
  1719. /**
  1720. * For sorting readers with ordering preferences. The `compare` methods
  1721. * don't have the properties of normal sorting compares so need to sort
  1722. * differently.
  1723. *
  1724. * @param {MDReader[]} arr - array to sort
  1725. * @param {function} compareFn - comparison function, taking two array element
  1726. * arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
  1727. * respectively
  1728. * @param {function} idFn - function for returning a unique hashable id for
  1729. * the array element
  1730. * @returns {MDReader[]} sorted array
  1731. */
  1732. static #kahnTopologicalSort(arr, compareFn, idFn) {
  1733. const graph = {};
  1734. const inDegrees = {};
  1735. const valuesById = {};
  1736. // Build the graph and compute in-degrees
  1737. for (const elem of arr) {
  1738. const id = idFn(elem);
  1739. graph[id] = [];
  1740. inDegrees[id] = 0;
  1741. valuesById[id] = elem;
  1742. }
  1743. for (let i = 0; i < arr.length; i++) {
  1744. const elemA = arr[i];
  1745. const idA = idFn(elemA);
  1746. for (let j = 0; j < arr.length; j++) {
  1747. if (i === j) continue;
  1748. const elemB = arr[j];
  1749. const idB = idFn(elemB);
  1750. const comparisonResult = compareFn(elemA, elemB);
  1751. if (comparisonResult < 0) {
  1752. graph[idA].push(idB);
  1753. inDegrees[idB]++;
  1754. } else if (comparisonResult > 0) {
  1755. graph[idB].push(idA);
  1756. inDegrees[idA]++;
  1757. }
  1758. }
  1759. }
  1760. // Initialize the queue with zero-inDegree nodes
  1761. const queue = [];
  1762. for (const elemId in inDegrees) {
  1763. if (inDegrees[elemId] === 0) {
  1764. queue.push(elemId);
  1765. }
  1766. }
  1767. // Process the queue and build the topological order list
  1768. const sorted = [];
  1769. while (queue.length > 0) {
  1770. const elemId = queue.shift();
  1771. sorted.push(valuesById[elemId]);
  1772. delete valuesById[elemId];
  1773. for (const neighbor of graph[elemId]) {
  1774. inDegrees[neighbor]--;
  1775. if (inDegrees[neighbor] === 0) {
  1776. queue.push(neighbor);
  1777. }
  1778. }
  1779. }
  1780. // Anything left over can go at the end. No ordering dependencies.
  1781. for (const elemId in valuesById) {
  1782. sorted.push(valuesById[elemId]);
  1783. }
  1784. return sorted;
  1785. }
  1786. /**
  1787. * Returns a sorted array of readers by their block priority preferences.
  1788. *
  1789. * @param {MDReader[]} readers
  1790. * @returns {MDReader[]} sorted readers
  1791. */
  1792. static sortReaderForBlocks(readers) {
  1793. const sorted = readers.slice();
  1794. return MDReader.#kahnTopologicalSort(sorted, (a, b) => {
  1795. return a.compareBlockOrdering(b);
  1796. }, (elem) => elem.constructor.name);
  1797. }
  1798. /**
  1799. * Returns a sorted array of readers by their tokenization priority preferences.
  1800. *
  1801. * @param {MDReader[]} readers
  1802. * @returns {MDReader[]} sorted readers
  1803. */
  1804. static sortReadersForTokenizing(readers) {
  1805. const sorted = readers.slice();
  1806. return MDReader.#kahnTopologicalSort(sorted, (a, b) => {
  1807. return a.compareTokenizeOrdering(b);
  1808. }, (elem) => elem.constructor.name);
  1809. }
  1810. /**
  1811. * Returns a sorted array of tuples (arrays) containing the substitution
  1812. * pass number and reader instance, sorted by their substitution priority
  1813. * preferences.
  1814. *
  1815. * For readers with `substitutionPassCount` > `1`, the same reader will
  1816. * appear multiple times in the resulting array, one per pass.
  1817. *
  1818. * @param {MDReader[]} readers
  1819. * @returns {MDReader[]} sorted array of tuples with the pass number and
  1820. * reader instance in each
  1821. */
  1822. static sortReadersForSubstitution(readers) {
  1823. var tuples = [];
  1824. var maxPass = 1;
  1825. for (const reader of readers) {
  1826. const passCount = reader.substitutionPassCount;
  1827. for (var pass = 1; pass <= passCount; pass++) {
  1828. tuples.push([ pass, reader ]);
  1829. }
  1830. maxPass = Math.max(maxPass, pass);
  1831. }
  1832. var result = [];
  1833. for (var pass = 1; pass <= maxPass; pass++) {
  1834. var readersThisPass = tuples.filter((tup) => tup[0] == pass);
  1835. const passResult = MDReader.#kahnTopologicalSort(readersThisPass, (a, b) => {
  1836. const aReader = a[1];
  1837. const bReader = b[1];
  1838. return aReader.compareSubstituteOrdering(bReader, pass);
  1839. }, (elem) => `${elem[1].constructor.name}:${elem[0]}`);
  1840. result = result.concat(passResult);
  1841. }
  1842. return result;
  1843. }
  1844. }
  1845. /**
  1846. * Reads markdown blocks for headings denoted with the underline syntax.
  1847. *
  1848. * Supports `MDTagModifier` suffixes.
  1849. */
  1850. class MDUnderlinedHeadingReader extends MDReader {
  1851. readBlock(state) {
  1852. var p = state.p;
  1853. if (!state.hasLines(2)) return null;
  1854. var modifier;
  1855. let contentLine = state.lines[p++].trim();
  1856. [contentLine, modifier] = MDTagModifier.fromLine(contentLine, state);
  1857. let underLine = state.lines[p++].trim();
  1858. if (contentLine == '') return null;
  1859. if (/^=+$/.exec(underLine)) {
  1860. state.p = p;
  1861. let block = new MDHeadingNode(1, state.inlineMarkdownToNodes(contentLine));
  1862. if (modifier) modifier.applyTo(block);
  1863. return block;
  1864. }
  1865. if (/^\-+$/.exec(underLine)) {
  1866. state.p = p;
  1867. let block = new MDHeadingNode(2, state.inlineMarkdownToNodes(contentLine));
  1868. if (modifier) modifier.applyTo(block);
  1869. return block;
  1870. }
  1871. return null;
  1872. }
  1873. }
  1874. /**
  1875. * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
  1876. * to 6 are supported.
  1877. *
  1878. * Supports `MDTagModifier` suffixes.
  1879. */
  1880. class MDHashHeadingReader extends MDReader {
  1881. static #hashHeadingRegex = /^(#{1,6})\s*([^#].*?)\s*\#*\s*$/; // 1=hashes, 2=content
  1882. readBlock(state) {
  1883. var p = state.p;
  1884. let line = state.lines[p++];
  1885. var modifier;
  1886. [line, modifier] = MDTagModifier.fromLine(line, state);
  1887. var groups = MDHashHeadingReader.#hashHeadingRegex.exec(line);
  1888. if (groups === null) return null;
  1889. state.p = p;
  1890. const level = groups[1].length;
  1891. const content = groups[2];
  1892. let block = new MDHeadingNode(level, state.inlineMarkdownToNodes(content));
  1893. if (modifier) modifier.applyTo(block);
  1894. return block;
  1895. }
  1896. }
  1897. /**
  1898. * Reads subtext blocks. Subtext is smaller, fainter text for things like
  1899. * disclaimers or sources.
  1900. *
  1901. * Supports `MDTagModifier` suffixes.
  1902. */
  1903. class MDSubtextReader extends MDReader {
  1904. static #subtextRegex = /^\-#\s*(.*?)\s*$/; // 1=content
  1905. readBlock(state) {
  1906. var p = state.p;
  1907. let line = state.lines[p++];
  1908. var modifier;
  1909. [line, modifier] = MDTagModifier.fromLine(line, state);
  1910. var groups = MDSubtextReader.#subtextRegex.exec(line);
  1911. if (groups === null) return null;
  1912. state.p = p;
  1913. const content = groups[1];
  1914. let block = new MDSubtextNode(state.inlineMarkdownToNodes(content));
  1915. if (modifier) modifier.applyTo(block);
  1916. return block;
  1917. }
  1918. compareBlockOrdering(other) {
  1919. if (other instanceof MDUnorderedListReader) {
  1920. return -1;
  1921. }
  1922. return 0;
  1923. }
  1924. }
  1925. /**
  1926. * Reads markdown blocks for blockquoted text.
  1927. */
  1928. class MDBlockQuoteReader extends MDReader {
  1929. readBlock(state) {
  1930. var blockquoteLines = [];
  1931. var p = state.p;
  1932. while (p < state.lines.length) {
  1933. let line = state.lines[p++];
  1934. if (line.startsWith(">")) {
  1935. blockquoteLines.push(line);
  1936. } else {
  1937. break;
  1938. }
  1939. }
  1940. if (blockquoteLines.length == 0) return null;
  1941. let contentLines = blockquoteLines.map(function(line) {
  1942. return line.substring(1).replace(/^ {0,3}\t?/, '');
  1943. });
  1944. let substate = state.copy(contentLines);
  1945. let quotedBlocks = substate.readBlocks();
  1946. state.p = p;
  1947. return new MDBlockquoteNode(quotedBlocks);
  1948. }
  1949. }
  1950. /**
  1951. * Internal abstract base class for ordered and unordered lists.
  1952. */
  1953. class _MDListReader extends MDReader {
  1954. #readItemLines(state, firstLineStartPos) {
  1955. var p = state.p;
  1956. var lines = [];
  1957. var seenBlankLine = false;
  1958. var stripTrailingBlankLines = true;
  1959. while (state.hasLines(1, p)) {
  1960. const isFirstLine = p == state.p;
  1961. var line = state.lines[p++];
  1962. if (isFirstLine) {
  1963. line = line.substring(firstLineStartPos);
  1964. }
  1965. if (/^(?:\*|\+|\-|\d+\.)\s+/.exec(line)) {
  1966. // Found next list item
  1967. stripTrailingBlankLines = false; // because this signals extra spacing intended
  1968. break;
  1969. }
  1970. const isBlankLine = line.trim().length == 0;
  1971. const isIndented = /^\s+\S/.exec(line) !== null;
  1972. if (isBlankLine) {
  1973. seenBlankLine = true;
  1974. } else if (!isIndented && seenBlankLine) {
  1975. // Post-list content
  1976. break;
  1977. }
  1978. lines.push(line);
  1979. }
  1980. lines = MDUtils.withoutTrailingBlankLines(lines);
  1981. return MDUtils.stripIndent(lines);
  1982. }
  1983. /**
  1984. * @param {MDState} state
  1985. * @param {number} firstLineStart
  1986. * @return {MDBlockNode}
  1987. */
  1988. _readListItemContent(state, firstLineStartPos) {
  1989. const itemLines = this.#readItemLines(state, firstLineStartPos);
  1990. state.p += Math.max(itemLines.length, 1);
  1991. if (itemLines.length == 1) {
  1992. return state.inlineMarkdownToNode(itemLines[0]);
  1993. }
  1994. const hasBlankLines = itemLines.filter((line) => line.trim().length == 0).length > 0;
  1995. if (hasBlankLines) {
  1996. const substate = state.copy(itemLines);
  1997. const blocks = substate.readBlocks();
  1998. return (blocks.length == 1) ? blocks[0] : new MDNode(blocks);
  1999. }
  2000. // Multiline content with no blank lines. Search for new block
  2001. // boundaries without the benefit of a blank line to demarcate it.
  2002. for (var p = 1; p < itemLines.length; p++) {
  2003. const line = itemLines[p];
  2004. if (/^(?:\*|\-|\+|\d+\.)\s+/.exec(line)) {
  2005. // Nested list found
  2006. const firstBlock = state.inlineMarkdownToNode(itemLines.slice(0, p).join("\n"));
  2007. const substate = state.copy(itemLines.slice(p));
  2008. const blocks = substate.readBlocks();
  2009. return new MDNode([ firstBlock, ...blocks ]);
  2010. }
  2011. }
  2012. // Ok, give up and just do a standard block read
  2013. {
  2014. const substate = state.copy(itemLines);
  2015. const blocks = substate.readBlocks();
  2016. return (blocks.length == 1) ? blocks[0] : new MDNode(blocks);
  2017. }
  2018. }
  2019. readBlock(state) {
  2020. throw new Error(`Abstract readBlock must be overridden in ${this.constructor.name}`);
  2021. }
  2022. }
  2023. /**
  2024. * Block reader for unordered (bulleted) lists.
  2025. */
  2026. class MDUnorderedListReader extends _MDListReader {
  2027. static #unorderedListRegex = /^([\*\+\-]\s+)(.*)$/; // 1=bullet, 2=content
  2028. /**
  2029. * @param {MDState} state
  2030. * @returns {MDListItemNode|null}
  2031. */
  2032. #readUnorderedListItem(state) {
  2033. var p = state.p;
  2034. let line = state.lines[p];
  2035. let groups = MDUnorderedListReader.#unorderedListRegex.exec(line);
  2036. if (groups === null) return null;
  2037. const firstLineOffset = groups[1].length;
  2038. return new MDListItemNode(this._readListItemContent(state, firstLineOffset));
  2039. }
  2040. readBlock(state) {
  2041. var items = [];
  2042. var item = null;
  2043. do {
  2044. item = this.#readUnorderedListItem(state);
  2045. if (item) items.push(item);
  2046. } while (item);
  2047. if (items.length == 0) return null;
  2048. return new MDUnorderedListNode(items);
  2049. }
  2050. }
  2051. /**
  2052. * Block reader for ordered (numbered) lists. The number of the first item is
  2053. * used to begin counting. The subsequent items increase by 1, regardless of
  2054. * their value.
  2055. */
  2056. class MDOrderedListReader extends _MDListReader {
  2057. static #orderedListRegex = /^(\d+)(\.\s+)(.*)$/; // 1=number, 2=dot, 3=content
  2058. /**
  2059. * @param {MDState} state
  2060. * @returns {MDListItemNode|null}
  2061. */
  2062. #readOrderedListItem(state) {
  2063. var p = state.p;
  2064. let line = state.lines[p];
  2065. let groups = MDOrderedListReader.#orderedListRegex.exec(line);
  2066. if (groups === null) return null;
  2067. const ordinal = parseInt(groups[1]);
  2068. const firstLineOffset = groups[1].length + groups[2].length;
  2069. return new MDListItemNode(this._readListItemContent(state, firstLineOffset), ordinal);
  2070. }
  2071. readBlock(state) {
  2072. var items = [];
  2073. var item = null;
  2074. do {
  2075. item = this.#readOrderedListItem(state);
  2076. if (item) items.push(item);
  2077. } while (item);
  2078. if (items.length == 0) return null;
  2079. return new MDOrderedListNode(items, items[0].ordinal);
  2080. }
  2081. }
  2082. /**
  2083. * Block reader for code blocks denoted by pairs of triple tickmarks. If
  2084. * a programming language name, _xyz_, immediately follows the backticks, a
  2085. * `language-xyz` CSS class will be added to the resulting `<code>`
  2086. * element.
  2087. *
  2088. * Supports `MDTagModifier` suffix.
  2089. */
  2090. class MDFencedCodeBlockReader extends MDReader {
  2091. readBlock(state) {
  2092. if (!state.hasLines(2)) return null;
  2093. var p = state.p;
  2094. let openFenceLine = state.lines[p++];
  2095. var modifier;
  2096. [openFenceLine, modifier] = MDTagModifier.fromLine(openFenceLine, state);
  2097. const match = /^```\s*([a-z0-9]*)\s*$/.exec(openFenceLine);
  2098. if (match === null) return null;
  2099. const language = match[1].length > 0 ? match[1] : null;
  2100. var codeLines = [];
  2101. while (state.hasLines(1, p)) {
  2102. let line = state.lines[p++];
  2103. if (line.trim() == '```') {
  2104. state.p = p;
  2105. let block = new MDCodeBlockNode(codeLines.join("\n"), language);
  2106. if (modifier) modifier.applyTo(block);
  2107. return block;
  2108. }
  2109. codeLines.push(line);
  2110. }
  2111. return null;
  2112. }
  2113. }
  2114. /**
  2115. * Block reader for code blocks denoted by indenting text.
  2116. */
  2117. class MDIndentedCodeBlockReader extends MDReader {
  2118. readBlock(state) {
  2119. var p = state.p;
  2120. var codeLines = [];
  2121. while (state.hasLines(1, p)) {
  2122. let line = state.lines[p++];
  2123. if (MDUtils.countIndents(line, true) < 1) {
  2124. p--;
  2125. break;
  2126. }
  2127. codeLines.push(MDUtils.stripIndent(line));
  2128. }
  2129. if (codeLines.length == 0) return null;
  2130. state.p = p;
  2131. return new MDCodeBlockNode(codeLines.join("\n"));
  2132. }
  2133. }
  2134. /**
  2135. * Block reader for horizontal rules. Composed of three or more hypens or
  2136. * asterisks on a line by themselves, with or without intermediate whitespace.
  2137. */
  2138. class MDHorizontalRuleReader extends MDReader {
  2139. static #horizontalRuleRegex = /^\s*(?:\-(?:\s*\-){2,}|\*(?:\s*\*){2,})\s*$/;
  2140. readBlock(state) {
  2141. var p = state.p;
  2142. let line = state.lines[p++];
  2143. var modifier;
  2144. [line, modifier] = MDTagModifier.fromLine(line, state);
  2145. if (MDHorizontalRuleReader.#horizontalRuleRegex.exec(line)) {
  2146. state.p = p;
  2147. let block = new MDHorizontalRuleNode();
  2148. if (modifier) modifier.applyTo(block);
  2149. return block;
  2150. }
  2151. return null;
  2152. }
  2153. compareBlockOrdering(other) {
  2154. if (other instanceof MDUnorderedListReader) {
  2155. return -1;
  2156. }
  2157. return 0;
  2158. }
  2159. }
  2160. /**
  2161. * Block reader for tables.
  2162. *
  2163. * Supports `MDTagModifier` suffix.
  2164. */
  2165. class MDTableReader extends MDReader {
  2166. /**
  2167. * @param {MDState} state
  2168. * @param {boolean} isHeader
  2169. * @return {MDTableRowNode|null}
  2170. */
  2171. #readTableRow(state, isHeader) {
  2172. if (!state.hasLines(1)) return null;
  2173. var p = state.p;
  2174. let line = MDTagModifier.strip(state.lines[p++].trim());
  2175. if (/.*\|.*/.exec(line) === null) return null;
  2176. if (line.startsWith('|')) line = line.substring(1);
  2177. if (line.endsWith('|')) line = line.substring(0, line.length - 1);
  2178. let cellTokens = line.split('|');
  2179. let cells = cellTokens.map(function(token) {
  2180. let content = state.inlineMarkdownToNode(token.trim());
  2181. return isHeader ? new MDTableHeaderCellNode(content) : new MDTableCellNode(content);
  2182. });
  2183. state.p = p;
  2184. return new MDTableRowNode(cells);
  2185. }
  2186. /**
  2187. * @param {string} line
  2188. * @returns {string[]}
  2189. */
  2190. #parseColumnAlignments(line) {
  2191. line = line.trim();
  2192. if (line.startsWith('|')) line = line.substring(1);
  2193. if (line.endsWith('|')) line = line.substring(0, line.length - 1);
  2194. return line.split(/\s*\|\s*/).map(function(token) {
  2195. if (token.startsWith(':')) {
  2196. if (token.endsWith(':')) {
  2197. return 'center';
  2198. }
  2199. return 'left';
  2200. } else if (token.endsWith(':')) {
  2201. return 'right';
  2202. }
  2203. return null;
  2204. });
  2205. }
  2206. static #tableDividerRegex = /^\s*[|]?\s*(?:[:]?-+[:]?)(?:\s*\|\s*[:]?-+[:]?)*\s*[|]?\s*$/;
  2207. readBlock(state) {
  2208. if (!state.hasLines(2)) return null;
  2209. let startP = state.p;
  2210. let firstLine = state.lines[startP];
  2211. var modifier = MDTagModifier.fromLine(firstLine, state)[1];
  2212. let headerRow = this.#readTableRow(state, true);
  2213. if (headerRow === null) {
  2214. state.p = startP;
  2215. return null;
  2216. }
  2217. let dividerLine = state.lines[state.p++];
  2218. let dividerGroups = MDTableReader.#tableDividerRegex.exec(dividerLine);
  2219. if (dividerGroups === null) {
  2220. state.p = startP;
  2221. return null;
  2222. }
  2223. let columnAlignments = this.#parseColumnAlignments(dividerLine);
  2224. var bodyRows = [];
  2225. while (state.hasLines(1)) {
  2226. let row = this.#readTableRow(state, false);
  2227. if (row === null) break;
  2228. bodyRows.push(row);
  2229. }
  2230. let table = new MDTableNode(headerRow, bodyRows);
  2231. table.columnAlignments = columnAlignments;
  2232. if (modifier) modifier.applyTo(table);
  2233. return table;
  2234. }
  2235. }
  /**
   * Block reader for definition lists. A list is a run of non-blank lines in
   * which terms sit at the start of a line and each definition follows on a
   * line beginning with a colon and whitespace. Lines that begin with
   * whitespace continue the preceding term or definition.
   */
  class MDDefinitionListReader extends MDReader {
    /**
     * @param {MDState} state
     * @returns {MDDefinitionListNode|null} the list, or `null` when the lines
     *   at the current position do not contain at least one term and one
     *   definition
     */
    readBlock(state) {
      const entries = [];
      let terms = 0;
      let definitions = 0;
      let cursor = state.p;

      while (state.hasLines(1, cursor)) {
        const current = state.lines[cursor++];
        // A blank line ends the list (and is consumed).
        if (current.trim().length === 0) break;

        if (/^\s+/.exec(current)) {
          // Continuation line; it needs a previous entry to attach to.
          if (entries.length === 0) return null;
          entries[entries.length - 1] += "\n" + current;
          continue;
        }
        if (/^:\s+/.exec(current)) {
          definitions++;
        } else {
          terms++;
        }
        entries.push(current);
      }

      if (terms === 0 || definitions === 0) return null;

      const items = entries.map((entry) => {
        const parts = /^:\s+(.*?)$/s.exec(entry);
        return parts
          ? new MDDefinitionListDefinitionNode(state.inlineMarkdownToNodes(parts[1]))
          : new MDDefinitionListTermNode(state.inlineMarkdownToNodes(entry));
      });

      state.p = cursor;
      return new MDDefinitionListNode(items);
    }
  }
  /**
   * Block reader for defining footnote contents. Footnotes can be defined anywhere
   * in the document but will always be rendered at the end of a page or end of
   * the document.
   *
   * Bookkeeping lives on `state.root` under the keys `footnotes`,
   * `footnoteIds`, `footnoteInstances`, and `nextFootnoteId`. Footnote symbols
   * come straight from the document, so the symbol-keyed maps are only ever
   * read and written as own properties; a symbol such as `constructor` or
   * `__proto__` must not resolve to something inherited from
   * `Object.prototype`.
   */
  class MDFootnoteReader extends MDReader {
    static #footnoteWithTitleRegex = /^\[\^([^\]]+?)\s+"(.*?)"\]/; // 1=symbol, 2=title
    static #footnoteRegex = /^\[\^([^\]]+?)\]/; // 1=symbol

    /**
     * Reads `map[key]` only if it is an own property.
     *
     * @param {object} map
     * @param {string} key
     * @returns {*} the stored value, or `undefined` if `key` is not an own property
     */
    static #ownValue(map, key) {
      return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
    }

    /**
     * Stores `value` as an ordinary enumerable own property. Uses
     * `defineProperty` so a key of `__proto__` is stored like any other key
     * instead of replacing the map's prototype.
     *
     * @param {object} map
     * @param {string} key
     * @param {*} value
     */
    static #setOwnValue(map, key, value) {
      Object.defineProperty(map, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }

    /**
     * @param {MDState} state
     * @param {string} symbol
     * @param {MDNode[]} footnote - parsed content of the footnote
     */
    #defineFootnote(state, symbol, footnote) {
      const footnotes = state.root['footnotes'] ?? {};
      MDFootnoteReader.#setOwnValue(footnotes, symbol, footnote);
      state.root['footnotes'] = footnotes;
    }

    /**
     * Records one occurrence of a footnote reference.
     *
     * @param {MDState} state
     * @param {string} symbol
     * @param {number} unique - occurrence id of the reference
     */
    #registerUniqueInstance(state, symbol, unique) {
      const footnoteInstances = state.root['footnoteInstances'];
      const instances = MDFootnoteReader.#ownValue(footnoteInstances, symbol) ?? [];
      instances.push(unique);
      MDFootnoteReader.#setOwnValue(footnoteInstances, symbol, instances);
    }

    /**
     * Returns the numeric id for a symbol, assigning the next free id the
     * first time the symbol is seen.
     *
     * @param {MDState} state
     * @param {string} symbol
     * @returns {number}
     */
    #idForFootnoteSymbol(state, symbol) {
      const footnoteIds = state.root['footnoteIds'];
      const existing = MDFootnoteReader.#ownValue(footnoteIds, symbol);
      if (existing) return existing;
      const id = state.root['nextFootnoteId'];
      MDFootnoteReader.#setOwnValue(footnoteIds, symbol, id);
      state.root['nextFootnoteId'] = id + 1;
      return id;
    }

    /**
     * Resets all footnote bookkeeping on the root state.
     *
     * @param {MDState} state
     */
    preProcess(state) {
      state.root['footnoteInstances'] = {};
      state.root['footnotes'] = {};
      state.root['footnoteIds'] = {};
      state.root['nextFootnoteId'] = 1;
    }

    /**
     * Reads a footnote definition of the form `[^symbol]: content`. Lines
     * directly below that begin with whitespace are appended to the content.
     *
     * @param {MDState} state
     * @returns {MDNode|null} an empty node when a definition was consumed,
     *   otherwise `null`
     */
    readBlock(state) {
      let p = state.p;
      const groups = /^\s*\[\^\s*([^\]]+)\s*\]:\s+(.*)\s*$/.exec(state.lines[p++]);
      if (groups === null) return null;
      const symbol = groups[1];
      let def = groups[2];
      while (state.hasLines(1, p) && /^\s+/.exec(state.lines[p])) {
        def += "\n" + state.lines[p++];
      }
      const content = state.inlineMarkdownToNodes(def);
      this.#defineFootnote(state, symbol, content);
      state.p = p;
      return new MDNode(); // empty
    }

    /**
     * Tokenizes a footnote reference at the start of `line`, with or without
     * a quoted title after the symbol.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      const titled = MDFootnoteReader.#footnoteWithTitleRegex.exec(line);
      if (titled) {
        return new MDToken(titled[0], MDTokenType.Footnote, titled[1], titled[2]);
      }
      const plain = MDFootnoteReader.#footnoteRegex.exec(line);
      if (plain) {
        return new MDToken(plain[0], MDTokenType.Footnote, plain[1]);
      }
      return null;
    }

    /**
     * Replaces the first footnote token with an `MDFootnoteNode`.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens - mutated in place on success
     * @returns {boolean} `true` if a substitution was made
     */
    substituteTokens(state, pass, tokens) {
      const match = MDToken.findFirstTokens(tokens, [ MDTokenType.Footnote ]);
      if (match) {
        const symbol = match.tokens[0].content;
        tokens.splice(match.index, 1, new MDFootnoteNode(symbol));
        return true;
      }
      return false;
    }

    /**
     * Numbers every footnote reference found under `blocks` and, if any
     * footnote content was defined, appends an `MDFootnoteListNode`.
     *
     * @param {MDState} state
     * @param {MDBlockNode[]} blocks
     */
    postProcess(state, blocks) {
      let nextOccurrenceId = 1;
      for (const block of blocks) {
        block.visitChildren((node) => {
          if (!(node instanceof MDFootnoteNode)) return;
          node.footnoteId = this.#idForFootnoteSymbol(state, node.symbol);
          node.occurrenceId = nextOccurrenceId++;
          node.displaySymbol = `${node.footnoteId}`;
          this.#registerUniqueInstance(state, node.symbol, node.occurrenceId);
        });
      }
      // Read from the root, the same place preProcess/#defineFootnote write.
      if (Object.keys(state.root['footnotes']).length == 0) return;
      blocks.push(new MDFootnoteListNode());
    }

    /**
     * @param {MDReader} other
     * @returns {number} `-1` against link and image readers, otherwise `0`
     */
    compareBlockOrdering(other) {
      if (other instanceof MDLinkReader || other instanceof MDImageReader) {
        return -1;
      }
      return 0;
    }

    /**
     * @param {MDReader} other
     * @returns {number} `-1` against link and image readers, otherwise `0`
     */
    compareTokenizeOrdering(other) {
      if (other instanceof MDLinkReader || other instanceof MDImageReader) {
        return -1;
      }
      return 0;
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `-1` against link and image readers, otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      if (other instanceof MDLinkReader || other instanceof MDImageReader) {
        return -1;
      }
      return 0;
    }
  }
  /**
   * Block reader for abbreviation definitions. Anywhere the abbreviation appears
   * in plain text will have its definition available when hovering over it.
   * Definitions can appear anywhere in the document. Their content should only
   * contain simple text, not markdown.
   *
   * Definitions and their matching regexes are kept on `state.root` under the
   * keys `abbreviations` and `abbreviationRegexes`.
   */
  class MDAbbreviationReader extends MDReader {
    /**
     * Records an abbreviation and a word-bounded, case-insensitive regex that
     * finds it in text.
     *
     * @param {MDState} state
     * @param {string} abbreviation
     * @param {string} definition
     */
    #defineAbbreviation(state, abbreviation, definition) {
      // Always go through the root: that is where `preProcess` creates the
      // maps and where `postProcess` reads them, and `state` itself is not
      // guaranteed to be the root.
      const root = state.root;
      root['abbreviations'][abbreviation] = definition;
      const regex = new RegExp("\\b(" + MDUtils.escapeRegex(abbreviation) + ")\\b", "ig");
      root['abbreviationRegexes'][abbreviation] = regex;
    }

    /**
     * Resets the abbreviation maps on the root state.
     *
     * @param {MDState} state
     */
    preProcess(state) {
      state.root['abbreviations'] = {};
      state.root['abbreviationRegexes'] = {};
    }

    /**
     * Reads a definition line of the form `*[ABBR]: definition`.
     *
     * @param {MDState} state
     * @returns {MDNode|null} an empty node when a definition was consumed,
     *   otherwise `null`
     */
    readBlock(state) {
      let p = state.p;
      const line = state.lines[p++];
      const groups = /^\s*\*\[([^\]]+?)\]:\s+(.*?)\s*$/.exec(line);
      if (groups === null) return null;
      this.#defineAbbreviation(state, groups[1], groups[2]);
      state.p = p;
      return new MDNode(); // empty
    }

    /**
     * Splits every text node that contains a defined abbreviation into plain
     * text pieces and `MDAbbreviationNode`s.
     *
     * @param {MDState} state
     * @param {MDNode[]} blocks
     */
    postProcess(state, blocks) {
      const abbreviations = state.root['abbreviations'];
      const regexes = state.root['abbreviationRegexes'];
      MDNode.replaceNodes(state, blocks, (original) => {
        if (!(original instanceof MDTextNode)) return null;
        let changed = false;
        const elems = [ original.text ]; // mix of strings and MDNodes
        for (let i = 0; i < elems.length; i++) {
          const text = elems[i];
          if (typeof text !== 'string') continue;
          for (const abbreviation in abbreviations) {
            const regex = regexes[abbreviation];
            // The regex carries the `g` flag, so `exec` resumes from
            // `lastIndex`; rewind it since each call searches a new string.
            regex.lastIndex = 0;
            const groups = regex.exec(text);
            if (groups === null) continue;
            const definition = abbreviations[abbreviation];
            const prefix = text.substring(0, groups.index);
            const suffix = text.substring(groups.index + groups[0].length);
            elems.splice(i, 1, prefix, new MDAbbreviationNode(groups[0], definition), suffix);
            // Re-examine the new prefix next; everything before index `i`
            // has already been searched for every abbreviation.
            i--;
            changed = true;
            break;
          }
        }
        if (!changed) return null;
        const nodes = elems.map((elem) => typeof elem === 'string' ? new MDTextNode(elem) : elem);
        return new MDNode(nodes);
      });
    }
  }
  /**
   * Block reader for plain paragraphs. A paragraph is a run of lines ended by
   * a blank (or whitespace-only) line or by the end of the input. Because
   * paragraphs have no distinguishing syntax, this reader orders itself after
   * every other block reader.
   */
  class MDParagraphReader extends MDReader {
    /**
     * @param {MDState} state
     * @returns {MDParagraphNode|null} the paragraph, or `null` when there is
     *   no text at the current position or the text spans the entire input
     */
    readBlock(state) {
      const lines = state.lines;
      const collected = [];
      let cursor = state.p;
      while (cursor < lines.length) {
        const text = lines[cursor++];
        // The terminating blank line is consumed but not kept.
        if (text.trim().length === 0) break;
        collected.push(text);
      }

      // If it's the entire document don't wrap it in a paragraph
      const startedAtTop = state.p == 0;
      const reachedEnd = cursor >= lines.length;
      if (startedAtTop && reachedEnd) return null;

      if (collected.length === 0) return null;
      state.p = cursor;
      return new MDParagraphNode(state.inlineMarkdownToNodes(collected.join("\n")));
    }

    /**
     * @param {MDReader} other
     * @returns {number} always `1`, so this reader is tried dead last
     */
    compareBlockOrdering(other) {
      return 1;
    }
  }
  /**
   * Abstract base class for readers that look for one or two delimiting tokens
   * on either side of some content. E.g. `**strong**`.
   */
  class MDSimplePairInlineReader extends MDReader {
    // Passes:
    // 1. Syntaxes with two delimiting tokens, interior tokens of the same
    //    kind must be even in number
    // 2. Syntaxes with one delimiting token, interior tokens of the same
    //    kind must be even in number
    // 3. Syntaxes with two delimiting tokens, any tokens inside
    // 4. Syntaxes with one delimiting token, any tokens inside
    get substitutionPassCount() { return 4; }

    /**
     * Attempts a substitution of a matched pair of delimiting token types.
     * If successful, the substitution is performed on `tokens` and `true` is
     * returned, otherwise `false` is returned and the array is untouched.
     *
     * Single-token delimiters (`count == 1`) are only attempted on passes 2
     * and 4; repeated delimiters (`count > 1`) only on passes 1 and 3. On any
     * other pass the call is a no-op that returns `false`.
     *
     * Content is rejected when it starts or ends with a whitespace token or
     * already contains a node of `nodeClass` (no nesting). On pass 1 content
     * is additionally rejected when it holds an odd number of `delimiter`
     * tokens.
     *
     * @param {MDState} state
     * @param {number} pass - pass number, starting with `1`
     * @param {MDToken[]} tokens - tokens/nodes to perform substitution on
     * @param {class} nodeClass - class of the node to return if matched
     * @param {MDTokenType} delimiter - delimiting token
     * @param {number} count - how many times the token is repeated to form the delimiter
     * @param {boolean} plaintext - whether to invoke `nodeClass` with a verbatim
     *   content string instead of parsed `MDNode`s
     * @returns {boolean} `true` if substitution was performed, `false` if not
     */
    attemptPair(state, pass, tokens, nodeClass, delimiter, count=1, plaintext=false) {
      // Always answer with a real boolean, including when this delimiter
      // width is not handled on this pass.
      if (count == 1 && pass != 2 && pass != 4) return false;
      if (count > 1 && pass != 1 && pass != 3) return false;
      const delimiters = Array(count).fill(delimiter);
      // NOTE(review): the pass list above says pass 2 should also require an
      // even number of interior delimiters, but only pass 1 enforces it here.
      // Left unchanged because enforcing it on pass 2 alters parse results --
      // confirm the intended behavior before changing.
      const requireEvenInnerDelimiters = this.substitutionPassCount > 1 && pass == 1;
      const match = MDToken.findPairedTokens(tokens, delimiters, delimiters, (content) => {
        const firstType = content[0] instanceof MDToken ? content[0].type : null;
        const lastType = content[content.length - 1] instanceof MDToken ? content[content.length - 1].type : null;
        if (firstType == MDTokenType.Whitespace) return false;
        if (lastType == MDTokenType.Whitespace) return false;
        for (const token of content) {
          // Don't allow nesting
          if (token.constructor == nodeClass) return false;
        }
        if (requireEvenInnerDelimiters) {
          let innerCount = 0;
          for (const token of content) {
            if (token instanceof MDToken && token.type == delimiter) innerCount++;
          }
          if ((innerCount % 2) != 0) return false;
        }
        return true;
      });
      if (match === null) return false;
      const content = (plaintext)
        ? match.contentTokens.map((token) => token.original).join('')
        : state.tokensToNodes(match.contentTokens);
      tokens.splice(match.startIndex, match.totalLength, new nodeClass(content));
      return true;
    }
  }
  /**
   * Reader for emphasis syntax: a single underscore on either side of some
   * text (tried first) or a single asterisk on either side.
   */
  class MDEmphasisReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading underscore or asterisk.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      const lead = line.charAt(0);
      if (lead === '_') return new MDToken('_', MDTokenType.Underscore);
      if (lead === '*') return new MDToken('*', MDTokenType.Asterisk);
      return null;
    }

    /**
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} `true` if a pair was substituted
     */
    substituteTokens(state, pass, tokens) {
      // Underscores are tried before asterisks; stop at the first success.
      return [ MDTokenType.Underscore, MDTokenType.Asterisk ].some(
        (delimiter) => this.attemptPair(state, pass, tokens, MDEmphasisNode, delimiter)
      );
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `1` against `MDStrongReader`, otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      return (other instanceof MDStrongReader) ? 1 : 0;
    }
  }
  /**
   * Reader for strong syntax: two asterisks on either side of some text
   * (tried first) or two underscores on either side. Note that if
   * `MDUnderlineReader` is in use, it will replace the double-underscore syntax.
   */
  class MDStrongReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading asterisk or underscore.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      const lead = line.charAt(0);
      if (lead === '*') return new MDToken('*', MDTokenType.Asterisk);
      if (lead === '_') return new MDToken('_', MDTokenType.Underscore);
      return null;
    }

    /**
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} `true` if a pair was substituted
     */
    substituteTokens(state, pass, tokens) {
      // Asterisks are tried before underscores; stop at the first success.
      return [ MDTokenType.Asterisk, MDTokenType.Underscore ].some(
        (delimiter) => this.attemptPair(state, pass, tokens, MDStrongNode, delimiter, 2)
      );
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `-1` against `MDEmphasisReader`, otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      return (other instanceof MDEmphasisReader) ? -1 : 0;
    }
  }
  /**
   * Reader for strikethrough syntax. Consists of two tildes on either side of
   * some text (preferred) or single tildes on either side. Note that if
   * `MDSubscriptReader` is in use, it will replace the single-tilde syntax.
   *
   * The number of recognized tildes can be configured.
   */
  class MDStrikethroughReader extends MDSimplePairInlineReader {
    /**
     * Whether `~text~` is recognized.
     * @type {boolean}
     */
    singleTildeEnabled = true;
    /**
     * Whether `~~text~~` is recognized.
     * @type {boolean}
     */
    doubleTildeEnabled = true;

    /**
     * Tokenizes a leading tilde.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      if (line.startsWith('~')) return new MDToken('~', MDTokenType.Tilde);
      return null;
    }

    /**
     * Tries the double-tilde form first, then the single-tilde form, each
     * only when its own flag is enabled.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} `true` if a pair was substituted
     */
    substituteTokens(state, pass, tokens) {
      if (this.doubleTildeEnabled) {
        if (this.attemptPair(state, pass, tokens, MDStrikethroughNode, MDTokenType.Tilde, 2)) return true;
      }
      if (this.singleTildeEnabled) {
        if (this.attemptPair(state, pass, tokens, MDStrikethroughNode, MDTokenType.Tilde)) return true;
      }
      return false;
    }
  }
  /**
   * Reader for underline syntax: two underscores on either side of some text.
   * If used with `MDStrongReader`, which also looks for double underscores,
   * this reader will take priority.
   */
  class MDUnderlineReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading underscore.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      return line.startsWith('_') ? new MDToken('_', MDTokenType.Underscore) : null;
    }

    /**
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} result of the double-underscore pair attempt
     */
    substituteTokens(state, pass, tokens) {
      const pairWidth = 2;
      return this.attemptPair(state, pass, tokens, MDUnderlineNode, MDTokenType.Underscore, pairWidth);
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `-1` against `MDStrongReader`, otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      return (other instanceof MDStrongReader) ? -1 : 0;
    }
  }
  /**
   * Reader for highlight syntax: a pair of equal signs on either side of some
   * text.
   */
  class MDHighlightReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading equal sign.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      return line.startsWith('=') ? new MDToken('=', MDTokenType.Equal) : null;
    }

    /**
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} result of the double-equal pair attempt
     */
    substituteTokens(state, pass, tokens) {
      const pairWidth = 2;
      return this.attemptPair(state, pass, tokens, MDHighlightNode, MDTokenType.Equal, pairWidth);
    }
  }
  /**
   * Reader for inline code syntax. Consists of one or two delimiting backticks
   * around text. The contents between the backticks will be rendered verbatim,
   * ignoring any inner markdown syntax. To include a backtick inside, escape it
   * with a backslash.
   */
  class MDCodeSpanReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading backtick.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      if (line.startsWith('`')) return new MDToken('`', MDTokenType.Backtick);
      return null;
    }

    /**
     * Tries the double-backtick form, then the single-backtick form. Both
     * pass the content to `MDCodeNode` as a verbatim string.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} `true` if a pair was substituted, otherwise `false`
     */
    substituteTokens(state, pass, tokens) {
      if (this.attemptPair(state, pass, tokens, MDCodeNode, MDTokenType.Backtick, 2, true)) return true;
      if (this.attemptPair(state, pass, tokens, MDCodeNode, MDTokenType.Backtick, 1, true)) return true;
      // Explicit result so callers always get a boolean.
      return false;
    }
  }
  /**
   * Reader for subscript syntax: a single tilde on either side of some text.
   * If used with `MDStrikethroughReader`, this reader will take precedence,
   * and strikethrough can only be done with double tildes.
   */
  class MDSubscriptReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading tilde.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      return line.startsWith('~') ? new MDToken('~', MDTokenType.Tilde) : null;
    }

    /**
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} result of the single-tilde pair attempt
     */
    substituteTokens(state, pass, tokens) {
      const delimiter = MDTokenType.Tilde;
      return this.attemptPair(state, pass, tokens, MDSubscriptNode, delimiter);
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `-1` against `MDStrikethroughReader`, otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      return (other instanceof MDStrikethroughReader) ? -1 : 0;
    }
  }
  /**
   * Reader for superscript syntax: a single caret character on either side of
   * some text.
   */
  class MDSuperscriptReader extends MDSimplePairInlineReader {
    /**
     * Tokenizes a leading caret.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      return line.startsWith('^') ? new MDToken('^', MDTokenType.Caret) : null;
    }

    /**
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} result of the single-caret pair attempt
     */
    substituteTokens(state, pass, tokens) {
      const delimiter = MDTokenType.Caret;
      return this.attemptPair(state, pass, tokens, MDSuperscriptNode, delimiter);
    }
  }
  /**
   * Reads a hypertext link: link text between square brackets followed by a
   * URL (or email address) in parentheses. Also recognizes bare URLs and
   * email addresses wrapped in angle brackets.
   */
  class MDLinkReader extends MDReader {
    static #simpleEmailRegex = new RegExp("^<(" + MDUtils.baseEmailRegex.source + ")>", "i"); // 1=email
    static #simpleURLRegex = new RegExp("^<(" + MDUtils.baseURLRegex.source + ")>", "i"); // 1=URL

    /**
     * Tokenizes the start of `line` as, in order of preference, a label, an
     * email target, a URL target, an angle-bracketed email, or an
     * angle-bracketed URL.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      const label = MDToken.tokenizeLabel(line);
      if (label) return new MDToken(label[0], MDTokenType.Label, label[1]);

      const email = MDToken.tokenizeEmail(line);
      if (email) return new MDToken(email[0], MDTokenType.Email, email[1], email[2]);

      const url = MDToken.tokenizeURL(line);
      if (url) return new MDToken(url[0], MDTokenType.URL, url[1], url[2]);

      const bareEmail = MDLinkReader.#simpleEmailRegex.exec(line);
      if (bareEmail) return new MDToken(bareEmail[0], MDTokenType.SimpleEmail, bareEmail[1]);

      const bareURL = MDLinkReader.#simpleURLRegex.exec(line);
      if (bareURL) return new MDToken(bareURL[0], MDTokenType.SimpleLink, bareURL[1]);

      return null;
    }

    /**
     * Replaces the first link-shaped token sequence with an `MDLinkNode`.
     * Label + URL is tried first, then label + email, then a bare email, then
     * a bare URL.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens - mutated in place on success
     * @returns {boolean} `true` if a substitution was made
     */
    substituteTokens(state, pass, tokens) {
      const labelledURL = MDToken.findFirstTokens(tokens, [ MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.URL ]);
      if (labelledURL) {
        const found = labelledURL.tokens;
        const target = found[found.length - 1];
        const body = state.inlineMarkdownToNode(found[0].content);
        tokens.splice(labelledURL.index, found.length, new MDLinkNode(target.content, body, target.extra));
        return true;
      }

      const labelledEmail = MDToken.findFirstTokens(tokens, [ MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.Email ]);
      if (labelledEmail) {
        const found = labelledEmail.tokens;
        const target = found[found.length - 1];
        const href = `mailto:${target.content}`;
        const body = state.inlineMarkdownToNodes(found[0].content);
        tokens.splice(labelledEmail.index, found.length, new MDLinkNode(href, body, target.extra));
        return true;
      }

      const bareEmail = MDToken.findFirstTokens(tokens, [ MDTokenType.SimpleEmail ]);
      if (bareEmail) {
        const address = bareEmail.tokens[0].content;
        // The visible address goes through MDObfuscatedTextNode.
        const linkNode = new MDLinkNode(`mailto:${address}`, new MDObfuscatedTextNode(address));
        tokens.splice(bareEmail.index, 1, linkNode);
        return true;
      }

      const bareURL = MDToken.findFirstTokens(tokens, [ MDTokenType.SimpleLink ]);
      if (bareURL) {
        const href = bareURL.tokens[0].content;
        tokens.splice(bareURL.index, 1, new MDLinkNode(href, new MDTextNode(href)));
        return true;
      }

      return false;
    }
  }
  /**
   * Reader for referential URL definitions. A link is written as link text in
   * square brackets followed by a reference symbol, also in square brackets.
   * The URL is defined elsewhere on a line by itself: the symbol in square
   * brackets, a colon, the URL, and an optional title in double quotes.
   */
  class MDReferencedLinkReader extends MDLinkReader {
    /**
     * Reads a `[symbol]: url "optional title"` definition line and registers
     * it through `state.defineURL`.
     *
     * @param {MDState} state
     * @returns {MDNode|null} an empty node when a definition was consumed,
     *   otherwise `null`
     */
    readBlock(state) {
      const start = state.p;
      const line = state.lines[start];

      // The titled form is tried first; the untitled form is the fallback.
      const titled = /^\s*\[(.+?)]:\s*(\S+)\s+"(.*?)"\s*$/.exec(line);
      const found = titled ? titled : /^\s*\[(.+?)]:\s*(\S+)\s*$/.exec(line);
      if (!found) return null;

      const title = titled ? titled[3] : null;
      state.defineURL(found[1], found[2], title);
      state.p = start + 1;
      return new MDNode([]); // empty
    }

    /**
     * Replaces the first label + label sequence with an
     * `MDReferencedLinkNode`.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens - mutated in place on success
     * @returns {boolean} `true` if a substitution was made
     */
    substituteTokens(state, pass, tokens) {
      const pair = MDToken.findFirstTokens(tokens, [ MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.Label ]);
      if (!pair) return false;

      const found = pair.tokens;
      const reference = found[found.length - 1].content;
      const body = state.inlineMarkdownToNodes(found[0].content);
      tokens.splice(pair.index, found.length, new MDReferencedLinkNode(reference, body));
      return true;
    }
  }
  /**
   * Reader for images. Consists of an exclamation, alt text in square brackets,
   * and image URL in parentheses.
   */
  class MDImageReader extends MDLinkReader {
    /**
     * Tokenizes everything `MDLinkReader` does, plus a leading exclamation
     * mark.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      const inherited = super.readToken(state, line);
      if (inherited) return inherited;
      if (line.startsWith('!')) return new MDToken('!', MDTokenType.Bang);
      return null;
    }

    /**
     * Replaces the first bang + label + URL sequence with an `MDImageNode`.
     * The URL token's `extra`, when present, becomes the `title` attribute.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens - mutated in place on success
     * @returns {boolean} `true` if a substitution was made
     */
    substituteTokens(state, pass, tokens) {
      const match = MDToken.findFirstTokens(tokens, [ MDTokenType.Bang, MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.URL ]);
      if (!match) return false;

      const alt = match.tokens[1].content;
      const target = match.tokens[match.tokens.length - 1];
      const node = new MDImageNode(target.content, alt);
      const title = target.extra;
      // Skip both `null` and `undefined`: attribute values are stringified
      // when rendered, so an absent title must not be stored at all.
      if (title !== null && title !== undefined) {
        node.attributes['title'] = title;
      }
      tokens.splice(match.index, match.tokens.length, node);
      return true;
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `-1` against exactly `MDLinkReader` or
     *   `MDReferencedLinkReader` (not their subclasses), otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      if (other.constructor === MDLinkReader || other.constructor === MDReferencedLinkReader) {
        return -1;
      }
      return 0;
    }
  }
  /**
   * Reader for images with referential URL definitions: an exclamation, alt
   * text in square brackets, and a link symbol in square brackets. The URL is
   * defined the same way as for `MDReferencedLinkReader`.
   */
  class MDReferencedImageReader extends MDReferencedLinkReader {
    /**
     * Tokenizes everything the parent reader does, plus a leading exclamation
     * mark.
     *
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null}
     */
    readToken(state, line) {
      const inherited = super.readToken(state, line);
      if (inherited) return inherited;
      return line.startsWith('!') ? new MDToken('!', MDTokenType.Bang) : null;
    }

    /**
     * Replaces the first bang + label + label sequence with an
     * `MDReferencedImageNode`.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens - mutated in place on success
     * @returns {boolean} `true` if a substitution was made
     */
    substituteTokens(state, pass, tokens) {
      const found = MDToken.findFirstTokens(tokens, [ MDTokenType.Bang, MDTokenType.Label, MDTokenType.META_OptionalWhitespace, MDTokenType.Label ]);
      if (!found) return false;

      const parts = found.tokens;
      const altText = parts[1].content;
      const reference = parts[parts.length - 1].content;
      tokens.splice(found.index, parts.length, new MDReferencedImageNode(reference, altText));
      return true;
    }

    /**
     * @param {MDReader} other
     * @param {number} pass
     * @returns {number} `-1` against exactly `MDLinkReader` or
     *   `MDReferencedLinkReader` (not their subclasses), otherwise `0`
     */
    compareSubstituteOrdering(other, pass) {
      const outranked = [ MDLinkReader, MDReferencedLinkReader ];
      return outranked.includes(other.constructor) ? -1 : 0;
    }
  }
  /**
   * Reads a verbatim HTML tag. A tag whose name is accepted by
   * `state.root.tagFilter` is scrubbed by that filter and tokenized; any other
   * tag is not tokenized by this reader.
   */
  class MDHTMLTagReader extends MDReader {
    /**
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null} an `HTMLTag` token, or `null` when `line` does
     *   not start with a tag or the tag name is rejected by the filter
     */
    readToken(state, line) {
      const parsed = MDHTMLTag.fromLineStart(line, state);
      if (parsed === null) return null;

      const filter = state.root.tagFilter;
      if (!filter.isValidTagName(parsed.tagName)) return null;

      // Scrubbing happens on the tag object before it is handed to the token.
      filter.scrubTag(parsed);
      return new MDToken(parsed.original, MDTokenType.HTMLTag, parsed);
    }

    /**
     * Replaces the first HTML tag token with an `MDHTMLTagNode`.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens - mutated in place on success
     * @returns {boolean} `true` if a substitution was made
     */
    substituteTokens(state, pass, tokens) {
      const found = MDToken.findFirstTokens(tokens, [ MDTokenType.HTMLTag ]);
      if (!found) return false;

      const node = new MDHTMLTagNode(found.tokens[0].tag);
      tokens.splice(found.index, found.tokens.length, node);
      return true;
    }
  }
  /**
   * Reads tag modifiers: curly braces containing one or more CSS classes, IDs,
   * or custom attributes separated by spaces, to be applied to the preceding
   * node. Validation is performed on modifiers and only acceptable values are
   * applied.
   */
  class MDModifierReader extends MDReader {
    /**
     * @param {MDState} state
     * @param {string} line
     * @returns {MDToken|null} a `Modifier` token when `line` starts with a
     *   modifier, otherwise `null`
     */
    readToken(state, line) {
      const parsed = MDTagModifier.fromStart(line);
      return parsed
        ? new MDToken(parsed.original, MDTokenType.Modifier, parsed)
        : null;
    }

    /**
     * Never substitutes anything. Modifiers are applied elsewhere, and any
     * that are not simply render as their original syntax.
     *
     * @param {MDState} state
     * @param {number} pass
     * @param {Array} tokens
     * @returns {boolean} always `false`
     */
    substituteTokens(state, pass, tokens) {
      return false;
    }
  }
  2923. // -- Document nodes --------------------------------------------------------
  2924. /**
  2925. * Base class for nodes in the assembled document tree.
  2926. */
  2927. class MDNode {
  2928. /**
  2929. * Array of CSS classes to add to the node when rendered as HTML.
  2930. * @type {string[]}
  2931. */
  2932. cssClasses = [];
  2933. /** @type {string|null} */
  2934. cssId = null;
  2935. /**
  2936. * Mapping of CSS attributes to values.
  2937. * @type {object}
  2938. */
  2939. cssStyles = {};
  2940. /**
  2941. * Mapping of arbitrary attributes and values to add to this node's top-level
  2942. * tag when rendered as HTML. For `class`, `id`, and `style` attributes, use
  2943. * `cssClasses`, `cssId`, and `cssStyles` instead.
  2944. * @type {object}
  2945. */
  2946. attributes = {};
  2947. /**
  2948. * All child nodes in this node.
  2949. * @type {MDNode[]}
  2950. */
  2951. children;
  2952. /**
  2953. * @param {MDNode[]} children
  2954. */
  2955. constructor(children=[]) {
  2956. if (children instanceof Array) {
  2957. for (const elem of children) {
  2958. if (!(elem instanceof MDNode)) {
  2959. throw new Error(`${this.constructor.name} expects children of type MDNode[] or MDNode, got array with ${MDUtils.typename(elem)} element`);
  2960. }
  2961. }
  2962. this.children = children;
  2963. } else if (children instanceof MDNode) {
  2964. this.children = [ children ];
  2965. } else {
  2966. throw new Error(`${this.constructor.name} expects children of type MDNode[] or MDNode, got ${MDUtils.typename(children)}`);
  2967. }
  2968. }
  2969. /**
  2970. * Renders this node and any children as an HTML string. If the node has no
  2971. * content an empty string should be returned.
  2972. *
  2973. * @param {MDState} state
  2974. * @returns {string} HTML string
  2975. */
  2976. toHTML(state) {
  2977. return MDNode.toHTML(this.children, state);
  2978. }
  2979. /**
  2980. * Renders this node and any children as a plain text string. The conversion
  2981. * should only render ordinary text, not attempt markdown-like formatting
  2982. * (e.g. list items should not be prefixed with asterisks, only have their
  2983. * content text returned). If the node has no renderable content an empty
  2984. * string should be returned.
  2985. *
  2986. * @param {MDState} state
  2987. * @returns {string} plaintext string
  2988. */
  2989. toPlaintext(state) {
  2990. return MDNode.toPlaintext(this.children, state);
  2991. }
  2992. /**
  2993. * Protected helper method that renders an HTML fragment of the attributes
  2994. * to apply to the root HTML tag representation of this node.
  2995. *
  2996. * Example result with a couple `cssClasses`, a `cssId`, and a custom
  2997. * `attributes` key-value pair:
  2998. *
  2999. * ```
  3000. * class="foo bar" id="baz" lang="en"
  3001. * ```
  3002. *
  3003. * The value includes a leading space if it's non-empty so that it can be
  3004. * concatenated directly after the tag name and before the closing `>`.
  3005. *
  3006. * @returns {string} HTML fragment
  3007. */
  3008. _htmlAttributes() {
  3009. var html = '';
  3010. if (this.cssClasses.length > 0) {
  3011. html += ` class="${this.cssClasses.join(' ')}"`;
  3012. }
  3013. if (this.cssId !== null && this.cssId.length > 0) {
  3014. html += ` id="${this.cssId}"`;
  3015. }
  3016. var styles = [];
  3017. for (const key in this.cssStyles) {
  3018. styles.push(`${key}: ${this.cssStyles[key]};`)
  3019. }
  3020. if (styles.length > 0) {
  3021. html += ` style="${MDUtils.escapeHTML(styles.join(' '))}"`;
  3022. }
  3023. for (const key in this.attributes) {
  3024. if (key == 'class' || key == 'id' || key == 'style') continue;
  3025. const value = `${this.attributes[key]}`;
  3026. const cleanKey = MDUtils.scrubAttributeName(key);
  3027. if (cleanKey.length == 0) continue;
  3028. const cleanValue = MDUtils.escapeHTML(value);
  3029. html += ` ${cleanKey}="${cleanValue}"`;
  3030. }
  3031. return html;
  3032. }
  3033. /**
  3034. * Protected helper that renders and concatenates the HTML of all children
  3035. * of this node. Mostly for use by subclasses in their `toHTML`
  3036. * implementations.
  3037. *
  3038. * @param {MDState} state
  3039. * @returns {string} concatenated HTML
  3040. */
  3041. _childHTML(state) {
  3042. return this.children.map((child) => child.toHTML(state)).join('');
  3043. }
  3044. /**
  3045. * Protected helper for rendering nodes represented by simple paired HTML
  3046. * tags. Custom CSS classes and attributes will be included in the result,
  3047. * and child content will be rendered between the tags.
  3048. *
  3049. * @param {MDState} state
  3050. * @param {string} tagName - HTML tag name, without angle braces
  3051. * @returns {string} HTML string
  3052. */
  3053. _simplePairedTagHTML(state, tagName) {
  3054. const openTagSuffix = this.children[0] instanceof MDBlockNode ? '\n' : ''
  3055. const closeTagPrefix = this.children[this.children.length - 1] instanceof MDBlockNode ? '\n' : '';
  3056. const closeTagSuffix = this instanceof MDBlockNode ? '\n' : '';
  3057. return `<${tagName}${this._htmlAttributes()}>${openTagSuffix}${this._childHTML(state)}${closeTagPrefix}</${tagName}>${closeTagSuffix}`;
  3058. }
  3059. /**
  3060. * Calls the given callback function with every child node, recursively.
  3061. * Nodes are visited depth-first.
  3062. *
  3063. * @param {function} fn - callback that accepts one `MDNode` argument
  3064. */
  3065. visitChildren(fn) {
  3066. if (this.children === undefined || !Array.isArray(this.children)) {
  3067. return;
  3068. }
  3069. for (const child of this.children) {
  3070. fn(child);
  3071. child.visitChildren(fn);
  3072. }
  3073. }
  3074. /**
  3075. * Helper for rendering and concatenating HTML from an array of `MDNode`s.
  3076. *
  3077. * @param {MDNode[]} nodes
  3078. * @param {MDState} state
  3079. * @returns {string} HTML string
  3080. */
  3081. static toHTML(nodes, state) {
  3082. return nodes.map((node) => node.toHTML(state) + (node instanceof MDBlockNode ? '\n' : '')).join('');
  3083. }
  3084. /**
  3085. * Helper for rendering and concatenating plaintext from an array of `MDNode`s.
  3086. *
  3087. * @param {MDNode[]} nodes
  3088. * @param {MDState} state
  3089. * @returns {string} plaintext
  3090. */
  3091. static toPlaintext(nodes, state) {
  3092. return nodes.map((node) => node.toPlaintext(state)).join('');
  3093. }
  3094. /**
  3095. * Recursively searches and replaces nodes in a tree. The given `replacer`
  3096. * is passed every node in the tree. If `replacer` returns a new `MDNode`
  3097. * the original will be replaced with it. If the function returns `null` no
  3098. * change will be made to that node. Traversal is depth-first.
  3099. *
  3100. * @param {MDState} state
  3101. * @param {MDNode[]} nodes
  3102. * @param {function} replacer - takes a node as an argument, returns either
  3103. * a new node or `null` to leave it unchanged
  3104. */
  3105. static replaceNodes(state, nodes, replacer) {
  3106. for (var i = 0; i < nodes.length; i++) {
  3107. var originalNode = nodes[i];
  3108. const replacement = replacer(originalNode);
  3109. if (replacement instanceof MDNode) {
  3110. nodes.splice(i, 1, replacement);
  3111. } else {
  3112. this.replaceNodes(state, originalNode.children, replacer);
  3113. }
  3114. }
  3115. }
  3116. }
  3117. /**
  3118. * Marker subclass that indicates a node represents block syntax.
  3119. */
  class MDBlockNode extends MDNode {
    // Intentionally empty. The class exists so rendering code can test
    // `node instanceof MDBlockNode` (used when deciding where to emit
    // newlines around tags).
  }
  3121. /**
  3122. * Paragraph block.
  3123. */
  class MDParagraphNode extends MDBlockNode {
    /**
     * Renders the paragraph as a `<p>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'p';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3129. /**
  3130. * A heading block with a level from 1 to 6.
  3131. */
  class MDHeadingNode extends MDBlockNode {
    /** @type {number} */
    level;
    /**
     * @param {number} level - heading level, an integer from 1 to 6
     * @param {*} children - passed through unchanged to the parent constructor
     * @throws {Error} if `level` is not an integer from 1 to 6
     */
    constructor(level, children) {
      super(children);
      // `Number.isInteger` rejects non-numbers, fractions and NaN. A plain
      // range comparison lets NaN through because `NaN < 1` and `NaN > 6` are
      // both false, which would render an invalid `<hNaN>` tag.
      if (!Number.isInteger(level) || level < 1 || level > 6) {
        throw new Error(`${this.constructor.name} requires heading level 1 to 6`);
      }
      this.level = level;
    }
    /**
     * Renders the heading as an `<h1>` to `<h6>` element according to `level`.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      return this._simplePairedTagHTML(state, `h${this.level}`);
    }
  }
  3146. /**
  3147. * A sub-text block with smaller, less prominent text.
  3148. */
  class MDSubtextNode extends MDBlockNode {
    /**
     * Renders the block as a `<div>` carrying the `subtext` CSS class.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      // Add the marker class once; repeated renders must not duplicate it.
      const alreadyTagged = this.cssClasses.includes('subtext');
      if (!alreadyTagged) {
        this.cssClasses.push('subtext');
      }
      return this._simplePairedTagHTML(state, 'div');
    }
  }
  3157. /**
  3158. * Node for a horizontal dividing line.
  3159. */
  class MDHorizontalRuleNode extends MDBlockNode {
    /**
     * Renders an `<hr>` tag with this node's HTML attributes. There is no
     * closing tag and no child content.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const attributes = this._htmlAttributes();
      return `<hr${attributes}>`;
    }
  }
  3165. /**
  3166. * A block quote, usually rendered indented from other text.
  3167. */
  class MDBlockquoteNode extends MDBlockNode {
    /**
     * Renders the quote as a `<blockquote>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'blockquote';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3173. /**
  3174. * A bulleted list. Contains `MDListItemNode` children.
  3175. */
  class MDUnorderedListNode extends MDBlockNode {
    /** @type {MDListItemNode[]} children */
    /**
     * @param {MDListItemNode[]} children - the list's items
     */
    constructor(children) {
      super(children);
    }
    /**
     * Renders the list as a `<ul>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'ul';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3188. /**
  3189. * A numbered list. Contains `MDListItemNode` children.
  3190. */
  class MDOrderedListNode extends MDBlockNode {
    /** @type {MDListItemNode[]} children */
    /**
     * Number of the first item, or `null` when unspecified.
     * @type {number|null}
     */
    startOrdinal;
    /**
     * @param {MDListItemNode[]} children
     * @param {number|null} startOrdinal
     */
    constructor(children, startOrdinal=null) {
      super(children);
      this.startOrdinal = startOrdinal;
    }
    /**
     * Renders the list as an `<ol>` element. A `start` attribute is set on the
     * node when `startOrdinal` is given and is not 1.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const { startOrdinal } = this;
      // Loose comparison kept on purpose so this behaves exactly as before.
      const hasCustomStart = startOrdinal !== null && startOrdinal != 1;
      if (hasCustomStart) {
        this.attributes['start'] = startOrdinal;
      }
      return this._simplePairedTagHTML(state, 'ol');
    }
  }
  3208. /**
  3209. * An item in a bulleted or numbered list.
  3210. */
  class MDListItemNode extends MDBlockNode {
    /**
     * Stored as given to the constructor; `null` when none was supplied.
     * @type {number|null}
     */
    ordinal;
    /**
     * @param {MDNode|MDNode[]} children
     * @param {number|null} ordinal
     */
    constructor(children, ordinal=null) {
      super(children);
      this.ordinal = ordinal;
    }
    /**
     * Renders the item as an `<li>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'li';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3226. /**
  3227. * A block of preformatted computer code. Inner markdown is ignored.
  3228. */
  class MDCodeBlockNode extends MDBlockNode {
    /**
     * The code, rendered HTML-escaped inside `<pre><code>`.
     * @type {string}
     */
    text;
    /**
     * The programming language of the content.
     * @type {string|null}
     */
    language;
    /**
     * @param {string} text
     * @param {string|null} language
     */
    constructor(text, language=null) {
      super([]);
      this.text = text;
      this.language = language;
    }
    /**
     * Renders the block as `<pre><code>…</code></pre>` followed by a newline.
     * When a language is set, the `<code>` tag gets a `language-<name>` class.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      // The language is escaped before it goes into the attribute so it
      // cannot terminate the `class` value early.
      const languageModifier = (this.language !== null)
        ? ` class="language-${MDUtils.escapeHTML(this.language)}"`
        : '';
      return `<pre${this._htmlAttributes()}><code${languageModifier}>` +
        `${MDUtils.escapeHTML(this.text)}</code></pre>\n`;
    }
  }
  3252. /**
  3253. * A table node with a single header row and any number of body rows.
  3254. *
  3255. * If modifying the rows, use the `headerRow` and `bodyRows` accessors,
  3256. * otherwise `children` may get out of sync.
  3257. */
  class MDTableNode extends MDBlockNode {
    /** @param {MDTableRowNode[]} children */
    /**
     * The single header row. Assigning a new value rebuilds `children`.
     * @type {MDTableRowNode}
     */
    get headerRow() {
      return this.#headerRow;
    }
    set headerRow(newValue) {
      this.#headerRow = newValue;
      this.#recalculateChildren();
    }
    #headerRow;
    /**
     * The body rows. Assigning a new array rebuilds `children`.
     * @type {MDTableRowNode[]}
     */
    get bodyRows() {
      return this.#bodyRows;
    }
    set bodyRows(newValue) {
      this.#bodyRows = newValue;
      this.#recalculateChildren();
    }
    #bodyRows;
    /**
     * How to align each column. Columns beyond the length of the array or with
     * corresponding `null` elements will have no alignment set. Values should
     * be valid CSS `text-align` values.
     *
     * @type {string[]}
     */
    columnAlignments = [];
    /**
     * @param {MDTableRowNode} headerRow
     * @param {MDTableRowNode[]} bodyRows
     */
    constructor(headerRow, bodyRows) {
      super([ headerRow, ...bodyRows ]);
      this.#headerRow = headerRow;
      this.#bodyRows = bodyRows;
    }
    /**
     * Rebuilds `children` as the header row followed by the body rows.
     */
    #recalculateChildren() {
      this.children = [ this.#headerRow, ...this.#bodyRows ];
    }
    /**
     * Pushes the current `columnAlignments` onto every cell of every row.
     */
    #applyAlignments() {
      this.children.forEach((row) => this.#applyAlignmentsToRow(row));
    }
    /**
     * @param {MDTableRowNode} row
     */
    #applyAlignmentsToRow(row) {
      for (const [columnIndex, cell] of row.children.entries()) {
        // Columns past the end of `columnAlignments` get no alignment.
        const hasEntry = columnIndex < this.columnAlignments.length;
        const alignment = hasEntry ? this.columnAlignments[columnIndex] : null;
        this.#applyAlignmentToCell(cell, alignment);
      }
    }
    /**
     * Sets the cell's `text-align` style, or removes it when no alignment is
     * given.
     *
     * @param {MDTableCellNode} cell
     * @param {string|null} alignment
     */
    #applyAlignmentToCell(cell, alignment) {
      if (!alignment) {
        delete cell.cssStyles['text-align'];
        return;
      }
      cell.cssStyles['text-align'] = alignment;
    }
    /**
     * Renders the table with a `<thead>` holding the header row and a
     * `<tbody>` holding the body rows. Column alignments are applied to the
     * cells first.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      this.#applyAlignments();
      const attributes = this._htmlAttributes();
      const headerHTML = this.headerRow.toHTML(state);
      const bodyHTML = MDNode.toHTML(this.bodyRows, state);
      return `<table${attributes}>\n` +
        '<thead>\n' +
        `${headerHTML}\n` +
        '</thead>\n' +
        '<tbody>\n' +
        `${bodyHTML}\n` +
        '</tbody>\n' +
        '</table>\n';
    }
  }
  3331. /**
  3332. * Node for one row (header or body) in a table.
  3333. */
  class MDTableRowNode extends MDBlockNode {
    /** @type {MDTableCellNode[]} children */
    /**
     * Renders the row as a `<tr>` element wrapping its cells.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'tr';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3340. /**
  3341. * Node for one cell in a table row.
  3342. */
  class MDTableCellNode extends MDBlockNode {
    /**
     * Renders the cell as a `<td>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'td';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3348. /**
  3349. * Node for a header cell in a header table row.
  3350. */
  class MDTableHeaderCellNode extends MDBlockNode {
    /**
     * Renders the header cell as a `<th>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'th';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3356. /**
  3357. * Definition list with `MDDefinitionListTermNode` and
  3358. * `MDDefinitionListDefinitionNode` children.
  3359. */
  class MDDefinitionListNode extends MDBlockNode {
    /**
     * Renders the list as a `<dl>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'dl';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3365. /**
  3366. * A word or term in a definition list.
  3367. */
  class MDDefinitionListTermNode extends MDBlockNode {
    /**
     * Renders the term as a `<dt>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'dt';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3373. /**
  3374. * The definition of a word or term in a definition list. Should follow a
  3375. * definition term, or another definition to serve as an alternate.
  3376. */
  class MDDefinitionListDefinitionNode extends MDBlockNode {
    /**
     * Renders the definition as a `<dd>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'dd';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3382. /**
  3383. * Block at the bottom of a document listing all the footnotes with their
  3384. * content.
  3385. */
  class MDFootnoteListNode extends MDBlockNode {
    /**
     * Looks up the id recorded for a footnote symbol in
     * `state.root.footnoteIds`.
     *
     * @param {MDState} state
     * @param {string} symbol
     * @return {number|null} the id, or `null` when there is no lookup table or
     * no entry for the symbol
     */
    #footnoteId(state, symbol) {
      const lookup = state.root['footnoteIds'];
      if (!lookup) return null;
      return lookup[symbol] ?? null;
    }
    /**
     * Renders all footnotes in `state.footnotes` as an ordered list inside a
     * `<div class="footnotes">`, each followed by back-links to its
     * occurrences. Returns an empty string when there are no footnotes.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const footnotes = state.footnotes;
      const symbolOrder = Object.keys(footnotes);
      if (symbolOrder.length === 0) return '';
      const footnoteUniques = state.root.footnoteInstances;
      let html = '';
      html += '<div class="footnotes"><hr/>';
      html += '<ol>';
      for (const symbol of symbolOrder) {
        /** @type {MDNode[]} */
        const content = footnotes[symbol];
        if (!content) continue;
        const footnoteId = this.#footnoteId(state, symbol);
        const contentHTML = MDNode.toHTML(content, state);
        // The symbol is escaped before it goes into the attribute so it
        // cannot terminate the `value` early.
        html += `<li value="${MDUtils.escapeHTML(symbol)}" id="${state.root.elementIdPrefix}footnote_${footnoteId}">${contentHTML}`;
        // Tolerate a missing occurrence table: no back-links are emitted then.
        const uniques = footnoteUniques?.[symbol];
        if (uniques) {
          for (const unique of uniques) {
            html += ` <a href="#${state.root.elementIdPrefix}footnoteref_${unique}" class="footnote-backref">↩︎</a>`;
          }
        }
        html += `</li>\n`;
      }
      html += '</ol>';
      html += '</div>';
      return html;
    }
    /**
     * Renders all footnotes as plain text, one `symbol. content` line per
     * footnote. Returns an empty string when there are no footnotes.
     *
     * @param {MDState} state
     * @returns {string} plaintext string
     */
    toPlaintext(state) {
      const footnotes = state.footnotes;
      const symbolOrder = Object.keys(footnotes);
      if (symbolOrder.length === 0) return '';
      let text = '';
      for (const symbol of symbolOrder) {
        const content = footnotes[symbol];
        if (!content) continue;
        // Footnote content is an array of nodes (as in `toHTML`), and an array
        // has no `toPlaintext` method, so use the static helper. A single node
        // is still accepted.
        const contentText = Array.isArray(content)
          ? MDNode.toPlaintext(content, state)
          : content.toPlaintext(state);
        text += `${symbol}. ${contentText}\n`;
      }
      return text.trim();
    }
  }
  3437. /**
  3438. * Marker subclass that indicates a node represents inline syntax.
  3439. */
  class MDInlineNode extends MDNode {
    // Intentionally empty: adds nothing of its own to MDNode and serves as
    // the common base class for the inline node types.
  }
  3441. /**
  3442. * Contains plain text. Special HTML characters are escaped when rendered.
  3443. */
  class MDTextNode extends MDInlineNode {
    /** The text content, stored unescaped. */
    text;
    /**
     * @param {string} text
     */
    constructor(text) {
      super([]);
      this.text = text;
    }
    /**
     * @param {MDState} state
     * @returns {string} the text with HTML escaping applied
     */
    toHTML(state) {
      const { text } = this;
      return MDUtils.escapeHTML(text);
    }
    /**
     * @param {MDState} state
     * @returns {string} the text exactly as stored
     */
    toPlaintext(state) {
      const { text } = this;
      return text;
    }
  }
  3457. /**
   * Contains plain text which is rendered as HTML entities, making it
   * marginally more difficult for web scrapers to decipher. Used for
   * semi-sensitive info like email addresses.
  3461. */
  class MDObfuscatedTextNode extends MDTextNode {
    /**
     * @param {MDState} state
     * @returns {string} the text passed through `MDUtils.escapeObfuscated`
     */
    toHTML(state) {
      const { text } = this;
      return MDUtils.escapeObfuscated(text);
    }
  }
  3467. /**
  3468. * Emphasized (italicized) content.
  3469. */
  class MDEmphasisNode extends MDInlineNode {
    /**
     * Renders the content as an `<em>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'em';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3475. /**
  3476. * Strong (bold) content.
  3477. */
  class MDStrongNode extends MDInlineNode {
    /**
     * Renders the content as a `<strong>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'strong';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3483. /**
  3484. * Content rendered with a line through it.
  3485. */
  class MDStrikethroughNode extends MDInlineNode {
    /**
     * Renders the content as an `<s>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 's';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3491. /**
  3492. * Underlined content.
  3493. */
  class MDUnderlineNode extends MDInlineNode {
    /**
     * Renders the content as a `<u>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'u';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3499. /**
  3500. * Highlighted content. Usually rendered with a bright colored background.
  3501. */
  class MDHighlightNode extends MDInlineNode {
    /**
     * Renders the content as a `<mark>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'mark';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3507. /**
  3508. * Superscripted content.
  3509. */
  class MDSuperscriptNode extends MDInlineNode {
    /**
     * Renders the content as a `<sup>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'sup';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3515. /**
  3516. * Subscripted content.
  3517. */
  class MDSubscriptNode extends MDInlineNode {
    /**
     * Renders the content as a `<sub>` element wrapping its children.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const tagName = 'sub';
      return this._simplePairedTagHTML(state, tagName);
    }
  }
  3523. /**
  3524. * Inline plaintext indicating computer code.
  3525. */
  class MDCodeNode extends MDInlineNode {
    /**
     * The code text, rendered HTML-escaped.
     * @type {string}
     */
    text;
    /**
     * @param {string} text
     */
    constructor(text) {
      super([]);
      this.text = text;
    }
    /**
     * Renders the text inside a `<code>` element carrying this node's HTML
     * attributes.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const attributes = this._htmlAttributes();
      const escapedText = MDUtils.escapeHTML(this.text);
      return `<code${attributes}>${escapedText}</code>`;
    }
  }
  3537. /**
  3538. * A footnote symbol in a document. Denoted as a superscripted number that can
  3539. * be clicked to go to its content at the bottom of the document.
  3540. */
  class MDFootnoteNode extends MDInlineNode {
    /**
     * Symbol the author used to match up the footnote to its content definition.
     * @type {string}
     */
    symbol;
    /**
     * The superscript symbol rendered in HTML. May be the same or different
     * than `symbol`. When `null`, `symbol` is rendered instead.
     * @type {string|null} display symbol
     */
    displaySymbol = null;
    /**
     * Unique ID for the footnote definition.
     * @type {number|null}
     */
    footnoteId = null;
    /**
     * Unique number for backlinking to a footnote occurrence. Populated by
     * `MDFootnoteReader.postProcess`.
     * @type {number|null}
     */
    occurrenceId = null;
    /**
     * @param {string} symbol
     * @param {string|null} title - stored as the `title` attribute when truthy
     */
    constructor(symbol, title=null) {
      super([]);
      this.symbol = symbol;
      if (title) this.attributes['title'] = title;
    }
    /**
     * Renders the footnote reference as a superscripted link to its
     * definition. Until `occurrenceId` has been assigned, an HTML comment
     * placeholder containing the symbol is returned instead.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      // This used to test `this.differentiator`, a property the class never
      // defines. That was always `undefined !== null`, i.e. true, so the
      // placeholder branch below could never run. `occurrenceId` is the value
      // the link markup actually depends on.
      if (this.occurrenceId !== null) {
        return `<sup id="${state.root.elementIdPrefix}footnoteref_${this.occurrenceId}"${this._htmlAttributes()}>` +
          `<a href="#${state.root.elementIdPrefix}footnote_${this.footnoteId}">${MDUtils.escapeHTML(this.displaySymbol ?? this.symbol)}</a></sup>`;
      }
      return `<!--FNREF:{${this.symbol}}-->`;
    }
  }
  3581. /**
  3582. * A clickable hypertext link.
  3583. */
  class MDLinkNode extends MDInlineNode {
    /**
     * Link target, rendered as the `href` attribute.
     * @type {string}
     */
    href;
    /**
     * @param {string} href
     * @param {MDNode[]|MDNode} children
     * @param {string|null} title - stored as the `title` attribute unless `null`
     */
    constructor(href, children, title=null) {
      super(children);
      this.href = href;
      if (title !== null) {
        this.attributes['title'] = title;
      }
    }
    /**
     * Renders an `<a>` element around the children. `mailto:` targets are
     * escaped with `MDUtils.escapeObfuscated`, every other target with
     * `MDUtils.escapeHTML`.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const isMailto = this.href.startsWith('mailto:');
      const escapedLink = isMailto
        ? MDUtils.escapeObfuscated(this.href)
        : MDUtils.escapeHTML(this.href);
      return `<a href="${escapedLink}"${this._htmlAttributes()}>${this._childHTML(state)}</a>`;
    }
  }
  3606. /**
  3607. * A clickable hypertext link where the URL is defined elsewhere by reference.
  3608. */
  class MDReferencedLinkNode extends MDLinkNode {
    /**
     * Reference key used to look up the URL and title on the state.
     * @type {string}
     */
    reference;
    /**
     * @param {string} reference
     * @param {MDNode[]|MDNode} children
     */
    constructor(reference, children) {
      // The href starts empty and is filled in at render time.
      super('', children);
      this.reference = reference;
    }
    /**
     * Resolves the reference through `state` if the href is still empty, then
     * renders like an ordinary link.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const isUnresolved = this.href === '';
      if (isUnresolved) {
        const url = state.urlForReference(this.reference);
        if (url) {
          this.href = url;
        }
        const title = state.urlTitleForReference(this.reference);
        if (title) {
          this.attributes['title'] = title;
        }
      }
      return super.toHTML(state);
    }
  }
  3629. /**
  3630. * An inline image.
  3631. */
  class MDImageNode extends MDInlineNode {
    /**
     * Image URL, rendered as the `src` attribute.
     * @type {string}
     */
    src;
    /**
     * Alternate text; the `alt` attribute is omitted when this is falsy.
     * @type {string|null}
     */
    alt;
    /**
     * @param {string} src
     * @param {string|null} alt
     */
    constructor(src, alt) {
      super([]);
      this.src = src;
      this.alt = alt;
    }
    /**
     * Renders an `<img>` tag with escaped `src`, optional escaped `alt`, and
     * this node's other HTML attributes.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const srcAttribute = ` src="${MDUtils.escapeHTML(this.src)}"`;
      const altAttribute = this.alt ? ` alt="${MDUtils.escapeHTML(this.alt)}"` : '';
      return `<img${srcAttribute}${altAttribute}${this._htmlAttributes()}>`;
    }
  }
  3653. /**
  3654. * An inline image where the URL is defined elsewhere by reference.
  3655. */
  class MDReferencedImageNode extends MDImageNode {
    /**
     * Reference key used to look up the image URL and title on the state.
     * @type {string}
     */
    reference;
    /**
     * @param {string} reference
     * @param {string|null} alt
     */
    constructor(reference, alt='') {
      // `src` starts empty and is filled in at render time. The parent
      // constructor takes only `src` and `alt`.
      super('', alt);
      this.reference = reference;
    }
    /**
     * Resolves the reference through `state` if `src` is still empty, then
     * renders like an ordinary image.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      if (this.src === '') {
        // Only overwrite when the lookup produced something, as
        // `MDReferencedLinkNode` does. Otherwise an unknown reference would
        // store an empty lookup result as `src` and `title`, and the title
        // would then be rendered as literal text such as `title="null"`.
        const url = state.urlForReference(this.reference);
        if (url) this.src = url;
        const title = state.urlTitleForReference(this.reference);
        if (title) this.attributes['title'] = title;
      }
      return super.toHTML(state);
    }
  }
  3675. /**
  3676. * An abbreviation that can be hovered over to see its full expansion.
  3677. */
  class MDAbbreviationNode extends MDInlineNode {
    /**
     * The abbreviated text shown inside the `<abbr>` element.
     * @type {string}
     */
    abbreviation;
    /**
     * The expansion, stored as the node's `title` attribute. Reads as `null`
     * when no title attribute is set.
     * @type {string}
     */
    get definition() {
      return this.attributes['title'] ?? null;
    }
    set definition(newValue) {
      this.attributes['title'] = newValue;
    }
    /**
     * @param {string} abbreviation
     * @param {string} definition
     */
    constructor(abbreviation, definition) {
      super([]);
      this.abbreviation = abbreviation;
      this.attributes['title'] = definition;
    }
    /**
     * Renders the escaped abbreviation inside an `<abbr>` element carrying
     * this node's HTML attributes.
     *
     * @param {MDState} state
     * @returns {string} HTML string
     */
    toHTML(state) {
      const attributes = this._htmlAttributes();
      const label = MDUtils.escapeHTML(this.abbreviation);
      return `<abbr${attributes}>${label}</abbr>`;
    }
  }
  3697. /**
  3698. * A line break that is preserved when rendered to HTML.
  3699. */
  class MDLineBreakNode extends MDInlineNode {
    /**
     * @param {MDState} state
     * @returns {string} a `<br>` tag
     */
    toHTML(state) {
      const lineBreakTag = '<br>';
      return lineBreakTag;
    }
    /**
     * @param {MDState} state
     * @returns {string} a single newline character
     */
    toPlaintext(state) {
      const newline = '\n';
      return newline;
    }
  }
  3708. /**
  3709. * A verbatim HTML tag. May be altered to strip out disallowed attributes or
  3710. * CSS values.
  3711. */
  class MDHTMLTagNode extends MDInlineNode {
    /**
     * The tag this node wraps.
     * @type {MDHTMLTag}
     */
    tag;
    /**
     * @param {MDHTMLTag} tag
     */
    constructor(tag) {
      super([]);
      this.tag = tag;
    }
    /**
     * @param {MDState} state
     * @returns {string} whatever the tag's own `toString()` produces
     */
    toHTML(state) {
      const { tag } = this;
      return tag.toString();
    }
  }
  3723. // -- Other -----------------------------------------------------------------
  3724. /**
  3725. * Markdown parser.
  3726. */
  class Markdown {
    /**
     * Set of standard readers.
     * @type {MDReader[]}
     */
    static standardReaders = [
      new MDUnderlinedHeadingReader(),
      new MDHashHeadingReader(),
      new MDBlockQuoteReader(),
      new MDHorizontalRuleReader(),
      new MDUnorderedListReader(),
      new MDOrderedListReader(),
      new MDFencedCodeBlockReader(),
      new MDIndentedCodeBlockReader(),
      new MDParagraphReader(),
      new MDStrongReader(),
      new MDEmphasisReader(),
      new MDCodeSpanReader(),
      new MDImageReader(),
      new MDLinkReader(),
      new MDHTMLTagReader(),
    ];
    /**
     * All supported readers: the standard set plus the extended syntax.
     * @type {MDReader[]}
     */
    static allReaders = [
      ...this.standardReaders,
      new MDSubtextReader(),
      new MDTableReader(),
      new MDDefinitionListReader(),
      new MDFootnoteReader(),
      new MDAbbreviationReader(),
      new MDUnderlineReader(),
      new MDSubscriptReader(),
      new MDStrikethroughReader(),
      new MDHighlightReader(),
      new MDSuperscriptReader(),
      new MDReferencedImageReader(),
      new MDReferencedLinkReader(),
      new MDModifierReader(),
    ];
    /**
     * Shared instance of a parser with standard syntax.
     */
    static standardParser = new Markdown(this.standardReaders);
    /**
     * Shared instance of a parser with all supported syntax.
     */
    static completeParser = new Markdown(this.allReaders);
    /**
     * Filter for what non-markdown HTML is permitted. HTML generated as a
     * result of markdown is unaffected.
     */
    tagFilter = new MDHTMLFilter();
    /** @type {MDReader[]} */
    #readers;
    /** @type {MDReader[]} */
    #readersByBlockPriority;
    /** @type {MDReader[]} */
    #readersByTokenPriority;
    /** @type {Array} */
    #readersBySubstitutePriority;
    /**
     * Creates a Markdown parser with the given syntax readers.
     *
     * @param {MDReader[]} readers
     */
    constructor(readers=Markdown.allReaders) {
      this.#readers = readers;
      this.#readersByBlockPriority = MDReader.sortReaderForBlocks(readers);
      this.#readersByTokenPriority = MDReader.sortReadersForTokenizing(readers);
      this.#readersBySubstitutePriority = MDReader.sortReadersForSubstitution(readers);
    }
    /**
     * Converts a markdown string to an HTML string.
     *
     * If parsing throws, the offending portion of the markdown is logged with
     * `console.error` and the original exception is rethrown.
     *
     * @param {string} markdown
     * @param {string} elementIdPrefix - Optional prefix for generated element
     * `id`s and links to them. For differentiating multiple markdown docs in
     * the same HTML page.
     * @returns {string} HTML
     */
    toHTML(markdown, elementIdPrefix='') {
      // `\r\n` has to be tried before the bare `\r`. Regex alternatives are
      // tried left to right, so with `\r` first every CRLF matched twice
      // (`\r`, then `\n`) and produced a phantom empty line between real lines.
      const lines = markdown.split(/\r\n|\n|\r/);
      try {
        return this.#parse(lines, elementIdPrefix);
      } catch (e) {
        this.#investigateException(lines, elementIdPrefix);
        throw e;
      }
    }
    /**
     * Runs the full pipeline on already-split lines: reader pre-processing,
     * block reading, reader post-processing, then HTML rendering.
     *
     * @param {string[]} lines
     * @param {string} elementIdPrefix
     * @returns {string} HTML
     */
    #parse(lines, elementIdPrefix) {
      const state = new MDState(lines);
      state.readersByBlockPriority = this.#readersByBlockPriority;
      state.readersByTokenPriority = this.#readersByTokenPriority;
      state.readersBySubstitutePriority = this.#readersBySubstitutePriority;
      state.tagFilter = this.tagFilter;
      state.elementIdPrefix = elementIdPrefix;
      for (const reader of this.#readers) {
        reader.preProcess(state);
      }
      const nodes = state.readBlocks();
      for (const reader of this.#readers) {
        reader.postProcess(state, nodes);
      }
      return MDNode.toHTML(nodes, state);
    }
    /**
     * Keeps removing first and last lines of markdown to locate the source of
     * an exception, then logs the narrowed-down portion with `console.error`.
     *
     * @param {string[]} lines
     * @param {string} elementIdPrefix
     */
    #investigateException(lines, elementIdPrefix) {
      let startIndex = 0;
      let endIndex = lines.length;
      // Keep stripping away first line until an exception stops being thrown
      for (let first = 0; first < lines.length; first++) {
        try {
          this.#parse(lines.slice(first, endIndex), elementIdPrefix);
          break;
        } catch (e0) {
          // Still failing from here: the exception is expected and only
          // tells us the failing region starts at or after this line.
          startIndex = first;
        }
      }
      // Keep stripping away last line until an exception stops being thrown
      for (let last = lines.length; last > startIndex; last--) {
        try {
          this.#parse(lines.slice(startIndex, last), elementIdPrefix);
          break;
        } catch (e0) {
          // Still failing up to here: the failing region ends at or before
          // this line.
          endIndex = last;
        }
      }
      const problematicMarkdown = lines.slice(startIndex, endIndex).join("\n");
      console.error(`This portion of markdown caused an unexpected exception: ${problematicMarkdown}`);
    }
  }