PHP and Javascript implementations of a simple markdown parser
Du kannst nicht mehr als 25 Themen auswählen Themen müssen mit entweder einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

markdown.js 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. // Blocks
  2. // - Paragraph
  3. // - Header 1-6 # ## ### #### ##### ###### or === ---
  4. // - Blockquote (nestable) >
  5. // - Unordered list (nestable) *_
  6. // - Ordered list (nestable) 1._
  7. // - Code block ```\ncode\n``` or 4 spaces/tab indent
  8. // - Horizontal rule --- - - - * * * etc
  9. // - Table -|-
  10. // - Definition list term\n: definition\n: alternate definition
  11. // - Footnote (bottom) citation[^1]
  12. // - Abbreviation (definition) *[ABC]: Abbrev Blah Cat
  13. // Inline
  14. // - Link [text](https://url)
  15. // - Emphasis *emphasized*
  16. // - Strong **bold**
  17. // - Inline code `code`
  18. // - Strikethrough ~strike~
  19. // - Image ![alt text](https://image){.cssclass}
  20. // - Footnote (inline) [^1]: footnote text
  21. // - Abbreviation (inline)
  22. class _MDBlock {
  23. toHTML(config) {
  24. throw new Error(self.constructor.name + ".toHTML not implemented");
  25. }
  26. /**
  27. * @param {_MDBlock[]} blocks
  28. * @returns {String}
  29. */
  30. static toHTML(blocks, config) {
  31. return blocks.map((block) => block.toHTML(config)).join("\n");
  32. }
  33. }
  34. class _MDMultiBlock extends _MDBlock {
  35. /** @var {_MDBlock[]} */
  36. #blocks;
  37. /**
  38. * @param {_MDBlock[]} blocks
  39. */
  40. constructor(blocks) {
  41. super();
  42. this.#blocks = blocks;
  43. }
  44. toHTML(config) {
  45. return _MDBlock.toHTML(this.#blocks, config);
  46. }
  47. }
  48. class _MDParagraph extends _MDBlock {
  49. /** @var {_MDBlock} */
  50. content;
  51. /**
  52. * @param {_MDBlock} content
  53. */
  54. constructor(content) {
  55. super();
  56. this.content = content;
  57. }
  58. toHTML(config) {
  59. let contentHTML = this.content.toHTML(config);
  60. return `<p>${contentHTML}</p>\n`;
  61. }
  62. }
  63. class _MDHeader extends _MDBlock {
  64. /** @var {number} */
  65. level;
  66. /** @var {_MDBlock} */
  67. content;
  68. /**
  69. * @param {number} level
  70. * @param {_MDBlock} content
  71. */
  72. constructor(level, content) {
  73. super();
  74. this.level = level;
  75. this.content = content;
  76. }
  77. toHTML(config) {
  78. let contentHTML = this.content.toHTML(config);
  79. return `<h${this.level}>${contentHTML}</h${this.level}>\n`;
  80. }
  81. }
  82. class _MDBlockquote extends _MDBlock {
  83. /** @var {_MDBlock[]} */
  84. content;
  85. /**
  86. * @param {_MDBlock[]} content
  87. */
  88. constructor(content) {
  89. super();
  90. this.content = content;
  91. }
  92. toHTML(config) {
  93. let contentHTML = _MDBlock.toHTML(this.content, config);
  94. return `<blockquote>\n${contentHTML}\n</blockquote>`;
  95. }
  96. }
  97. class _MDUnorderedList extends _MDBlock {
  98. /** @var {_MDListItem[]} */
  99. items;
  100. /**
  101. * @param {_MDListItem[]} items
  102. */
  103. constructor(items) {
  104. super();
  105. this.items = items;
  106. }
  107. toHTML(config) {
  108. let contentHTML = _MDBlock.toHTML(this.items);
  109. return `<ul>\n${contentHTML}\n</ul>`;
  110. }
  111. }
  112. class _MDOrderedList extends _MDBlock {
  113. /** @var {_MDListItem[]} */
  114. items;
  115. /**
  116. * @param {_MDListItem[]} items
  117. */
  118. constructor(items) {
  119. super();
  120. this.items = items;
  121. }
  122. toHTML(config) {
  123. let contentHTML = _MDBlock.toHTML(this.items);
  124. return `<ol>\n${contentHTML}\n</ol>`;
  125. }
  126. }
  127. class _MDListItem extends _MDBlock {
  128. /** @var {_MDBlock} */
  129. content;
  130. /**
  131. * @param {_MDBlock} content
  132. */
  133. constructor(content) {
  134. super();
  135. this.content = content;
  136. }
  137. toHTML(config) {
  138. let contentHTML = this.content.toHTML(config);
  139. return `<li>${contentHTML}</li>`;
  140. }
  141. }
  142. class _MDCodeBlock extends _MDBlock {
  143. /** @var {String} */
  144. #code;
  145. /**
  146. * @param {String} code
  147. */
  148. constructor(code) {
  149. super();
  150. this.#code = code;
  151. }
  152. toHTML(config) {
  153. return `<pre><code>${this.#code}</code></pre>`;
  154. }
  155. }
  156. class _MDHorizontalRule extends _MDBlock {
  157. toHTML(config) {
  158. return "<hr>\n";
  159. }
  160. }
  161. class _MDTableHeaderCell extends _MDBlock {
  162. /** @var {_MDBlock} */
  163. #content;
  164. /**
  165. * @param {_MDBlock} content
  166. */
  167. constructor(content) {
  168. super();
  169. this.#content = content;
  170. }
  171. toHTML(config) {
  172. let contentHTML = this.#content.toHTML(config);
  173. return `<th>${contentHTML}</th>`;
  174. }
  175. }
  176. class _MDTableCell extends _MDBlock {
  177. /** @var {_MDBlock} */
  178. #content;
  179. /**
  180. * @param {_MDBlock} content
  181. */
  182. constructor(content) {
  183. super();
  184. this.#content = content;
  185. }
  186. toHTML(config) {
  187. let contentHTML = this.#content.toHTML(config);
  188. return `<th>${contentHTML}</th>`;
  189. }
  190. }
  191. class _MDTableRow extends _MDBlock {
  192. /** @var {_MDTableCell[]|_MDTableHeaderCell[]} */
  193. #cells;
  194. /**
  195. * @param {_MDTableCell[]|_MDTableHeaderCell[]} cells
  196. */
  197. constructor(cells) {
  198. super();
  199. this.#cells = cells;
  200. }
  201. toHTML(config) {
  202. cellsHTML = _MDBlock.toHTML(this.#cells, config);
  203. return `<tr>\n${cellsHTML}\n</tr>`;
  204. }
  205. }
  206. class _MDTable extends _MDBlock {
  207. /** @var {_MDTableRow} */
  208. #headerRow;
  209. /** @var {_MDTableRow[]} */
  210. #bodyRows;
  211. toHTML(config) {
  212. let headerRowHTML = this.#headerRow.toHTML(config);
  213. let bodyRowsHTML = _MDBlock.toHTML(this.#bodyRows);
  214. return `<table>\n<thead>\n${headerRowHTML}\n</thead>\n<tbody>\n${bodyRowsHTML}\n</tbody>\n</table>`;
  215. }
  216. }
  217. class _MDDefinitionList extends _MDBlock {
  218. /** @var {_MDBlock[]} */
  219. #content;
  220. /**
  221. * @param {_MDBlock[]} content
  222. */
  223. constructor(content) {
  224. super();
  225. this.#content = content;
  226. }
  227. toHTML(config) {
  228. let contentHTML = _MDBlock.toHTML(this.#content);
  229. return `<dl>\n${contentHTML}\n</dl>`;
  230. }
  231. }
  232. class _MDDefinitionTerm extends _MDBlock {
  233. /** @var {_MDBlock} */
  234. #content;
  235. /**
  236. * @param {_MDBlock} content
  237. */
  238. constructor(content) {
  239. super();
  240. this.#content = content;
  241. }
  242. toHTML(config) {
  243. let contentHTML = this.#content.toHTML(config);
  244. return `<dt>${contentHTML}</dt>`;
  245. }
  246. }
  247. class _MDDefinitionDefinition extends _MDBlock {
  248. /** @var {_MDBlock} */
  249. #content;
  250. /**
  251. * @param {_MDBlock} content
  252. */
  253. constructor(content) {
  254. super();
  255. this.#content = content;
  256. }
  257. toHTML(config) {
  258. let contentHTML = this.#content.toHTML(config);
  259. return `<dd>${contentHTML}</dd>`;
  260. }
  261. }
  262. class _MDFootnoteReference extends _MDBlock {
  263. /** @var {String} */
  264. #id;
  265. /**
  266. * @param {String} id
  267. */
  268. constructor(id) {
  269. super();
  270. this.#id = id;
  271. }
  272. toHTML(config) {
  273. return `<sup><a href="#footnote${this.#id}">${this.#id}</a></sup>`;
  274. }
  275. }
  276. class _MDFootnoteContent extends _MDBlock {
  277. /** @var {String} */
  278. #id;
  279. /** @var {_MDBlock} */
  280. #content;
  281. /**
  282. * @param {String} id
  283. * @param {_MDBlock} content
  284. */
  285. constructor(id, content) {
  286. super();
  287. this.#id = id;
  288. this.#content = content;
  289. }
  290. toHTML(config) {
  291. // TODO: Forward and back links
  292. // TODO: Deferring footnotes to end of document
  293. //<ol>
  294. //<li id="fn:1" role="doc-endnote">
  295. //<p>Footnote&nbsp;<a href="#fnref:1" class="footnote-backref" role="doc-backlink">↩︎</a></p>
  296. //</li>
  297. //</ol>
  298. return '';
  299. }
  300. }
  301. class _MDAbbreviationOccurrence extends _MDBlock {
  302. /** @var {String} */
  303. #label;
  304. /** @var {String} */
  305. #definition;
  306. /**
  307. * @param {String} label
  308. * @param {String} definition
  309. */
  310. constructor(label, definition) {
  311. super();
  312. this.#label = label;
  313. this.#definition = definition;
  314. }
  315. toHTML(config) {
  316. return `<abbr title="${this.#definition.replace('"', '&quot;')}">${this.#label}</abbr>`;
  317. }
  318. }
  319. class _MDInline extends _MDBlock {
  320. /** @var {String} */
  321. #raw;
  322. /**
  323. * @param {String} raw
  324. */
  325. constructor(raw) {
  326. super();
  327. this.#raw = raw;
  328. }
  329. toHTML(config) {
  330. return this.#raw;
  331. }
  332. }
  333. // Blocks that immediately start a new block
  334. // - Headers
  335. // - Blockquote
  336. // - Code block ```\ncode\n```
  337. // Blocks that need blank line first
  338. // - HR --- - - - *** * * * * * *
  339. // - Lists
  340. // - Table
  341. // - Code block [4+spaces]code
  342. // - Definition list term\n: definition\n: alternate def
  343. // Unknown blocks
  344. // - Footnotes some text[^1] [^1]: first footnote content
  345. // - Abbreviations *[HTML]: Hyper Text
  346. // Inline styles
  347. // - Links
  348. // - Italic
  349. // - Bold
  350. // - `code`
  351. // - Strikethrough
  352. // - Images ![alt text](url){.cssclass}
  353. // - Literals \*
  354. class _MDState {
  355. /** @var {String[]} */
  356. lines = [];
  357. /** @var {object} */
  358. abbreviations = {};
  359. /** @var {object} */
  360. footnotes = {};
  361. /** @var {number} */
  362. p = 0;
  363. copy() {
  364. let cp = new _MDState();
  365. cp.abbreviations = this.abbreviations;
  366. cp.footnotes = this.footnotes;
  367. cp.p = this.p;
  368. return cp;
  369. }
  370. /** @param {_MDState} other */
  371. apply(other) {
  372. this.abbreviations = other.abbreviations;
  373. this.footnotes = other.footnotes;
  374. this.p = other.p;
  375. }
  376. hasLines(minCount) {
  377. return this.p + minCount <= this.lines.length;
  378. }
  379. }
  380. class MDConfig {
  381. }
  382. class Markdown {
  383. /**
  384. * @param {String} line
  385. */
  386. static #stripIndent(line) {
  387. return line.replace(/^(?: {1,4}|\t)/, '');
  388. }
  389. /**
  390. * @param {_MDState} state
  391. * @returns {_MDBlock[]}
  392. */
  393. static #readBlocks(state) {
  394. var blocks = [];
  395. while (state.hasLines(1)) {
  396. let block = this.#readNextBlock(state);
  397. if (block) {
  398. blocks.push(block);
  399. } else {
  400. break;
  401. }
  402. }
  403. return blocks;
  404. }
  405. /**
  406. * @param {_MDState} state
  407. * @returns {_MDBlock}
  408. */
  409. static #readNextBlock(state) {
  410. while (state.hasLines(1) && state.lines[state.p].trim().length == 0) {
  411. console.info("Skipping blank line " + state.p);
  412. state.p++;
  413. }
  414. var block;
  415. block = this.#readUnderlineHeader(state); if (block) return block;
  416. block = this.#readHashHeader(state); if (block) return block;
  417. block = this.#readBlockQuote(state); if (block) return block;
  418. block = this.#readUnorderedList(state); if (block) return block;
  419. block = this.#readOrderedList(state); if (block) return block;
  420. block = this.#readFencedCodeBlock(state); if (block) return block;
  421. block = this.#readIndentedCodeBlock(state); if (block) return block;
  422. block = this.#readHorizontalRule(state); if (block) return block;
  423. block = this.#readTable(state); if (block) return block;
  424. block = this.#readDefinitionList(state); if (block) return block;
  425. block = this.#readFootnoteDef(state); if (block) return block;
  426. block = this.#readAbbreviationDef(state); if (block) return block;
  427. block = this.#readParagraph(state); if (block) return block;
  428. return null;
  429. }
  430. /**
  431. * @param {_MDState} state
  432. * @returns {_MDBlock|null}
  433. */
  434. static #readInline(state, line) {
  435. return new _MDInline(line);
  436. }
  437. /**
  438. * Reads the contents of something like a list item
  439. * @param {_MDState} state
  440. * @param {number} firstLineStartPos
  441. * @param {RegExp} stopRegex
  442. * @returns {_MDBlock}
  443. */
  444. static #readInteriorContent(state, firstLineStartPos, stopRegex) {
  445. var p = state.p;
  446. var seenBlankLine = false;
  447. var needsBlocks = false;
  448. var lines = [];
  449. while (p < state.lines.length) {
  450. let line = state.lines[p++];
  451. if (p == state.p + 1) {
  452. line = line.substring(firstLineStartPos);
  453. }
  454. let isBlank = line.trim().length == 0;
  455. let isIndented = /^\s+/.exec(line) !== null;
  456. if (isBlank) {
  457. seenBlankLine = true;
  458. lines.push(line.trim());
  459. } else if (stopRegex && stopRegex.exec(line)) {
  460. p--;
  461. break;
  462. } else if (isIndented) {
  463. if (seenBlankLine) {
  464. needsBlocks = true;
  465. }
  466. lines.push(this.#stripIndent(line));
  467. } else {
  468. if (seenBlankLine) {
  469. p--;
  470. break;
  471. }
  472. lines.push(this.#stripIndent(line));
  473. }
  474. }
  475. while (lines.length > 0 && lines[lines.length - 1].trim().length == 0) {
  476. lines.pop();
  477. }
  478. if (needsBlocks) {
  479. let substate = new _MDState();
  480. substate.lines = lines;
  481. substate.abbreviations = state.abbreviations;
  482. substate.footnotes = state.footnotes;
  483. let blocks = this.#readBlocks(substate);
  484. state.p = p;
  485. return new _MDMultiBlock(blocks);
  486. } else {
  487. state.p = p;
  488. return this.#readInline(state, lines.join("\n"));
  489. }
  490. }
  491. /**
  492. * @param {_MDState} state
  493. * @returns {_MDBlock|null}
  494. */
  495. static #readUnderlineHeader(state) {
  496. var p = state.p;
  497. if (!state.hasLines(2)) return null;
  498. let contentLine = state.lines[p++].trim();
  499. let underLine = state.lines[p++].trim();
  500. if (contentLine == '') return null;
  501. if (/^=+$/.exec(underLine)) {
  502. state.p = p;
  503. return new _MDHeader(1, this.#readInline(state, contentLine));
  504. }
  505. if (/^\-+$/.exec(underLine)) {
  506. state.p = p;
  507. return new _MDHeader(2, this.#readInline(state, contentLine));
  508. }
  509. return null;
  510. }
  511. /**
  512. * @param {_MDState} state
  513. * @returns {_MDBlock|null}
  514. */
  515. static #readHashHeader(state) {
  516. var p = state.p;
  517. var groups = /^(#{1,6})\s*([^#].*)\s*$/.exec(state.lines[p++]);
  518. if (groups === null) return null;
  519. state.p = p;
  520. return new _MDHeader(groups[1].length, this.#readInline(state, groups[2]));
  521. }
  522. /**
  523. * @param {_MDState} state
  524. * @returns {_MDBlock|null}
  525. */
  526. static #readBlockQuote(state) {
  527. var blockquoteLines = [];
  528. var p = state.p;
  529. while (p < state.lines.length) {
  530. let line = state.lines[p++];
  531. if (line.startsWith(">")) {
  532. blockquoteLines.push(line);
  533. } else {
  534. break;
  535. }
  536. }
  537. if (blockquoteLines.length > 0) {
  538. let contentLines = blockquoteLines.map(function(line) {
  539. return line.substring(1).replace(/^ {0,3}\t?/, '');
  540. });
  541. let substate = new _MDState();
  542. substate.lines = contentLines;
  543. substate.abbreviations = state.abbreviations;
  544. substate.footnotes = state.footnotes;
  545. let quotedBlocks = this.#readBlocks(substate);
  546. state.p = p;
  547. return new _MDBlockquote(quotedBlocks);
  548. }
  549. return null;
  550. }
  551. /**
  552. * @param {_MDState} state
  553. * @returns {_MDListItem|null}
  554. */
  555. static #readUnorderedListItem(state) {
  556. var p = state.p;
  557. let line = state.lines[p];
  558. let groups = /^([\*\+\-]\s+)(.*)$/.exec(line);
  559. if (groups === null) return null;
  560. return new _MDListItem(this.#readInteriorContent(state, groups[1].length, /^[\*\+\-]\s+/));
  561. }
  562. /**
  563. * @param {_MDState} state
  564. * @returns {_MDBlock|null}
  565. */
  566. static #readUnorderedList(state) {
  567. var p = state.p;
  568. var items = [];
  569. var item = null;
  570. do {
  571. item = this.#readUnorderedListItem(state);
  572. if (item) items.push(item);
  573. } while (item);
  574. if (items.length == 0) return null;
  575. return new _MDUnorderedList(items);
  576. }
  577. /**
  578. * @param {_MDState} state
  579. * @returns {_MDBlock|null}
  580. */
  581. static #readOrderedList(state) {
  582. return null;
  583. }
  584. /**
  585. * @param {_MDState} state
  586. * @returns {_MDBlock|null}
  587. */
  588. static #readFencedCodeBlock(state) {
  589. return null;
  590. }
  591. /**
  592. * @param {_MDState} state
  593. * @returns {_MDBlock|null}
  594. */
  595. static #readIndentedCodeBlock(state) {
  596. return null;
  597. }
  598. /**
  599. * @param {_MDState} state
  600. * @returns {_MDBlock|null}
  601. */
  602. static #readHorizontalRule(state) {
  603. var p = state.p;
  604. let line = state.lines[p++];
  605. if (/^\s*(?:\-(?:\s*\-){2,}|\*(?:\s*\*){2,})\s*$/.exec(line)) {
  606. state.p = p;
  607. return new _MDHorizontalRule();
  608. }
  609. return null;
  610. }
  611. /**
  612. * @param {_MDState} state
  613. * @returns {_MDBlock|null}
  614. */
  615. static #readTable(state) {
  616. return null;
  617. }
  618. /**
  619. * @param {_MDState} state
  620. * @returns {_MDBlock|null}
  621. */
  622. static #readDefinitionList(state) {
  623. return null;
  624. }
  625. /**
  626. * @param {_MDState} state
  627. * @returns {_MDBlock|null}
  628. */
  629. static #readFootnoteDef(state) {
  630. return null;
  631. }
  632. /**
  633. * @param {_MDState} state
  634. * @returns {_MDBlock|null}
  635. */
  636. static #readAbbreviationDef(state) {
  637. return null;
  638. }
  639. /**
  640. * @param {_MDState} state
  641. * @returns {_MDBlock|null}
  642. */
  643. static #readParagraph(state) {
  644. if (!state.hasLines(1)) return null;
  645. var paragraphLines = [];
  646. var p = state.p;
  647. while (p < state.lines.length) {
  648. let line = state.lines[p++];
  649. if (line.trim().length == 0) {
  650. break;
  651. }
  652. paragraphLines.push(line);
  653. }
  654. if (paragraphLines.length > 0) {
  655. state.p = p;
  656. let content = paragraphLines.join("\n");
  657. return new _MDParagraph(this.#readInline(state, content));
  658. }
  659. return null;
  660. }
  661. /**
  662. * @param {String} markdown
  663. * @returns {String} HTML
  664. */
  665. static toHTML(markdown, config=new MDConfig()) {
  666. var state = new _MDState();
  667. let lines = markdown.replace("\r", "").split("\n");
  668. state.lines = lines;
  669. let blocks = this.#readBlocks(state);
  670. let html = _MDBlock.toHTML(blocks);
  671. return html;
  672. }
  673. }