瀏覽代碼

All block readers ported

main
Rocketsoup 1 年之前
父節點
當前提交
42b74753c1
共有 3 個文件被更改,包括 902 次插入18 次删除
  1. 1
    1
      js/markdown.js
  2. 1
    1
      js/markdown.min.js
  3. 900
    16
      php/markdown.php

+ 1
- 1
js/markdown.js 查看文件

@@ -2644,7 +2644,7 @@ class MDParagraphReader extends MDReader {
2644 2644
 	readBlock(state) {
2645 2645
 		var paragraphLines = [];
2646 2646
 		var p = state.p;
2647
-		while (p < state.lines.length) {
2647
+		while (state.hasLines(1, $p)) {
2648 2648
 			let line = state.lines[p++];
2649 2649
 			if (line.trim().length == 0) {
2650 2650
 				break;

+ 1
- 1
js/markdown.min.js
文件差異過大導致無法顯示
查看文件


+ 900
- 16
php/markdown.php 查看文件

@@ -1459,37 +1459,921 @@ class MDTagModifier {
1459 1459
 // -- Readers ---------------------------------------------------------------
1460 1460
 
1461 1461
 
1462
-class MDReader {}
1462
/**
 * Base class for readers of various markdown syntax. A `Markdown` instance can
 * be created with any combination of subclasses of these to customize the
 * flavor of markdown parsed.
 *
 * @see {@link custom.md} for details on subclassing
 */
class MDReader {
	/**
	 * Called before processing begins. `state.lines` is populated and the
	 * line pointer `state.p` will be at `0`.
	 *
	 * Default implementation does nothing.
	 */
	public function preProcess(MDState $state) {}

	/**
	 * Attempts to read an `MDBlockNode` subclass at the current line pointer
	 * `state.p`. Only matches if the block pattern starts at the line pointer,
	 * not elsewhere in the `state.lines` array. If a block is found, `state.p`
	 * should be incremented to the next line _after_ the block structure and
	 * a `MDBlockNode` subclass instance is returned. If no block is found,
	 * returns `null`.
	 *
	 * Default implementation always returns `null`.
	 */
	public function readBlock(MDState $state): ?MDBlockNode { return null; }

	/**
	 * Attempts to read an inline token from the beginning of `line`. Only the
	 * start of the given `line` is considered. If a matching token is found, an
	 * `MDToken` is returned. Otherwise `null` is returned.
	 *
	 * Default implementation always returns `null`.
	 */
	public function readToken(MDState $state, string $line): ?MDToken { return null; }

	/**
	 * Attempts to find a pattern anywhere in `tokens` and perform a _single_
	 * substitution with one or more `MDNode` subclass instances.
	 * If a substitution is performed, must return `true`, otherwise `false`.
	 *
	 * Default implementation always returns `false`.
	 *
	 * NOTE(review): `$tokens` is received by value, so modifications made by an
	 * implementation are not visible to the caller as the signature stands.
	 * Confirm how the caller is expected to observe the substitution.
	 *
	 * @param MDState $state
	 * @param int $pass  what substitution pass this is, starting with 1
	 * @param (MDToken|MDInlineNode)[] $tokens  mixed array of `MDToken` and `MDInlineNode` elements
	 * @return bool  `true` if a substitution was performed, `false` if not
	 */
	public function substituteTokens(MDState $state, int $pass, array $tokens): bool { return false; }

	/**
	 * Called after all parsing has completed. An array `blocks` is passed of
	 * all the top-level `MDBlockNode` elements in the document which this
	 * method can traverse.
	 *
	 * `MDNode::visitChildren` is useful for recursively looking for certain
	 * `MDNode` instances. `MDNode::replaceNodes` is useful for swapping in
	 * replacements.
	 *
	 * Default implementation does nothing.
	 *
	 * NOTE(review): `$blocks` is received by value, so adding or removing
	 * top-level elements here does not affect the caller's array as the
	 * signature stands.
	 *
	 * @param MDState $state
	 * @param MDBlockNode[] $blocks
	 */
	public function postProcess(MDState $state, array $blocks) {}

	/**
	 * Can be overridden to influence ordering of this reader with respect to
	 * another during the block parsing phase. Return `-1` to be ordered before
	 * the given reader, `1` to be ordered after it, or `0` for no preference.
	 * Only return non-`0` values to resolve specific conflicts.
	 *
	 * Default implementation always returns `0` (no preference).
	 *
	 * @param MDReader $other
	 * @return int  a negative, positive, or 0 value to be ordered before,
	 *   after, or anywhere relative to `other`, respectively
	 */
	public function compareBlockOrdering(MDReader $other): int {
		return 0;
	}

	/**
	 * Can be overridden to influence ordering of this reader with respect to
	 * another during the tokenizing phase. Return `-1` to be ordered before
	 * the given reader, `1` to be ordered after it, or `0` for no preference.
	 * Only return non-`0` values to resolve specific conflicts.
	 *
	 * Default implementation always returns `0` (no preference).
	 *
	 * @param MDReader $other
	 * @return int  a negative, positive, or 0 value to be ordered before,
	 *   after, or anywhere relative to `other`, respectively
	 */
	public function compareTokenizeOrdering(MDReader $other): int {
		return 0;
	}

	/**
	 * Can be overridden to influence ordering of this reader with respect to
	 * another during the substitution phase. Return `-1` to be ordered before
	 * the given reader, `1` to be ordered after it, or `0` for no preference.
	 * Only return non-`0` values to resolve specific conflicts.
	 *
	 * Readers are sorted within each substitution pass. All pass 1 readers are
	 * processed first, then all pass 2 readers, etc. The number of passes this
	 * reader participates in is dictated by `substitutionPassCount`.
	 *
	 * Default implementation always returns `0` (no preference).
	 *
	 * @param MDReader $other
	 * @param int $pass  substitution pass, with numbering starting at `1`
	 * @return int  a negative, positive, or 0 value to be ordered before,
	 *   after, or anywhere relative to `other`, respectively
	 */
	public function compareSubstituteOrdering(MDReader $other, int $pass): int {
		return 0;
	}

	/**
	 * How many substitution passes this reader requires. Substitution allows
	 * all pass 1 readers to process first, then all pass 2 readers, etc.
	 */
	public function substitutionPassCount(): int { return 1; }

	/**
	 * For sorting readers with ordering preferences. The `compare` methods
	 * don't have the properties of normal sorting compares so need to sort
	 * differently.
	 *
	 * Elements are keyed by the value `$idFn` returns; two elements yielding
	 * the same id share one graph node, and only the last one is kept.
	 *
	 * @param array $arr  array to sort; keys are ignored
	 * @param callable $compareFn  comparison function, taking two array element
	 *   arguments and returning -1, 0, or 1 for a < b, a == b, and a > b,
	 *   respectively
	 * @param callable $idFn  function for returning a unique hashable id for
	 *   the array element
	 * @return array  sorted array
	 */
	private static function kahnTopologicalSort(array $arr, callable $compareFn, callable $idFn): array {
		// Callers may hand in arrays with gaps in their keys (for instance the
		// result of `array_filter`), so reindex before positional access.
		$arr = array_values($arr);
		$count = sizeof($arr);

		$ids = [];
		$graph = [];
		$inDegrees = [];
		$valuesById = [];

		// Build the graph nodes and initialize in-degrees
		foreach ($arr as $i => $elem) {
			$id = $idFn($elem);
			$ids[$i] = $id;
			$graph[$id] = [];
			$inDegrees[$id] = 0;
			$valuesById[$id] = $elem;
		}

		// Add an edge "A before B" for every stated preference. Each ordered
		// pair is consulted, so a preference expressed by either side counts.
		for ($i = 0; $i < $count; $i++) {
			$idA = $ids[$i];
			for ($j = 0; $j < $count; $j++) {
				if ($i === $j) continue;
				$idB = $ids[$j];
				$comparisonResult = $compareFn($arr[$i], $arr[$j]);
				if ($comparisonResult < 0) {
					$graph[$idA][] = $idB;
					$inDegrees[$idB]++;
				} elseif ($comparisonResult > 0) {
					$graph[$idB][] = $idA;
					$inDegrees[$idA]++;
				}
			}
		}

		// Initialize the queue with zero-inDegree nodes
		$queue = [];
		foreach ($inDegrees as $elemId => $degree) {
			if ($degree === 0) {
				$queue[] = $elemId;
			}
		}

		// Process the queue and build the topological order list
		$sorted = [];
		while (sizeof($queue) > 0) {
			$elemId = array_shift($queue);
			$sorted[] = $valuesById[$elemId];
			unset($valuesById[$elemId]);

			foreach ($graph[$elemId] as $neighbor) {
				$inDegrees[$neighbor]--;
				if ($inDegrees[$neighbor] === 0) {
					$queue[] = $neighbor;
				}
			}
		}

		// Anything never released from the queue (e.g. members of a preference
		// cycle) is appended at the end in its original relative order.
		foreach ($valuesById as $value) {
			$sorted[] = $value;
		}

		return $sorted;
	}

	/**
	 * Returns a sorted array of readers by their block priority preferences.
	 * The given array is not modified.
	 *
	 * @param MDReader[] $readers
	 * @return MDReader[]  sorted readers
	 */
	public static function sortReaderForBlocks(array $readers): array {
		return self::kahnTopologicalSort(
			$readers,
			fn(MDReader $a, MDReader $b): int => $a->compareBlockOrdering($b),
			fn(MDReader $elem): string => get_class($elem)
		);
	}

	/**
	 * Returns a sorted array of readers by their tokenization priority preferences.
	 * The given array is not modified.
	 *
	 * @param MDReader[] $readers
	 * @return MDReader[]  sorted readers
	 */
	public static function sortReadersForTokenizing(array $readers): array {
		return self::kahnTopologicalSort(
			$readers,
			fn(MDReader $a, MDReader $b): int => $a->compareTokenizeOrdering($b),
			fn(MDReader $elem): string => get_class($elem)
		);
	}

	/**
	 * Returns a sorted array of tuples (arrays) containing the substitution
	 * pass number and reader instance, sorted by their substitution priority
	 * preferences.
	 *
	 * For readers with `substitutionPassCount` > `1`, the same reader will
	 * appear multiple times in the resulting array, one per pass.
	 *
	 * @param MDReader[] $readers
	 * @return array[]  sorted array of `[ int $pass, MDReader $reader ]` tuples
	 */
	public static function sortReadersForSubstitution(array $readers): array {
		$tuples = [];
		$maxPass = 1;
		foreach ($readers as $reader) {
			$passCount = $reader->substitutionPassCount();
			for ($pass = 1; $pass <= $passCount; $pass++) {
				$tuples[] = [ $pass, $reader ];
			}
			// Use the declared count, not the loop counter, which ends one
			// past the last pass.
			$maxPass = max($maxPass, $passCount);
		}

		$result = [];
		for ($pass = 1; $pass <= $maxPass; $pass++) {
			$tuplesThisPass = array_filter($tuples, fn(array $tup): bool => $tup[0] === $pass);
			$passResult = self::kahnTopologicalSort(
				$tuplesThisPass,
				fn(array $a, array $b): int => $a[1]->compareSubstituteOrdering($b[1], $pass),
				fn(array $tup): string => get_class($tup[1])
			);
			$result = array_merge($result, $passResult);
		}
		return $result;
	}
}
1724
+
1725
/**
 * Reads markdown blocks for headings denoted with the underline syntax: a
 * content line followed by a line made up only of `=` (level 1) or `-`
 * (level 2) characters.
 *
 * Supports `MDTagModifier` suffixes.
 */
class MDUnderlinedHeadingReader extends MDReader {
	/**
	 * Reads a two-line underlined heading at `state.p`. On success `state.p` is
	 * advanced past both lines; otherwise it is left untouched.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDHeadingNode`, or `null` if no heading
	 *   starts at the line pointer
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		if (!$state->hasLines(2)) return null;
		$p = $state->p;
		$contentLine = trim($state->lines[$p++]);
		[$contentLine, $modifier] = MDTagModifier::fromLine($contentLine, $state);
		$underLine = trim($state->lines[$p++]);
		if ($contentLine === '') return null;

		if (mb_eregi('^=+$', $underLine)) {
			$level = 1;
		} elseif (mb_eregi('^\-+$', $underLine)) {
			$level = 2;
		} else {
			return null;
		}

		$state->p = $p;
		$block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($contentLine));
		if ($modifier) $modifier->applyTo($block);
		return $block;
	}
}
1754
+
1755
/**
 * Reads markdown blocks for headings denoted with hash marks. Heading levels 1
 * to 6 are supported.
 *
 * Supports `MDTagModifier` suffixes.
 */
class MDHashHeadingReader extends MDReader {
	private static string $hashHeadingRegex = '^(#{1,6})\\s*([^#].*?)\\s*\\#*\\s*$';  // 1=hashes, 2=content

	/**
	 * Reads a single-line hash heading at `state.p`. The heading level is the
	 * number of leading hash marks. On success `state.p` is advanced by one
	 * line; otherwise it is left untouched.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDHeadingNode`, or `null` if the line is
	 *   not a hash heading
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$p = $state->p;
		$line = $state->lines[$p++];
		[$line, $modifier] = MDTagModifier::fromLine($line, $state);
		// Static properties need the `$` sigil; without it PHP looks up a
		// class constant instead.
		if (!mb_eregi(self::$hashHeadingRegex, $line, $groups)) return null;
		$state->p = $p;
		$level = mb_strlen($groups[1]);
		$content = $groups[2];
		$block = new MDHeadingNode($level, $state->inlineMarkdownToNodes($content));
		if ($modifier) $modifier->applyTo($block);
		return $block;
	}
}
1778
+
1779
/**
 * Reads subtext blocks. Subtext is smaller, fainter text for things like
 * disclaimers or sources. A subtext line starts with `-#`.
 *
 * Supports `MDTagModifier` suffixes.
 */
class MDSubtextReader extends MDReader {
	private static string $subtextRegex = '^\\-#\\s*(.*?)\\s*$';  // 1=content

	/**
	 * Reads a single-line subtext block at `state.p`. On success `state.p` is
	 * advanced by one line; otherwise it is left untouched.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDSubtextNode`, or `null` if the line is
	 *   not subtext
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$p = $state->p;
		$line = $state->lines[$p++];
		[$line, $modifier] = MDTagModifier::fromLine($line, $state);
		// Static properties need the `$` sigil; without it PHP looks up a
		// class constant instead.
		if (!mb_eregi(self::$subtextRegex, $line, $groups)) return null;
		$state->p = $p;
		$content = $groups[1];
		$block = new MDSubtextNode($state->inlineMarkdownToNodes($content));
		if ($modifier) $modifier->applyTo($block);
		return $block;
	}

	/**
	 * Asks to be tried before the unordered list reader, whose `-` bullet
	 * shares the first character of the `-#` subtext marker.
	 */
	public function compareBlockOrdering(MDReader $other): int {
		if ($other instanceof MDUnorderedListReader) {
			return -1;
		}
		return 0;
	}
}
1808
+
1809
/**
 * Reads markdown blocks for blockquoted text. A blockquote is a run of
 * consecutive lines that each begin with `>`.
 */
class MDBlockQuoteReader extends MDReader {
	/**
	 * Reads a blockquote starting at `state.p`. The quoted lines have their
	 * `>` marker (plus up to three spaces and an optional tab) removed and are
	 * then parsed as blocks in a copy of the state. On success `state.p` is
	 * left on the first line that is not part of the quote.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDBlockquoteNode`, or `null` if the line at
	 *   the pointer does not start with `>`
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$blockquoteLines = [];
		$p = $state->p;
		while ($p < sizeof($state->lines)) {
			$line = $state->lines[$p];
			// Stop *before* consuming the first non-quote line so the next
			// reader still sees it.
			if (!str_starts_with($line, ">")) break;
			$blockquoteLines[] = $line;
			$p++;
		}
		if (sizeof($blockquoteLines) == 0) return null;
		$contentLines = array_map(
			fn($line) => mb_eregi_replace('^ {0,3}\\t?', '', mb_substr($line, 1)),
			$blockquoteLines
		);
		$substate = $state->copy($contentLines);
		$quotedBlocks = $substate->readBlocks();
		$state->p = $p;
		return new MDBlockquoteNode($quotedBlocks);
	}
}
1832
+
1833
/**
 * Internal abstract base class for ordered and unordered lists.
 */
class _MDListReader extends MDReader {
	/**
	 * Collects the lines belonging to the list item that starts at `state.p`,
	 * without moving the pointer. Collection stops at the next list item
	 * marker, or at an unindented line that follows a blank line.
	 *
	 * @param MDState $state
	 * @param int $firstLineStartPos  number of leading characters (the bullet
	 *   or number prefix) to drop from the first line
	 * @return string[]  item lines passed through `MDUtils::stripIndent`
	 */
	private static function readItemLines(MDState $state, int $firstLineStartPos): array {
		$p = $state->p;
		$lines = [];
		$seenBlankLine = false;
		$stripTrailingBlankLines = true;
		while ($state->hasLines(1, $p)) {
			$isFirstLine = ($p == $state->p);
			$line = $state->lines[$p++];
			if ($isFirstLine) {
				$line = mb_substr($line, $firstLineStartPos);
			}
			if (mb_eregi('^(?:\\*|\\+|\\-|\\d+\\.)\\s+', $line)) {
				// Found next list item
				$stripTrailingBlankLines = false; // because this signals extra spacing intended
				break;
			}
			$isBlankLine = trim($line) == '';
			$isIndented = mb_eregi('^\\s+\\S', $line);
			if ($isBlankLine) {
				$seenBlankLine = true;
			} elseif (!$isIndented && $seenBlankLine) {
				// Post-list content
				break;
			}
			$lines[] = $line;
		}
		// Honor the flag computed above: blank lines that sit between this
		// item and the next are kept, so they count toward the lines consumed.
		if ($stripTrailingBlankLines) {
			$lines = MDUtils::withoutTrailingBlankLines($lines);
		}
		return MDUtils::stripIndent($lines);
	}

	/**
	 * Reads the content of the list item at `state.p` and advances `state.p`
	 * by the number of item lines read (at least one).
	 *
	 * A single-line item is parsed as inline markdown. Multi-line items are
	 * parsed as blocks in a copy of the state; when there are no blank lines
	 * to separate blocks, a nested list marker is used as a boundary instead.
	 *
	 * NOTE(review): several paths return `MDNode` or the result of
	 * `inlineMarkdownToNode` under a declared `MDBlockNode` return type —
	 * confirm these are compatible with the node class hierarchy.
	 *
	 * @param MDState $state
	 * @param int $firstLineStartPos  number of leading characters to drop from
	 *   the first line
	 * @return MDBlockNode  the item's content
	 */
	protected function readListItemContent(MDState $state, int $firstLineStartPos): MDBlockNode {
		$itemLines = self::readItemLines($state, $firstLineStartPos);
		$state->p += max(sizeof($itemLines), 1);

		if (sizeof($itemLines) == 1) {
			return $state->inlineMarkdownToNode($itemLines[0]);
		}

		$hasBlankLines = sizeof(array_filter($itemLines, fn($line) => trim($line) == '')) > 0;
		if (!$hasBlankLines) {
			// Multiline content with no blank lines. Search for new block
			// boundaries without the benefit of a blank line to demarcate it.
			for ($p = 1; $p < sizeof($itemLines); $p++) {
				$line = $itemLines[$p];
				if (mb_eregi('^(?:\\*|\\-|\\+|\\d+\\.)\\s+', $line)) {
					// Nested list found
					$firstBlock = $state->inlineMarkdownToNode(implode("\n", array_slice($itemLines, 0, $p)));
					$substate = $state->copy(array_slice($itemLines, $p));
					$blocks = $substate->readBlocks();
					return new MDNode(array_merge([ $firstBlock ], $blocks));
				}
			}
		}

		// Either blank lines delimit the blocks, or no nested list was found:
		// do a standard block read.
		$substate = $state->copy($itemLines);
		$blocks = $substate->readBlocks();
		return (sizeof($blocks) == 1) ? $blocks[0] : new MDNode($blocks);
	}

	/**
	 * Must be overridden by subclasses.
	 *
	 * @throws Error  always
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		throw new Error('Abstract readBlock must be overridden in ' . static::class);
	}
}
1907
+
1908
/**
 * Block reader for unordered (bulleted) lists. Items begin with `*`, `+`, or
 * `-` followed by whitespace.
 */
class MDUnorderedListReader extends _MDListReader {
	private static string $unorderedListRegex = '^([\\*\\+\\-]\\s+)(.*)$';  // 1=bullet, 2=content

	/**
	 * Reads one bulleted item at `state.p`, or returns `null` if the line at
	 * the pointer is not a bullet item.
	 */
	private function readUnorderedListItem(MDState $state): ?MDListItemNode {
		// `readBlock` always probes once more after the last item, so the
		// pointer may already be past the final line.
		// NOTE(review): assumes `hasLines(1)` reports whether a line exists at
		// `state.p`, as its use in `MDTableReader::readTableRow` suggests.
		if (!$state->hasLines(1)) return null;
		$line = $state->lines[$state->p];
		if (!mb_eregi(self::$unorderedListRegex, $line, $groups)) return null;
		$firstLineOffset = mb_strlen($groups[1]);
		return new MDListItemNode($this->readListItemContent($state, $firstLineOffset));
	}

	/**
	 * Reads consecutive bulleted items starting at `state.p`.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDUnorderedListNode`, or `null` if no item
	 *   starts at the line pointer
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$items = [];
		while (($item = $this->readUnorderedListItem($state)) !== null) {
			$items[] = $item;
		}
		if (sizeof($items) == 0) return null;
		return new MDUnorderedListNode($items);
	}
}
1933
+
1934
/**
 * Block reader for ordered (numbered) lists. The number of the first item is
 * used to begin counting. The subsequent items increase by 1, regardless of
 * their value.
 */
class MDOrderedListReader extends _MDListReader {
	private static string $orderedListRegex = '^(\\d+)(\\.\\s+)(.*)$';  // 1=number, 2=dot, 3=content

	/**
	 * Reads one numbered item at `state.p`, or returns `null` if the line at
	 * the pointer is not a numbered item.
	 */
	private function readOrderedListItem(MDState $state): ?MDListItemNode {
		// `readBlock` always probes once more after the last item, so the
		// pointer may already be past the final line.
		// NOTE(review): assumes `hasLines(1)` reports whether a line exists at
		// `state.p`, as its use in `MDTableReader::readTableRow` suggests.
		if (!$state->hasLines(1)) return null;
		$line = $state->lines[$state->p];
		if (!mb_eregi(self::$orderedListRegex, $line, $groups)) return null;
		$ordinal = intval($groups[1]);
		$firstLineOffset = mb_strlen($groups[1]) + mb_strlen($groups[2]);
		return new MDListItemNode($this->readListItemContent($state, $firstLineOffset), $ordinal);
	}

	/**
	 * Reads consecutive numbered items starting at `state.p`.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDOrderedListNode` whose start number is
	 *   the first item's ordinal, or `null` if no item starts at the pointer
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$items = [];
		while (($item = $this->readOrderedListItem($state)) !== null) {
			$items[] = $item;
		}
		// Bail out only when nothing was read.
		if (sizeof($items) == 0) return null;
		return new MDOrderedListNode($items, $items[0]->ordinal);
	}
}
1962
+
1963
/**
 * Block reader for code blocks denoted by pairs of triple tickmarks. If
 * a programming language name, _xyz_, immediately follows the backticks, a
 * `language-xyz` CSS class will be added to the resulting `<code>`
 * element.
 *
 * Supports `MDTagModifier` suffix.
 */
class MDFencedCodeBlockReader extends MDReader {
	/**
	 * Reads a fenced code block whose opening fence is at `state.p`. The block
	 * is only recognized if a closing fence line (exactly three backticks
	 * after trimming) is found; `state.p` is then advanced past it. Otherwise
	 * `state.p` is left untouched.
	 *
	 * NOTE(review): the opening-fence pattern is not anchored at the start of
	 * the line, so any line *ending* in a fence matches — confirm intended.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDCodeBlockNode`, or `null`
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		if (!$state->hasLines(2)) return null;
		$p = $state->p;
		$openFenceLine = $state->lines[$p++];
		[$openFenceLine, $modifier] = MDTagModifier::fromLine($openFenceLine, $state);
		if (!mb_eregi('```\s*([a-z0-9]*)\s*$', $openFenceLine, $groups)) return null;
		$language = mb_strlen($groups[1]) > 0 ? $groups[1] : null;
		$codeLines = [];
		while ($state->hasLines(1, $p)) {
			$line = $state->lines[$p++];
			if (trim($line) === '```') {
				$state->p = $p;
				$block = new MDCodeBlockNode(implode("\n", $codeLines), $language);
				if ($modifier) $modifier->applyTo($block);
				return $block;
			}
			$codeLines[] = $line;
		}
		// Ran out of lines without finding the closing fence.
		return null;
	}
}
1993
+
1994
/**
 * Block reader for code blocks denoted by indenting text.
 */
class MDIndentedCodeBlockReader extends MDReader {
	/**
	 * Gathers the run of lines from `state.p` onward for which
	 * `MDUtils::countIndents` reports at least one indent, strips their
	 * indent, and joins them into a code block. `state.p` ends up on the
	 * first line that did not qualify; it is untouched when nothing is read.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDCodeBlockNode`, or `null` if the line at
	 *   the pointer is not indented
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$cursor = $state->p;
		$collected = [];
		for (; $state->hasLines(1, $cursor); $cursor++) {
			$candidate = $state->lines[$cursor];
			if (MDUtils::countIndents($candidate, true) < 1) {
				break;
			}
			$collected[] = MDUtils::stripIndent($candidate);
		}
		if (sizeof($collected) == 0) {
			return null;
		}
		$state->p = $cursor;
		$code = implode("\n", $collected);
		return new MDCodeBlockNode($code);
	}
}
2014
+
2015
/**
 * Block reader for horizontal rules. Composed of three or more hyphens or
 * asterisks on a line by themselves, with or without intermediate whitespace.
 *
 * Supports `MDTagModifier` suffix.
 */
class MDHorizontalRuleReader extends MDReader {
	private static string $horizontalRuleRegex = '^\\s*(?:\\-(?:\\s*\\-){2,}|\\*(?:\\s*\\*){2,})\\s*$';

	/**
	 * Reads a single-line horizontal rule at `state.p`. On success `state.p`
	 * is advanced by one line; otherwise it is left untouched.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDHorizontalRuleNode`, or `null`
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$p = $state->p;
		$line = $state->lines[$p++];
		[$line, $modifier] = MDTagModifier::fromLine($line, $state);
		// Static properties need the `$` sigil; without it PHP looks up a
		// class constant instead.
		if (!mb_eregi(self::$horizontalRuleRegex, $line)) return null;
		$state->p = $p;
		$block = new MDHorizontalRuleNode();
		if ($modifier) $modifier->applyTo($block);
		return $block;
	}

	/**
	 * Asks to be tried before the unordered list reader, since a rule made of
	 * spaced hyphens or asterisks also looks like a bullet item.
	 */
	public function compareBlockOrdering(MDReader $other): int {
		if ($other instanceof MDUnorderedListReader) {
			return -1;
		}
		return 0;
	}
}
2042
+
2043
/**
 * Block reader for tables. A table is a header row, a divider row of dashes
 * (optionally with `:` alignment markers), and zero or more body rows, with
 * cells separated by `|`.
 *
 * Supports `MDTagModifier` suffix.
 */
class MDTableReader extends MDReader {
	/**
	 * Reads one table row at `state.p`. On success `state.p` is advanced by
	 * one line; otherwise it is left untouched.
	 *
	 * @param MDState $state
	 * @param bool $isHeader  whether to produce header cells
	 * @return MDTableRowNode|null  the row, or `null` if there is no line left
	 *   or the line contains no `|`
	 */
	private function readTableRow(MDState $state, bool $isHeader): ?MDTableRowNode {
		if (!$state->hasLines(1)) return null;
		$p = $state->p;
		$line = MDTagModifier::strip(trim($state->lines[$p++]));
		if (!mb_eregi('.*\\|.*', $line)) return null;
		if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
		if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
		$cellTokens = explode('|', $line);
		// `$state` must be captured explicitly; closures do not see the
		// enclosing scope's variables.
		$cells = array_map(function($token) use ($state, $isHeader) {
			$content = $state->inlineMarkdownToNode(trim($token));
			return $isHeader ? new MDTableHeaderCellNode($content) : new MDTableCellNode($content);
		}, $cellTokens);
		$state->p = $p;
		return new MDTableRowNode($cells);
	}

	/**
	 * Derives per-column alignment from a divider line such as
	 * `| :-- | :-: | --: |`.
	 *
	 * @param string $line  the divider line
	 * @return (string|null)[]  `'left'`, `'center'`, `'right'`, or `null` (no
	 *   alignment marker) for each column
	 */
	private function parseColumnAlignments(string $line): array {
		$line = trim($line);
		if (str_starts_with($line, '|')) $line = mb_substr($line, 1);
		if (str_ends_with($line, '|')) $line = mb_substr($line, 0, mb_strlen($line) - 1);
		return array_map(function($token) {
			// The first and last tokens keep the padding that sat next to the
			// outer pipes, so trim before looking for the colon markers.
			$token = trim($token);
			$startsWithColon = str_starts_with($token, ':');
			$endsWithColon = str_ends_with($token, ':');
			if ($startsWithColon && $endsWithColon) return 'center';
			if ($startsWithColon) return 'left';
			if ($endsWithColon) return 'right';
			return null;
		}, mb_split('\\s*\\|\\s*', $line));
	}

	private static string $tableDividerRegex = '^\\s*[|]?\\s*(?:[:]?-+[:]?)(?:\\s*\\|\\s*[:]?-+[:]?)*\\s*[|]?\\s*$';

	/**
	 * Reads a table starting at `state.p`. Requires at least a header row and
	 * a divider row. On failure `state.p` is restored to where it started.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDTableNode`, or `null`
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		if (!$state->hasLines(2)) return null;
		$startP = $state->p;
		$firstLine = $state->lines[$startP];
		$modifier = MDTagModifier::fromLine($firstLine, $state)[1];
		$headerRow = $this->readTableRow($state, true);
		if ($headerRow === null) {
			$state->p = $startP;
			return null;
		}
		$dividerLine = $state->lines[$state->p++];
		if (!mb_eregi(self::$tableDividerRegex, $dividerLine)) {
			$state->p = $startP;
			return null;
		}
		$columnAlignments = $this->parseColumnAlignments($dividerLine);
		$bodyRows = [];
		while ($state->hasLines(1)) {
			$row = $this->readTableRow($state, false);
			if ($row === null) break;
			$bodyRows[] = $row;
		}
		$table = new MDTableNode($headerRow, $bodyRows);
		$table->columnAlignments = $columnAlignments;
		if ($modifier) $modifier->applyTo($table);
		return $table;
	}
}
2116
+
2117
/**
 * Block reader for definition lists. Definitions go directly under terms starting
 * with a colon.
 */
class MDDefinitionListReader extends MDReader {
	/**
	 * Reads a definition list starting at `state.p`, up to the next blank line
	 * or the end of the lines. Lines starting with `:` and whitespace are
	 * definitions, lines starting with whitespace continue the previous entry,
	 * and any other line is a term. At least one term and one definition are
	 * required.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null  an `MDDefinitionListNode`, or `null`
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$p = $state->p;
		$termCount = 0;
		$definitionCount = 0;
		$defLines = [];
		while ($state->hasLines(1, $p)) {
			$line = $state->lines[$p++];
			if (trim($line) === '') {
				break;
			}
			if (mb_eregi('^\\s+', $line)) {
				// Continuation line; there must be an entry to attach it to.
				if (sizeof($defLines) == 0) return null;
				$defLines[sizeof($defLines) - 1] .= "\n" . $line;
			} elseif (mb_eregi('^:\\s+', $line)) {
				$defLines[] = $line;
				$definitionCount++;
			} else {
				$defLines[] = $line;
				$termCount++;
			}
		}
		if ($termCount == 0 || $definitionCount == 0) return null;
		// `$state` must be captured explicitly, and the match groups have to
		// be requested from `mb_eregi` inside the closure itself.
		$blocks = array_map(function($line) use ($state) {
			if (mb_eregi('^:\\s+(.*?)$', $line, $groups)) {
				return new MDDefinitionListDefinitionNode($state->inlineMarkdownToNodes($groups[1]));
			}
			return new MDDefinitionListTermNode($state->inlineMarkdownToNodes($line));
		}, $defLines);
		$state->p = $p;
		return new MDDefinitionListNode($blocks);
	}
}
2156
+
2157
/**
 * Block reader for defining footnote contents. Footnotes can be defined anywhere
 * in the document but will always be rendered at the end of a page or end of
 * the document.
 *
 * Also tokenizes inline footnote references (`[^symbol]` and
 * `[^symbol "title"]`) and substitutes them with `MDFootnoteNode`s.
 *
 * NOTE(review): bookkeeping is stored on `$state->root()` using array-access
 * syntax (`root()['footnotes']` etc.). This assumes the object returned by
 * `root()` supports array access for both reads and writes — confirm against
 * `MDState`.
 */
class MDFootnoteReader extends MDReader {
	private static string $footnoteWithTitleRegex = '^\\[\\^([^\\s\\[\\]]+?)\\s+"(.*?)"\\]';  // 1=symbol, 2=title
	private static string $footnoteRegex = '^\\[\\^([^\\s\\[\\]]+?)\\]';  // 1=symbol

	/**
	 * Records the content of a footnote under its symbol.
	 *
	 * @param MDState $state
	 * @param string $symbol
	 * @param MDNode[] $footnote
	 */
	private function defineFootnote(MDState $state, string $symbol, array $footnote) {
		$footnotes = $state->root()['footnotes'] ?? [];
		$footnotes[$symbol] = $footnote;
		$state->root()['footnotes'] = $footnotes;
	}

	/**
	 * Records one occurrence id for a footnote symbol.
	 *
	 * @param MDState $state
	 * @param string $symbol
	 * @param int $unique  occurrence id
	 */
	private function registerUniqueInstance(MDState $state, string $symbol, int $unique) {
		$footnoteInstances = $state->root()['footnoteInstances'] ?? [];
		$instances = $footnoteInstances[$symbol] ?? [];
		$instances[] = $unique;
		$footnoteInstances[$symbol] = $instances;
		// Arrays are copied on assignment, so the updated map has to be
		// stored back or the registration is lost.
		$state->root()['footnoteInstances'] = $footnoteInstances;
	}

	/**
	 * Returns the numeric id for a footnote symbol, allocating the next free
	 * id the first time a symbol is seen.
	 *
	 * @param MDState $state
	 * @param string $symbol
	 * @return int
	 */
	private function idForFootnoteSymbol(MDState $state, string $symbol): int {
		$footnoteIds = $state->root()['footnoteIds'] ?? [];
		if (isset($footnoteIds[$symbol])) return $footnoteIds[$symbol];
		$id = $state->root()['nextFootnoteId'] ?? 1;
		$footnoteIds[$symbol] = $id;
		// Store both the map and the counter back so repeated references to
		// the same symbol resolve to the same id.
		$state->root()['footnoteIds'] = $footnoteIds;
		$state->root()['nextFootnoteId'] = $id + 1;
		return $id;
	}

	/**
	 * Resets all footnote bookkeeping on the root state.
	 */
	public function preProcess(MDState $state) {
		$state->root()['footnoteInstances'] = [];
		$state->root()['footnotes'] = [];
		$state->root()['footnoteIds'] = [];
		$state->root()['nextFootnoteId'] = 1;
	}

	/**
	 * Reads a footnote definition of the form `[^symbol]: content` at
	 * `state.p`, including any following lines that begin with whitespace.
	 * The definition is recorded and an empty node is returned so nothing is
	 * rendered in place.
	 *
	 * NOTE(review): returns a bare `MDNode` under a `?MDBlockNode` return
	 * type — confirm this is compatible with the node class hierarchy.
	 *
	 * @param MDState $state
	 * @return MDBlockNode|null
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$p = $state->p;
		if (!mb_eregi('^\\s*\\[\\^\\s*([^\\]]+)\\s*\\]:\\s+(.*)\\s*$', $state->lines[$p++], $groups)) return null;
		$symbol = $groups[1];
		$def = $groups[2];
		while ($state->hasLines(1, $p)) {
			$line = $state->lines[$p];
			if (!mb_eregi('^\\s+', $line)) break;
			// String concatenation; `+` is arithmetic in PHP.
			$def .= "\n" . $line;
			$p++;
		}
		$content = $state->inlineMarkdownToNodes($def);
		$this->defineFootnote($state, $symbol, $content);
		$state->p = $p;
		return new MDNode(); // empty
	}

	/**
	 * Reads a footnote reference token from the start of `line`.
	 *
	 * @param MDState $state
	 * @param string $line
	 * @return MDToken|null  a `Footnote` token carrying the symbol (and title,
	 *   when given), or `null`
	 */
	public function readToken(MDState $state, string $line): ?MDToken {
		if (mb_eregi(self::$footnoteWithTitleRegex, $line, $groups)) {
			return new MDToken($groups[0], MDTokenType::Footnote, $groups[1], $groups[2]);
		}
		if (mb_eregi(self::$footnoteRegex, $line, $groups)) {
			return new MDToken($groups[0], MDTokenType::Footnote, $groups[1]);
		}
		return null;
	}

	/**
	 * Replaces the first footnote token in `tokens` with an `MDFootnoteNode`.
	 *
	 * NOTE(review): `$tokens` is received by value (the base class signature
	 * requires it), so the replacement is not visible to the caller as
	 * written. Confirm how the caller observes the substitution.
	 *
	 * @param MDState $state
	 * @param int $pass
	 * @param (MDToken|MDInlineNode)[] $tokens
	 * @return bool  `true` if a footnote token was found
	 */
	public function substituteTokens(MDState $state, int $pass, array $tokens): bool {
		if ($match = MDToken::findFirstTokens($tokens, [ MDTokenType::Footnote ])) {
			$symbol = $match->tokens[0]->content;
			// An object replacement must be wrapped in an array, otherwise
			// `array_splice` casts it to an array of its properties.
			array_splice($tokens, $match->index, 1, [ new MDFootnoteNode($symbol) ]);
			return true;
		}
		return false;
	}

	/**
	 * Assigns footnote and occurrence ids to every `MDFootnoteNode` in the
	 * document and registers each occurrence.
	 *
	 * NOTE(review): `$blocks` is received by value (the base class signature
	 * requires it), so the `MDFootnoteListNode` appended below does not reach
	 * the caller as written.
	 *
	 * @param MDState $state
	 * @param MDBlockNode[] $blocks
	 */
	public function postProcess(MDState $state, array $blocks) {
		$nextOccurrenceId = 1;
		foreach ($blocks as $block) {
			$block->visitChildren(function($node) use ($state, &$nextOccurrenceId) {
				if (!($node instanceof MDFootnoteNode)) return;
				$node->footnoteId = $this->idForFootnoteSymbol($state, $node->symbol);
				$node->occurrenceId = $nextOccurrenceId++;
				$node->displaySymbol = strval($node->footnoteId);
				$this->registerUniqueInstance($state, $node->symbol, $node->occurrenceId);
			});
		}
		// Read definitions from the same place `defineFootnote` stores them.
		if (sizeof($state->root()['footnotes'] ?? []) == 0) return;
		$blocks[] = new MDFootnoteListNode();
	}

	/**
	 * Asks to be ordered before the link and image readers during block
	 * parsing.
	 */
	public function compareBlockOrdering(MDReader $other): int {
		if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
			return -1;
		}
		return 0;
	}

	/**
	 * Asks to be ordered before the link and image readers during tokenizing.
	 */
	public function compareTokenizeOrdering(MDReader $other): int {
		if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
			return -1;
		}
		return 0;
	}

	/**
	 * Asks to be ordered before the link and image readers during
	 * substitution, on every pass.
	 */
	public function compareSubstituteOrdering(MDReader $other, int $pass): int {
		if ($other instanceof MDLinkReader || $other instanceof MDImageReader) {
			return -1;
		}
		return 0;
	}
}
2282
+
2283
/**
 * Block reader for abbreviation definitions. Anywhere the abbreviation appears
 * in plain text will have its definition available when hovering over it.
 * Definitions can appear anywhere in the document. Their content should only
 * contain simple text, not markdown.
 */
class MDAbbreviationReader extends MDReader {
	/**
	 * Records an abbreviation and its definition on the root state, together
	 * with a `\b(...)\b` pattern string for the abbreviation.
	 *
	 * @param MDState $state
	 * @param string $abbreviation
	 * @param string $definition
	 */
	private function defineAbbreviation(MDState $state, string $abbreviation, string $definition) {
		// Read-modify-write through the same `root()[...]` accessors that
		// preProcess and postProcess use, so all three agree on where the
		// data lives.
		$abbreviations = $state->root()['abbreviations'];
		$abbreviations[$abbreviation] = $definition;
		$state->root()['abbreviations'] = $abbreviations;

		$regexes = $state->root()['abbreviationRegexes'];
		$regexes[$abbreviation] = "\\b(" . preg_quote($abbreviation) . ")\\b";
		$state->root()['abbreviationRegexes'] = $regexes;
	}

	/**
	 * Initializes empty abbreviation storage on the root state.
	 */
	public function preProcess(MDState $state) {
		$state->root()['abbreviations'] = [];
		$state->root()['abbreviationRegexes'] = [];
	}

	/**
	 * Reads an abbreviation definition of the form `*[ABBR]: definition` on
	 * the current line. On a match the definition is recorded and `$state->p`
	 * is advanced by one line.
	 *
	 * @param MDState $state
	 * @return ?MDBlockNode an empty placeholder node if a definition was
	 *   consumed, or `null` if the current line is not a definition
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$p = $state->p;
		$line = $state->lines[$p++];
		if (!mb_eregi('^\\s*\\*\\[([^\\]]+?)\\]:\\s+(.*?)\\s*$', $line, $groups)) return null;
		$abbrev = $groups[1];
		$def = $groups[2];
		$this->defineAbbreviation($state, $abbrev, $def);
		$state->p = $p;
		// NOTE(review): the declared return type is ?MDBlockNode but a plain
		// MDNode is returned - confirm MDNode satisfies that type.
		return new MDNode(); // empty
	}

	/**
	 * Replaces each text node that contains a defined abbreviation with a
	 * node holding the surrounding text and `MDAbbreviationNode`s in place of
	 * every occurrence.
	 *
	 * @param MDState $state
	 * @param MDNode[] $blocks
	 */
	public function postProcess(MDState $state, array $blocks) {
		$abbreviations = $state->root()['abbreviations'];
		// PHP closures do not see enclosing locals unless listed in `use`.
		MDNode::replaceNodes($state, $blocks, function($original) use ($abbreviations) {
			if (!($original instanceof MDTextNode)) return null;
			$changed = false;
			$elems = [ $original->text ]; // mix of strings and MDNodes
			for ($i = 0; $i < count($elems); $i++) {
				$text = $elems[$i];
				if (!is_string($text)) continue;
				foreach ($abbreviations as $abbreviation => $definition) {
					// Numeric-looking array keys come back as ints.
					$abbreviation = (string)$abbreviation;
					$index = strpos($text, $abbreviation);
					// Not in this string: try the next abbreviation rather
					// than abandoning the search.
					if ($index === false) continue;
					$prefix = substr($text, 0, $index);
					$suffix = substr($text, $index + strlen($abbreviation));
					array_splice($elems, $i, 1, [ $prefix, new MDAbbreviationNode($abbreviation, $definition), $suffix ]);
					$i = -1; // start over
					$changed = true;
					break;
				}
			}
			if (!$changed) return null;
			$nodes = array_map(fn($elem) => is_string($elem) ? new MDTextNode($elem) : $elem, $elems);
			return new MDNode($nodes);
		});
	}
}
2344
+
2345
/**
 * Fallback block reader that gathers consecutive non-blank lines into a
 * paragraph. A blank or whitespace-only line ends the paragraph. Because
 * paragraphs have no distinguishing syntax, this reader sorts after every
 * other block reader.
 */
class MDParagraphReader extends MDReader {
	/**
	 * Collects lines from the current line pointer up to the next blank line
	 * (which is consumed) or the end of input.
	 *
	 * @param MDState $state
	 * @return ?MDBlockNode a `MDParagraphNode` holding the parsed inline
	 *   content, or `null` when no lines were collected or when the run
	 *   starts at line 0 and reaches the end of the input
	 */
	public function readBlock(MDState $state): ?MDBlockNode {
		$cursor = $state->p;
		$collected = [];
		while ($state->hasLines(1, $cursor)) {
			$current = $state->lines[$cursor];
			$cursor++;
			if (trim($current) === '') break;
			$collected[] = $current;
		}
		// If it's the entire document don't wrap it in a paragraph
		$spansWholeDocument = $state->p == 0 && $cursor >= count($state->lines);
		if ($spansWholeDocument || count($collected) === 0) {
			return null;
		}
		$state->p = $cursor;
		$joined = implode("\n", $collected);
		return new MDParagraphNode($state->inlineMarkdownToNodes($joined));
	}

	/**
	 * @param MDReader $other
	 * @return int always 1, placing this reader after every other reader
	 */
	public function compareBlockOrdering(MDReader $other): int {
		return 1; // always dead last
	}
}
1493 2377
 
1494 2378
 // Stub subclass of MDReader; it declares no members here.
 class MDSimplePairInlineReader extends MDReader {}
1495 2379
 

Loading…
取消
儲存