Explorar el Código

Inline markdown tokenized

main
Rocketsoup hace 1 año
padre
commit
ede5987d02
Se ha modificado 1 fichero con 388 adiciones y 21 borrados
  1. 388
    21
      js/markdown.js

+ 388
- 21
js/markdown.js Ver fichero

@@ -25,6 +25,9 @@ class _MDHAlign {
25 25
 	static Center = new _MDHAlign('Center');
26 26
 	static Right = new _MDHAlign('Right');
27 27
 
28
+	/** @var {String} */
29
+	name;
30
+
28 31
 	constructor(name) {
29 32
 		this.name = name;
30 33
 	}
@@ -43,6 +46,62 @@ class _MDHAlign {
43 46
 	}
44 47
 }
45 48
 
49
/**
 * Enumeration-style class naming every kind of token the inline tokenizer
 * can emit. Each kind is exposed as a static singleton instance.
 */
class _MDTokenType {
	/** @type {String} Display name of this token type. */
	#name;

	/**
	 * @param {String} name - identifier of the token type
	 */
	constructor(name) {
		this.#name = name;
	}

	/** @returns {String} the name given at construction */
	get name() {
		return this.#name;
	}

	/** @returns {String} the class name and type name joined by a dot */
	toString() {
		return `${this.constructor.name}.${this.#name}`;
	}

	// Plain content
	static Text = new _MDTokenType('Text');
	static Whitespace = new _MDTokenType('Whitespace');

	// Single punctuation characters
	static Underscore = new _MDTokenType('Underscore');
	static Asterisk = new _MDTokenType('Asterisk');
	static Slash = new _MDTokenType('Slash');
	static Tilde = new _MDTokenType('Tilde');
	static Bang = new _MDTokenType('Bang');
	static Backtick = new _MDTokenType('Backtick');

	// Multi-character constructs
	static Label = new _MDTokenType('Label'); // content=label
	static URL = new _MDTokenType('URL'); // content=URL, extra=title
	static Email = new _MDTokenType('Email'); // content=email address, extra=title
	static SimpleLink = new _MDTokenType('SimpleLink'); // content=URL
	static SimpleEmail = new _MDTokenType('SimpleEmail'); // content=email address
	static Footnote = new _MDTokenType('Footnote'); // content=symbol

	static HTMLTag = new _MDTokenType('HTMLTag'); // content=tag string, tag=_MDHTMLTag
}
83
/**
 * One lexical token produced while scanning a line of inline markdown.
 */
class _MDToken {
	/** @type {String} Source text stored for this token. */
	original;
	/** @type {_MDTokenType} Kind of token. */
	type;
	/** @type {String|null} Primary payload; meaning depends on the type. */
	content;
	/** @type {String|null} Secondary payload; meaning depends on the type. */
	extra;
	/** @type {_MDHTMLTag|null} Parsed tag, when one is supplied. */
	tag;

	/**
	 * @param {String} original
	 * @param {_MDTokenType} type
	 * @param {String|null} content
	 * @param {String|null} extra
	 * @param {_MDHTMLTag|null} tag
	 */
	constructor(original, type, content=null, extra=null, tag=null) {
		Object.assign(this, { original, type, content, extra, tag });
	}
}
104
+
46 105
 class _MDSpan {
47 106
 	toHTML(config) {
48 107
 		throw new Error(self.constructor.name + ".toHTML not implemented");
@@ -620,27 +679,30 @@ class _MDInline extends _MDBlock {
620 679
 	}
621 680
 }
622 681
 
623
-// Blocks that immediately start a new block
624
-// - Headers
625
-// - Blockquote
626
-// - Code block   ```\ncode\n```
627
-// Blocks that need blank line first
628
-// - HR   ---   - - -   ***   * * * * * *
629
-// - Lists
630
-// - Table
631
-// - Code block   [4+spaces]code
632
-// - Definition list   term\n: definition\n: alternate def
633
-// Unknown blocks
634
-// - Footnotes   some text[^1]   [^1]: first footnote content
635
-// - Abbreviations   *[HTML]: Hyper Text
636
-// Inline styles
637
-// - Links
638
-// - Italic
639
-// - Bold
640
-// - `code`
641
-// - Strikethrough
642
-// - Images   ![alt text](url){.cssclass}
643
-// - Literals   \*
682
/**
 * Value object describing one parsed HTML tag.
 */
class _MDHTMLTag {
	/** @type {String} Tag text as supplied to the constructor. */
	fullTag;
	/** @type {String} Element name. */
	tagName;
	/** @type {Boolean} Whether this is a closing tag. */
	isCloser;
	/** @type {Object} Attribute values keyed by attribute name. */
	attributes;

	/**
	 * @param {String} fullTag
	 * @param {String} tagName
	 * @param {Boolean} isCloser
	 * @param {Object} attributes
	 */
	constructor(fullTag, tagName, isCloser, attributes) {
		Object.assign(this, { fullTag, tagName, isCloser, attributes });
	}
}
705
+
644 706
 class _MDState {
645 707
 	/** @var {String[]} */
646 708
 	lines = [];
@@ -753,11 +815,316 @@ class Markdown {
753 815
 		return null;
754 816
 	}
755 817
 
818
+	static #baseURLRegex = /(?:(?:(?:[a-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[a-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[a-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/i;
819
+	static #baseEmailRegex = /(?:(?:[^<>()\[\]\\.,;:\s@"]+(?:\.[^<>()\[\]\\.,;:\s@"]+)*)|(?:".+"))@(?:(?:\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(?:(?:[a-z\-0-9]+\.)+[a-z]{2,}))/i;
820
+
821
+	static #footnoteWithTitleRegex = /^\[\^\s*([^\]"]+?)\s+"(.*?)"\s*\]/;
822
+	static #footnoteRegex = /^\[\^\s*([^\]]+?)\s*\]/;
823
+	static #labelRegex = /^\[(.*?)\]/;
824
+	static #urlWithTitleRegex = /^\((\S+?)\s+"(.*?)"\)/i;
825
+	static #urlRegex = /^\((\S+?)\)/i;
826
+	static #emailWithTitleRegex = new RegExp("^\\(\\s*(" + this.#baseEmailRegex.source + ")\\s+\"(.*?)\"\\s*\\)", "i");
827
+	static #emailRegex = new RegExp("^\\(\\s*(" + this.#baseEmailRegex.source + ")\\s*\\)", "i");
828
+	static #simpleURLRegex = new RegExp("^<" + this.#baseURLRegex.source + ">", "i");
829
+	static #simpleEmailRegex = new RegExp("^<" + this.#baseEmailRegex.source + ">", "i");
830
+
831
+	/**
832
+	 * @param {String} line
833
+	 * @returns {_MDHTMLTag|null} HTML tag if possible
834
+	 */
835
+	static #htmlTag(line) {
836
+		let expectOpenBracket = 0;
837
+		let expectCloserOrName = 1;
838
+		let expectName = 2;
839
+		let expectAttributeNameOrEnd = 3;
840
+		let expectEqualsOrAttributeOrEnd = 4;
841
+		let expectAttributeValue = 5;
842
+		let expectCloseBracket = 6;
843
+
844
+		var isCloser = false;
845
+		var tagName = '';
846
+		var attributeName = '';
847
+		var attributeValue = '';
848
+		var attributeQuote = null;
849
+		var attributes = {};
850
+		var fullTag = null;
851
+		let endAttribute = function() {
852
+			if (attributeName.length > 0) {
853
+				if (attributeValue.length > 0 || attributeQuote) {
854
+					attributes[attributeName] = attributeValue;
855
+				} else {
856
+					attributes[attributeName] = true;
857
+				}
858
+			}
859
+			attributeName = '';
860
+			attributeValue = '';
861
+			attributeQuote = null;
862
+		};
863
+
864
+		var expect = expectOpenBracket;
865
+		for (var p = 0; p < line.length && fullTag === null; p++) {
866
+			let ch = line.substring(p, p + 1);
867
+			switch (expect) {
868
+				case expectOpenBracket:
869
+					if (ch != '<') return null;
870
+					expect = expectCloserOrName;
871
+					break;
872
+				case expectCloserOrName:
873
+					if (ch == '/') {
874
+						isCloser = true;
875
+					} else {
876
+						p--;
877
+					}
878
+					expect = expectName;
879
+					break;
880
+				case expectName:
881
+					if (tagName.length == 0) {
882
+						if (/[a-z]/i.exec(ch) === null) return null;
883
+						tagName += ch;
884
+					} else {
885
+						if (/[a-z0-9]/i.exec(ch)) {
886
+							tagName += ch;
887
+						} else {
888
+							p--;
889
+							expect = (isCloser) ? expectCloseBracket : expectAttributeNameOrEnd;
890
+						}
891
+					}
892
+					break;
893
+				case expectAttributeNameOrEnd:
894
+					if (attributeName.length == 0) {
895
+						if (/\s/.exec(ch)) {
896
+							// skip whitespace
897
+						} else if (ch == '/') {
898
+							expect = expectCloseBracket;
899
+						} else if (ch == '>') {
900
+							fullTag = line.substring(0, p + 1);
901
+							break;
902
+						} else if (/[a-z0-9-]/i.exec(ch)) {
903
+							attributeName += ch;
904
+						} else {
905
+							return null;
906
+						}
907
+					} else if (/\s/.exec(ch)) {
908
+						expect = expectEqualsOrAttributeOrEnd;
909
+					} else if (ch == '/') {
910
+						endAttribute();
911
+						expect = expectCloseBracket;
912
+					} else if (ch == '>') {
913
+						endAttribute();
914
+						fullTag = line.substring(0, p + 1);
915
+						break;
916
+					} else if (ch == '=') {
917
+						expect = expectAttributeValue;
918
+					} else if (/[a-z0-9-]/i.exec(ch)) {
919
+						attributeName += ch;
920
+					} else {
921
+						return null;
922
+					}
923
+					break;
924
+				case expectEqualsOrAttributeOrEnd:
925
+					if (ch == '=') {
926
+						expect = expectAttributeValue;
927
+					} else if (/\s/.exec(ch)) {
928
+						// skip whitespace
929
+					} else if (ch == '/') {
930
+						expect = expectCloseBracket;
931
+					} else if (ch == '>') {
932
+						fullTag = line.substring(0, p + 1);
933
+						break;
934
+					} else if (/[a-z]/i.exec(ch)) {
935
+						endAttribute();
936
+						expect = expectAttributeNameOrEnd;
937
+						p--;
938
+					}
939
+					break;
940
+				case expectAttributeValue:
941
+					if (attributeValue.length == 0) {
942
+						if (attributeQuote === null) {
943
+							if (/\s/.exec(ch)) {
944
+								// skip whitespace
945
+							} else if (ch == '"' || ch == "'") {
946
+								attributeQuote = ch;
947
+							} else {
948
+								attributeQuote = '';
949
+								p--;
950
+							}
951
+						} else {
952
+							if (ch === attributeQuote) {
953
+								// Empty string
954
+								endAttribute();
955
+								expect = expectAttributeNameOrEnd;
956
+							} else if (attributeQuote === '' && (ch == '/' || ch == '>')) {
957
+								return null;
958
+							} else {
959
+								attributeValue += ch;
960
+							}
961
+						}
962
+					} else {
963
+						if (ch === attributeQuote) {
964
+							endAttribute();
965
+							expect = expectAttributeNameOrEnd;
966
+						} else if (attributeQuote === '' && /\s/.exec(ch)) {
967
+							endAttribute();
968
+							expect = expectAttributeNameOrEnd;
969
+						} else {
970
+							attributeValue += ch;
971
+						}
972
+					}
973
+					break;
974
+				case expectCloseBracket:
975
+					if (/\s/.exec(ch)) {
976
+						// ignore whitespace
977
+					} else if (ch == '>') {
978
+						fullTag = line.substring(0, p + 1);
979
+						break;
980
+					}
981
+					break;
982
+			}
983
+		}
984
+		if (fullTag === null) return null;
985
+		endAttribute();
986
+		return new _MDHTMLTag(fullTag, tagName, isCloser, attributes);
987
+	}
988
+
989
+	/**
990
+	 * @param {String} line
991
+	 * @returns {_MDToken[]} tokens
992
+	 */
993
+	static #tokenize(line) {
994
+		var tokens = [];
995
+		var text = '';
996
+		var expectLiteral = false;
997
+		var groups = null;
998
+		var tag = null;
999
+		const endText = function() {
1000
+			if (text.length == 0) return;
1001
+			let textGroups = /^(\s*)(?:(\S|\S.*\S)(\s*?))?$/.exec(text);
1002
+			if (textGroups !== null) {
1003
+				if (textGroups[1].length > 0) {
1004
+					tokens.push(new _MDToken(textGroups[1], _MDTokenType.Whitespace, textGroups[1]));
1005
+				}
1006
+				if (textGroups[2] !== undefined && textGroups[2].length > 0) {
1007
+					tokens.push(new _MDToken(textGroups[2], _MDTokenType.Text, textGroups[2]));
1008
+				}
1009
+				if (textGroups[3] !== undefined && textGroups[3].length > 0) {
1010
+					tokens.push(new _MDToken(textGroups[3], _MDTokenType.Whitespace, textGroups[3]));
1011
+				}
1012
+			} else {
1013
+				tokens.push(new _MDToken(text, _MDTokenType.Text, text));
1014
+			}
1015
+			text = '';
1016
+		}
1017
+		for (var p = 0; p < line.length; p++) {
1018
+			let ch = line.substring(p, p + 1);
1019
+			let remainder = line.substring(p);
1020
+			if (expectLiteral) {
1021
+				// TODO: Check for only allowable escapable characters
1022
+				text += ch;
1023
+				continue;
1024
+			}
1025
+			if (ch == '\\') {
1026
+				expectLiteral = true;
1027
+			} else if (ch == '*') {
1028
+				endText();
1029
+				tokens.push(new _MDToken(ch, _MDTokenType.Asterisk));
1030
+			} else if (ch == '_') {
1031
+				endText();
1032
+				tokens.push(new _MDToken(ch, _MDTokenType.Underscore));
1033
+			} else if (ch == '`') {
1034
+				endText();
1035
+				tokens.push(new _MDToken(ch, _MDTokenType.Backtick));
1036
+			} else if (ch == '~') {
1037
+				endText();
1038
+				tokens.push(new _MDToken(ch, _MDTokenType.Tilde));
1039
+			} else if (ch == '!') {
1040
+				endText();
1041
+				tokens.push(new _MDToken(ch, _MDTokenType.Bang));
1042
+
1043
+			} else if (groups = this.#footnoteWithTitleRegex.exec(remainder)) {
1044
+				// Footnote with title   [^1 "Foo"]
1045
+				endText();
1046
+				tokens.push(new _MDToken(groups[0], _MDTokenType.Footnote, groups[1], groups[2]));
1047
+				p += groups[0].length - 1;
1048
+			} else if (groups = this.#footnoteRegex.exec(remainder)) {
1049
+				// Footnote without title   [^1]
1050
+				endText();
1051
+				tokens.push(new _MDToken(groups[0], _MDTokenType.Footnote, groups[1]));
1052
+				p += groups[0].length - 1;
1053
+			} else if (groups = this.#labelRegex.exec(remainder)) {
1054
+				// Label/ref for link/image   [Foo]
1055
+				endText();
1056
+				tokens.push(new _MDToken(groups[0], _MDTokenType.Label, groups[1]));
1057
+				p += groups[0].length - 1;
1058
+			} else if (groups = this.#urlWithTitleRegex.exec(remainder)) {
1059
+				// URL with title   (https://foo "Bar")
1060
+				endText();
1061
+				tokens.push(new _MDToken(groups[0], _MDTokenType.URL, groups[1], groups[2]));
1062
+				p += groups[0].length - 1;
1063
+			} else if (groups = this.#emailWithTitleRegex.exec(remainder)) {
1064
+				// Email address with title   (user@example.com  "Foo")
1065
+				endText();
1066
+				tokens.push(new _MDToken(groups[0], _MDTokenType.Email, groups[1]));
1067
+				p += groups[0].length - 1;
1068
+			} else if (groups = this.#urlRegex.exec(remainder)) {
1069
+				// URL   (https://example.com)
1070
+				endText();
1071
+				tokens.push(new _MDToken(groups[0], _MDTokenType.URL, groups[1]));
1072
+				p += groups[0].length - 1;
1073
+			} else if (groups = this.#emailRegex.exec(remainder)) {
1074
+				// Email   (user@example.com)
1075
+				endText();
1076
+				tokens.push(new _MDToken(groups[0], _MDTokenType.Email, groups[1]));
1077
+				p += groups[0].length - 1;
1078
+			} else if (groups = this.#simpleURLRegex.exec(remainder)) {
1079
+				// Simple URL   <https://example.com>
1080
+				endText();
1081
+				tokens.push(new _MDToken(groups[0], _MDTokenType.SimpleLink, groups[1]));
1082
+				p += groups[0].length - 1;
1083
+			} else if (groups = this.#simpleEmailRegex.exec(remainder)) {
1084
+				// Simple email   <user@example.com>
1085
+				endText();
1086
+				tokens.push(new _MDToken(groups[0], _MDTokenType.SimpleEmail, groups[1]));
1087
+				p += groups[0].length - 1;
1088
+			} else if (tag = this.#htmlTag(remainder)) {
1089
+				endText();
1090
+				tokens.push(new _MDToken(tag.fullTag, _MDTokenType.HTMLTag, tag.fullTag, null, tag));
1091
+				p += tag.fullTag.length - 1;
1092
+			} else {
1093
+				text += ch;
1094
+			}
1095
+		}
1096
+		endText();
1097
+		return tokens;
1098
+	}
1099
+
756 1100
 	/**
757 1101
 	 * @param {_MDState} state
1102
+	 * @param {String} line
758 1103
 	 * @returns {_MDBlock|null}
759 1104
 	 */
760 1105
 	static #readInline(state, line) {
1106
+		let tokens = this.#tokenize(line);
1107
+		var spans = [];
1108
+// - Link   [text](https://url)
1109
+// - Emphasis   *emphasized* or _emphasized_ or /emphasized/
1110
+// - Strong   **bold** or __bold__
1111
+// - Inline code   `code` or ``code``
1112
+// - Strikethrough   ~strike~ or ~~strike~~
1113
+// - Image   ![alt text](https://image){.cssclass}
1114
+// - Footnote (inline)   [^1]: footnote text
1115
+// - Abbreviation (inline)
1116
+//
1117
+// Tokens:
1118
+// _
1119
+// *
1120
+// /
1121
+// ~
1122
+// [label] or []
1123
+// (url)
1124
+// !
1125
+// <>
1126
+// `
1127
+
761 1128
 		return new _MDInline(line);
762 1129
 	}
763 1130
 

Loading…
Cancelar
Guardar