Explorar el Código

HTML sanitizing added

main
Rocketsoup hace 1 año
padre
commit
17c0a98206
Se han modificado 4 ficheros con 364 adiciones y 9 borrados
  1. 360
    5
      js/markdown.js
  2. 1
    1
      js/markdown.min.js
  3. 1
    1
      js/spreadsheet.js
  4. 2
    2
      testjs.html

+ 360
- 5
js/markdown.js Ver fichero

@@ -1,6 +1,3 @@
1
-// FIXME: Strikethrough
2
-// FIXME: Modifiers not applying
3
-
4 1
 class MDTokenType {
5 2
 	static Text = new MDTokenType('Text');
6 3
 	static Whitespace = new MDTokenType('Whitespace');
@@ -614,6 +611,9 @@ class MDState {
614 611
 	 */
615 612
 	#referenceToTitle = {};
616 613
 
614
+	/** @type {MDHTMLFilter} */
615
+	tagFilter;
616
+
617 617
 	static #textWhitespaceRegex = /^(\s*)(?:(\S|\S.*\S)(\s*?))?$/; // 1=leading WS, 2=text, 3=trailing WS
618 618
 
619 619
 	/**
@@ -627,12 +627,14 @@ class MDState {
627 627
 		config=null,
628 628
 		readersByBlockPriority=null,
629 629
 		readersByTokenPriority=null,
630
-		readersBySubstitutePriority=null) {
630
+		readersBySubstitutePriority=null,
631
+		tagFilter=null) {
631 632
 		this.#lines = lines;
632 633
 		this.config = config;
633 634
 		this.#readersByBlockPriority = readersByBlockPriority
634 635
 		this.#readersByTokenPriority = readersByTokenPriority
635 636
 		this.#readersBySubstitutePriority = readersBySubstitutePriority
637
+		this.tagFilter = tagFilter;
636 638
 	}
637 639
 
638 640
 	/**
@@ -839,11 +841,13 @@ class MDState {
839 841
 
840 842
 		// Convert any remaining tokens to nodes, apply CSS modifiers.
841 843
 		var lastNode = null;
844
+		const me = this;
842 845
 		nodes = nodes.map(function(node) {
843 846
 			if (node instanceof MDToken) {
844 847
 				/** @type {MDToken} */
845 848
 				const token = node;
846 849
 				if (token.type == MDTokenType.Modifier && lastNode) {
850
+					me.root.tagFilter.scrubModifier(token.modifier);
847 851
 					token.modifier.applyTo(lastNode);
848 852
 					lastNode = null;
849 853
 					return new MDTextNode('');
@@ -2282,6 +2286,8 @@ class MDHTMLTagReader extends MDReader {
2282 2286
 	readToken(state, line) {
2283 2287
 		const tag = MDHTMLTag.fromLineStart(line)
2284 2288
 		if (tag === null) return null;
2289
+		if (!state.root.tagFilter.isValidTagName(tag.tagName)) return null;
2290
+		state.root.tagFilter.scrubTag(tag);
2285 2291
 		return new MDToken(tag.original, MDTokenType.HTMLTag, null, null, tag)
2286 2292
 	}
2287 2293
 
@@ -3035,6 +3041,348 @@ class MDHTMLTagNode extends MDInlineNode {
3035 3041
 // -- Other -----------------------------------------------------------------
3036 3042
 
3037 3043
 
3044
/**
 * Helps to reject unapproved HTML, tag attributes, and CSS.
 *
 * All allow-lists are plain objects that callers may edit. Lookups only ever
 * consult an object's *own* keys, so names inherited from `Object.prototype`
 * (`constructor`, `__proto__`, ...) are never treated as permitted.
 */
class MDHTMLFilter {
	/**
	 * Mapping of permitted lowercase tag names to objects containing allowable
	 * attributes for those tags. Does not need to include those attributes
	 * defined in `allowableGlobalAttributes`.
	 *
	 * Values are objects with allowable lowercase attribute names mapped to
	 * allowable value patterns. A `*` means any value is acceptable. Multiple
	 * allowable values can be joined together with `|`. These special symbols
	 * represent certain kinds of values and can be used in combination or in
	 * place of literal values.
	 *
	 * - `{classlist}`: A list of legal CSS classnames, separated by spaces
	 * - `{int}`: An integer
	 * - `{none}`: No value (an attribute with no `=` or value, like `checked`)
	 * - `{style}`: One or more CSS declarations, separated by semicolons (simple
	 *   `key: value;` syntax only)
	 * - `{url}`: A URL (no whitespace; `javascript:`, `vbscript:` and `data:`
	 *   schemes are refused)
	 * @type {object}
	 */
	allowableTags = {
		'address': {
			'cite': '{url}',
		},
		'h1': {},
		'h2': {},
		'h3': {},
		'h4': {},
		'h5': {},
		'h6': {},
		'blockquote': {},
		'dl': {},
		'dt': {},
		'dd': {},
		'div': {},
		'hr': {},
		'ul': {},
		'ol': {
			'start': '{int}',
			'type': 'a|A|i|I|1',
		},
		'li': {
			'value': '{int}',
		},
		'p': {},
		'pre': {},
		'table': {},
		'thead': {},
		'tbody': {},
		'tfoot': {},
		'tr': {},
		'td': {},
		'th': {},
		'a': {
			'href': '{url}',
			'target': '*',
		},
		'abbr': {},
		'b': {},
		'br': {},
		'cite': {},
		'code': {},
		'data': {
			'value': '*',
		},
		'dfn': {},
		'em': {},
		'i': {},
		'kbd': {},
		'mark': {},
		'q': {
			'cite': '{url}',
		},
		's': {},
		'samp': {},
		'small': {},
		'span': {},
		'strong': {},
		'sub': {},
		'sup': {},
		'time': {
			'datetime': '*',
		},
		'u': {},
		'var': {},
		'wbr': {},
		'img': {
			'alt': '*',
			// Images take their location from `src`; `href` is not an <img>
			// attribute, so whitelisting it stripped the source of every image.
			'src': '{url}',
		},
		'figure': {},
		'figcaption': {},
		'del': {},
		'ins': {},
		'details': {},
		'summary': {},
	};

	/**
	 * Mapping of allowable lowercase global attributes to their permitted
	 * values. Uses same value pattern syntax as described in `allowableTags`.
	 * A key ending in `*` matches every attribute name starting with the text
	 * before the `*`.
	 * @type {object}
	 */
	allowableGlobalAttributes = {
		'class': '{classlist}',
		'data-*': '*',
		'dir': 'ltr|rtl|auto',
		'id': '*',
		'lang': '*',
		'style': '{style}',
		'title': '*',
		'translate': 'yes|no|{none}',
	};

	/**
	 * Mapping of allowable CSS style names to their allowable value patterns.
	 * Options are joined with `|`; `{color}` stands for a hex or named color.
	 * @type {object}
	 */
	allowableStyleKeys = {
		'background-color': '{color}',
		'color': '{color}',
	};

	/**
	 * Scrubs all forbidden attributes from an HTML tag. Attributes whose name
	 * or value is not permitted are deleted from `tag.attributes` in place.
	 *
	 * @param {MDHTMLTag} tag - HTML tag
	 */
	scrubTag(tag) {
		for (const name of Object.keys(tag.attributes)) {
			// A value can only validate when the name resolves to a pattern,
			// so this single check covers both name and value.
			if (!this.isValidAttributeValue(tag.tagName, name, tag.attributes[name])) {
				delete tag.attributes[name];
			}
		}
	}

	/**
	 * Scrubs all forbidden attributes from an HTML modifier. The modifier's
	 * `cssClasses`, `cssId`, `cssStyles` and `attributes` are edited in place.
	 *
	 * @param {MDTagModifier} modifier
	 * @param {string|null} tagName - HTML tag name, if known, otherwise only
	 *   global attributes will be permitted. May be omitted.
	 */
	scrubModifier(modifier, tagName = null) {
		if (modifier.cssClasses.length > 0) {
			const classList = modifier.cssClasses.join(' ');
			if (!this.isValidAttributeValue(tagName, 'class', classList)) {
				modifier.cssClasses = [];
			}
		}
		if (modifier.cssId !== null) {
			if (!this.isValidAttributeValue(tagName, 'id', modifier.cssId)) {
				modifier.cssId = null;
			}
		}
		if (!this.isValidAttributeName(tagName, 'style')) {
			modifier.cssStyles = {};
		} else {
			for (const key of Object.keys(modifier.cssStyles)) {
				if (!this.isValidStyleValue(key, modifier.cssStyles[key])) {
					delete modifier.cssStyles[key];
				}
			}
		}
		for (const key of Object.keys(modifier.attributes)) {
			if (!this.isValidAttributeValue(tagName, key, modifier.attributes[key])) {
				delete modifier.attributes[key];
			}
		}
	}

	/**
	 * Tests if an HTML tag name is permitted.
	 *
	 * @param {string} tagName
	 * @returns {boolean}
	 */
	isValidTagName(tagName) {
		return MDHTMLFilter.#ownValue(this.allowableTags, tagName.toLowerCase()) !== undefined;
	}

	/**
	 * Tests if an HTML attribute name is permitted.
	 *
	 * @param {string|null} tagName - HTML tag name or null to only check global
	 *   attributes
	 * @param {string} attributeName - attribute name
	 * @returns {boolean}
	 */
	isValidAttributeName(tagName, attributeName) {
		return this.#patternFor(tagName, attributeName) !== undefined;
	}

	/**
	 * Tests if an attribute value is allowable. Returns `false` when the
	 * attribute name itself is not permitted.
	 *
	 * @param {string|null} tagName
	 * @param {string} attributeName
	 * @param {string|boolean} attributeValue - `true` denotes an attribute
	 *   written without a value
	 * @returns {boolean}
	 */
	isValidAttributeValue(tagName, attributeName, attributeValue) {
		const pattern = this.#patternFor(tagName, attributeName);
		if (pattern === undefined) return false;
		return this.#attributeValueMatchesPattern(attributeValue, pattern);
	}

	/**
	 * Returns `map[key]` only when `key` is an own property of `map`, so that
	 * inherited members such as `constructor` never count as allow-list hits.
	 *
	 * @param {object} map
	 * @param {string} key
	 * @returns {*} the own value, or `undefined`
	 */
	static #ownValue(map, key) {
		return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
	}

	/**
	 * Resolves the value pattern governing an attribute: exact global name
	 * first, then global wildcard names (`data-*`), then the tag's own
	 * attributes.
	 *
	 * @param {string|null|undefined} tagName - null/undefined restricts the
	 *   search to global attributes
	 * @param {string} attributeName
	 * @returns {string|undefined} pattern, or `undefined` if the attribute is
	 *   not permitted
	 */
	#patternFor(tagName, attributeName) {
		const lcAttributeName = attributeName.toLowerCase();
		const globals = this.allowableGlobalAttributes;
		const exact = MDHTMLFilter.#ownValue(globals, lcAttributeName);
		if (exact !== undefined) return exact;
		for (const namePattern of Object.keys(globals)) {
			if (namePattern.endsWith('*') && lcAttributeName.startsWith(namePattern.slice(0, -1))) {
				return globals[namePattern];
			}
		}
		if (tagName === null || tagName === undefined) return undefined;
		const tagAttributes = MDHTMLFilter.#ownValue(this.allowableTags, tagName.toLowerCase());
		if (tagAttributes === undefined || tagAttributes === null) return undefined;
		return MDHTMLFilter.#ownValue(tagAttributes, lcAttributeName);
	}

	static #permissiveURLRegex = /^\S+$/;
	// Schemes that can execute or embed script. Leading C0 control characters
	// are skipped because URL parsers strip them before reading the scheme.
	// NOTE(review): entity-encoded schemes (e.g. `java&#115;cript:`) are only
	// caught if attribute values reach this filter already decoded - confirm
	// against MDHTMLTag's parsing and the renderer's escaping.
	static #scriptSchemeRegex = /^[\u0000-\u0020]*(?:javascript|vbscript|data):/i;
	static #integerRegex = /^[\-]?\d+$/;
	static #classListRegex = /^-?[_a-zA-Z][_a-zA-Z0-9-]*(?:\s+-?[_a-zA-Z][_a-zA-Z0-9-]*)*$/;

	/**
	 * Tests a value against a `|`-separated list of pattern options.
	 *
	 * @param {string|boolean} value
	 * @param {string} pattern
	 * @returns {boolean} `true` as soon as any option accepts the value
	 */
	#attributeValueMatchesPattern(value, pattern) {
		for (const option of pattern.split('|')) {
			switch (option) {
				case '*':
					return true;
				case '{classlist}':
					if (MDHTMLFilter.#classListRegex.test(value)) return true;
					break;
				case '{int}':
					if (MDHTMLFilter.#integerRegex.test(value)) return true;
					break;
				case '{none}':
					if (value === true) return true;
					break;
				case '{style}':
					if (this.isValidStyleDeclaration(value)) return true;
					break;
				case '{url}':
					if (MDHTMLFilter.#permissiveURLRegex.test(value)
						&& !MDHTMLFilter.#scriptSchemeRegex.test(value)) return true;
					break;
				default:
					if (value === option) return true;
					break;
			}
		}
		return false;
	}

	/**
	 * Tests if a string of one or more style `key: value;` declarations is
	 * fully allowable. Empty declarations (stray semicolons) are ignored.
	 *
	 * @param {string} styles
	 * @returns {boolean} `false` for non-string input, e.g. a valueless
	 *   `style` attribute
	 */
	isValidStyleDeclaration(styles) {
		if (typeof styles !== 'string') return false;
		for (const setting of styles.split(';')) {
			if (setting.trim().length === 0) continue;
			const parts = setting.split(':');
			if (parts.length !== 2) return false;
			const name = parts[0].trim();
			if (!this.isValidStyleKey(name)) return false;
			if (!this.isValidStyleValue(name, parts[1].trim())) return false;
		}
		return true;
	}

	/**
	 * Tests if a CSS style key is allowable.
	 *
	 * @param {string} key - CSS key
	 * @returns {boolean}
	 */
	isValidStyleKey(key) {
		return MDHTMLFilter.#ownValue(this.allowableStyleKeys, key) !== undefined;
	}

	/**
	 * Tests if a CSS style value is allowable.
	 *
	 * @param {string} key
	 * @param {string} value
	 * @returns {boolean}
	 */
	isValidStyleValue(key, value) {
		const pattern = MDHTMLFilter.#ownValue(this.allowableStyleKeys, key);
		if (pattern === undefined) return false;
		for (const option of pattern.split('|')) {
			switch (option) {
				case '{color}':
					if (this.#isValidCSSColor(value)) return true;
					// Must not fall through: the placeholder text itself is
					// not an acceptable literal value.
					break;
				default:
					if (value === option) return true;
					break;
			}
		}
		return false;
	}

	static #styleColorRegex = /^#[0-9a-f]{3}(?:[0-9a-f]{3})?$|^[a-zA-Z]+$/i;

	/**
	 * @param {string} value
	 * @returns {boolean} whether `value` is a 3/6-digit hex color or a bare
	 *   alphabetic color name
	 */
	#isValidCSSColor(value) {
		return MDHTMLFilter.#styleColorRegex.test(value);
	}
}
3385
+
3038 3386
 class MDHTMLTag {
3039 3387
 	/** @type {string} */
3040 3388
 	original;
@@ -3434,6 +3782,12 @@ class Markdown {
3434 3782
 	 */
3435 3783
 	config;
3436 3784
 
3785
+	/**
3786
+	 * Filter for what non-markdown HTML is permitted. HTML generated as a
3787
+	 * result of markdown is unaffected.
3788
+	 */
3789
+	tagFilter = new MDHTMLFilter();
3790
+
3437 3791
 	#readers;
3438 3792
 
3439 3793
 	/** @type {MDReader[]} */
@@ -3469,7 +3823,8 @@ class Markdown {
3469 3823
 			this.config,
3470 3824
 			this.#readersByBlockPriority,
3471 3825
 			this.#readersByTokenPriority,
3472
-			this.#readersBySubstitutePriority);
3826
+			this.#readersBySubstitutePriority,
3827
+			this.tagFilter);
3473 3828
 		for (const reader of this.#readers) {
3474 3829
 			reader.preProcess(state);
3475 3830
 		}

+ 1
- 1
js/markdown.min.js
La diferencia del archivo ha sido suprimido porque es demasiado grande
Ver fichero


+ 1
- 1
js/spreadsheet.js Ver fichero

@@ -2889,7 +2889,7 @@ class SpreadsheetCell {
2889 2889
 /**
2890 2890
  * Integration with Markdown. Adding this block reader to a parser will run a
2891 2891
  * post-process step on any tables in the document tree. Must be used with
2892
- * `MDTableBlockReader`. Tables without at least one formula will not be altered.
2892
+ * `MDTableReader`. Tables without at least one formula will not be altered.
2893 2893
  */
2894 2894
 class MDSpreadsheetReader extends MDReader {
2895 2895
 	postProcess(state, nodes) {

+ 2
- 2
testjs.html Ver fichero

@@ -870,8 +870,8 @@
870 870
 				}
871 871
 
872 872
 				test_htmlTags() {
873
-					let markdown = 'Lorem <strong title="value" foo=\'with " quote\' bar="with \' apostrophe" attr=unquoted checked>ipsum</strong> dolor';
874
-					let expected = 'Lorem <strong title="value" foo="with &quot; quote" bar="with \' apostrophe" attr="unquoted" checked>ipsum</strong> dolor';
873
+					let markdown = 'Lorem <strong title=\'with " quote\' id="with \' apostrophe" lang=unquoted translate forbidden="true">ipsum</strong> dolor';
874
+					let expected = 'Lorem <strong title="with &quot; quote" id="with \' apostrophe" lang="unquoted" translate>ipsum</strong> dolor';
875 875
 					let actual = this.md(markdown);
876 876
 					this.assertEqual(actual, expected);
877 877
 				}

Loading…
Cancelar
Guardar