Parcourir la source

Adding more utils. Pattern now supports content.plain and content.markdown

master
Rocketsoup il y a 4 ans
Parent
révision
cc06573e54

+ 11
- 10
patterns.md Voir le fichier

@@ -41,16 +41,17 @@ Available actions:
41 41
 The simplest expression just consists of a message field, a comparison operator, and a value to compare it to. For example:
42 42
 
43 43
 ```
44
-content contains "forbidden"
44
+content.plain contains "forbidden"
45 45
 ```
46 46
 
47
-The message will match if its `content` `contains` the word `"forbidden"`.
47
+The message will match if its `content.plain` `contains` the word `"forbidden"`.
48 48
 
49 49
 The available operators and type of value depends on the field being accessed.
50 50
 
51 51
 #### Fields
52 52
 
53
-* `content` - The text of the message. Note this is in raw Discord markdown, so it may contain underscores, emote names, and mention codes. Available operators: `==`, `!=`, `contains`, `!contains`, `matches`, `!matches`. Comparison value must be a quoted string.
53
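+* `content.plain` - The plain text of the message. All markdown formatting is removed, and mentions look like the `@Username` text name that gets displayed. Available operators: `==`, `!=`, `contains`, `!contains`, `matches`, `!matches`. Comparison value must be a quoted string.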
54
+* `content.markdown` - The raw markdown of the message. This contains all markdown characters, and mentions are of the `<@!0000000>` form. Available operators: `==`, `!=`, `contains`, `!contains`, `matches`, `!matches`. Comparison value must be a quoted string.
54 55
 * `author` - Who sent the message. Available operators: `==`, `!=`. Comparison value must be a user mention (an @ that Discord will tab-complete for you).
55 56
 * `author.id` - The numeric ID of the user who sent the message. Available operators: `==`, `!=`. Comparison value must be a numeric user ID.
56 57
 * `author.name` - The username of the author. Available operators: `==`, `!=`, `contains`, `!contains`, `matches`, `!matches`. Comparison value must be a quoted string.
@@ -84,7 +85,7 @@ Timespans consist of one or more pairs of a number and a unit letter ("d" for da
84 85
 Multiple expressions can be combined with "and" or "or". For example:
85 86
 
86 87
 ```
87
-content contains "alpha" and content contains "bravo"
88
+content.plain contains "alpha" and content.plain contains "bravo"
88 89
 ```
89 90
 
90 91
 This will only match messages that contain both "alpha" and "bravo" somewhere in the text.
@@ -92,7 +93,7 @@ This will only match messages that contain both "alpha" and "bravo" somewhere in
92 93
 More complex expressions can use parentheses to clarify the order of interpretation. For example:
93 94
 
94 95
 ```
95
-(content contains "foo" and author.joinage < 30m) or (content contains "bar" and author.joinage > 30m)
96
+(content.plain contains "foo" and author.joinage < 30m) or (content.plain contains "bar" and author.joinage > 30m)
96 97
 ```
97 98
 
98 99
 A message will match if it contains "foo" and the user joined in the last half hour, OR it will match if the message contains "bar" and the author joined more than a half hour ago.
@@ -100,7 +101,7 @@ A message will match if it contains "foo" and the user joined in the last half h
100 101
 Lastly, expressions can be inverted by prefixing a `!`. For example:
101 102
 
102 103
 ```
103
-!(content contains "me" and content contains "you")
104
+!(content.plain contains "me" and content.plain contains "you")
104 105
 ```
105 106
 
106 107
 This will only match messages that do not contain both "me" and "you". If it contains just "me" or just "you" or neither word then it will match. If both are present it will not match.
@@ -112,19 +113,19 @@ Here are examples of `add` commands:
112 113
 Automatically delete a banned word:
113 114
 
114 115
 ```
115
-$rb_pattern add "bad word" delete if content contains "darn"
116
+$rb_pattern add "bad word" delete if content.plain contains "darn"
116 117
 ```
117 118
 
118 119
 Ban anyone who posts a URL within the first 30 minutes of joining the server.
119 120
 
120 121
 ```
121
-$rb_pattern add "url spam" ban if author.joinage < 30m and (content contains "http://" or content contains "https://")
122
+$rb_pattern add "url spam" ban if author.joinage < 30m and (content.plain contains "http://" or content.plain contains "https://")
122 123
 ```
123 124
 
124 125
 Automatically reply to anyone asking when lunch is.
125 126
 
126 127
 ```
127
-$rb_pattern add "lunch" reply "Lunch is at noon." if content == "When is lunch?"
128
+$rb_pattern add "lunch" reply "Lunch is at noon." if content.plain == "When is lunch?"
128 129
 ```
129 130
 
130 131
 ## Grammar
@@ -151,7 +152,7 @@ $rb_pattern add "lunch" reply "Lunch is at noon." if content == "When is lunch?"
151 152
 
152 153
 `<or_expr>` ::= `<expression>` " or " `<expression>`
153 154
 
154
-`<field_name>` ::= "content" | "author" | "author.id" | "author.name" | "author.joinage"
155
+`<field_name>` ::= "content.plain" | "content.markdown" | "author" | "author.id" | "author.name" | "author.joinage"
155 156
 
156 157
 `<op>` ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "contains" | "!contains" | "matches" | "!matches"
157 158
 

+ 3
- 3
rocketbot/cogs/crosspostcog.py Voir le fichier

@@ -2,7 +2,7 @@
2 2
 Cog for detecting spam messages posted in multiple channels.
3 3
 """
4 4
 from datetime import datetime, timedelta
5
-from discord import Member, Message
5
+from discord import Member, Message, utils as discordutils
6 6
 from discord.ext import commands
7 7
 
8 8
 from config import CONFIG
@@ -163,7 +163,7 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
163 163
 		await self.__update_message_from_context(context)
164 164
 
165 165
 	async def __update_message_from_context(self, context: SpamContext) -> None:
166
-		first_spam_message = next(iter(context.spam_messages))
166
+		first_spam_message: Message = next(iter(context.spam_messages))
167 167
 		spam_count = len(context.spam_messages)
168 168
 		channel_count = len(context.unique_channels)
169 169
 		deleted_count = len(context.spam_messages)
@@ -171,7 +171,7 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
171 171
 		if message is None:
172 172
 			message = BotMessage(context.member.guild, '',
173 173
 				BotMessage.TYPE_MOD_WARNING, context)
174
-			message.quote = first_spam_message.content
174
+			message.quote = discordutils.remove_markdown(first_spam_message.clean_content)
175 175
 		if context.is_autobanned:
176 176
 			text = f'User {context.member.mention} auto banned for ' + \
177 177
 				f'posting the same message in {channel_count} channels. ' + \

+ 2
- 2
rocketbot/cogs/generalcog.py Voir le fichier

@@ -9,7 +9,7 @@ from discord.ext import commands
9 9
 
10 10
 from config import CONFIG
11 11
 from rocketbot.cogs.basecog import BaseCog, BotMessage
12
-from rocketbot.utils import parse_timedelta, describe_timedelta
12
+from rocketbot.utils import timedelta_from_str, describe_timedelta
13 13
 from rocketbot.storage import ConfigKey, Storage
14 14
 
15 15
 class GeneralCog(BaseCog, name='General'):
@@ -97,7 +97,7 @@ class GeneralCog(BaseCog, name='General'):
97 97
 				mention_author=False)
98 98
 			return
99 99
 		try:
100
-			age_delta: timedelta = parse_timedelta(age)
100
+			age_delta: timedelta = timedelta_from_str(age)
101 101
 		except ValueError:
102 102
 			await context.message.reply(
103 103
 				f'{CONFIG["failure_emoji"]} age must be a timespan, like "30s", "10m", "1h30m"',

+ 60
- 49
rocketbot/cogs/patterncog.py Voir le fichier

@@ -4,13 +4,15 @@ automated actions on them.
4 4
 """
5 5
 import re
6 6
 from abc import ABCMeta, abstractmethod
7
-from discord import Guild, Member, Message
7
+from discord import Guild, Member, Message, utils as discordutils
8 8
 from discord.ext import commands
9 9
 
10 10
 from config import CONFIG
11 11
 from rocketbot.cogs.basecog import BaseCog, BotMessage, BotMessageReaction
12
+from rocketbot.cogsetting import CogSetting
12 13
 from rocketbot.storage import Storage
13
-from rocketbot.utils import parse_timedelta
14
+from rocketbot.utils import is_user_id, str_from_quoted_str, timedelta_from_str, \
15
+	user_id_from_mention
14 16
 
15 17
 class PatternAction:
16 18
 	"""
@@ -50,8 +52,10 @@ class PatternSimpleExpression(PatternExpression):
50 52
 		self.value = value
51 53
 
52 54
 	def __field_value(self, message: Message):
53
-		if self.field == 'content':
55
+		if self.field in ('content.markdown', 'content'):
54 56
 			return message.content
57
+		if self.field == 'content.plain':
58
+			return discordutils.remove_markdown(message.clean_content)
55 59
 		if self.field == 'author':
56 60
 			return str(message.author.id)
57 61
 		if self.field == 'author.id':
@@ -119,7 +123,7 @@ class PatternCompoundExpression(PatternExpression):
119 123
 				if op.matches(message):
120 124
 					return True
121 125
 			return False
122
-		raise RuntimeError(f'Bad operator "{self.operator}"')
126
+		raise ValueError(f'Bad operator "{self.operator}"')
123 127
 
124 128
 	def __str__(self) -> str:
125 129
 		if self.operator == '!':
@@ -133,12 +137,32 @@ class PatternStatement:
133 137
 	A full message match statement. If a message matches the given expression,
134 138
 	the given actions should be performed.
135 139
 	"""
136
-	def __init__(self, name: str, actions: list, expression: PatternExpression, original: str):
140
+	def __init__(self,
141
+			name: str,
142
+			actions: list,
143
+			expression: PatternExpression,
144
+			original: str):
137 145
 		self.name = name
138 146
 		self.actions = list(actions)  # PatternAction[]
139 147
 		self.expression = expression
140 148
 		self.original = original
141 149
 
150
+	def to_json(self) -> dict:
151
+		"""
152
+		Returns a JSON representation of this statement.
153
+		"""
154
+		return {
155
+			'name': self.name,
156
+			'statement': self.original,
157
+		}
158
+
159
+	@classmethod
160
+	def from_json(cls, json: dict):
161
+		"""
162
+		Gets a PatternStatement from its JSON representation.
163
+		"""
164
+		return PatternCompiler.parse_statement(json['name'], json['statement'])
165
+
142 166
 class PatternContext:
143 167
 	"""
144 168
 	Data about a message that has matched a configured statement and what
@@ -157,33 +181,30 @@ class PatternCog(BaseCog, name='Pattern Matching'):
157 181
 	various criteria. Patterns can be defined by mods for each guild.
158 182
 	"""
159 183
 
160
-	def __get_patterns(self, guild: Guild) -> dict:
161
-		patterns = Storage.get_state_value(guild, 'PatternCog.patterns')
184
+	SETTING_PATTERNS = CogSetting('patterns', None)
185
+
186
+	def __get_patterns(self, guild: Guild) -> dict[str, PatternStatement]:
187
+		"""
188
+		Returns a name -> PatternStatement lookup for the guild.
189
+		"""
190
+		patterns: dict[str, PatternStatement] = Storage.get_state_value(guild,
191
+			'PatternCog.patterns')
162 192
 		if patterns is None:
163
-			patterns = {}
164
-			patterns_encoded = Storage.get_config_value(guild, 'PatternCog.patterns')
165
-			if patterns_encoded:
166
-				for pe in patterns_encoded:
167
-					name = pe.get('name')
168
-					statement = pe.get('statement')
169
-					try:
170
-						ps = PatternCompiler.parse_statement(name, statement)
171
-						patterns[name] = ps
172
-					except PatternError as e:
173
-						self.log(guild, 'Error parsing saved statement ' + \
174
-							f'"{name}": "{e}" Statement: {statement}')
193
+			jsons: list[dict] = self.get_guild_setting(guild, self.SETTING_PATTERNS)
194
+			pattern_list: list[PatternStatement] = []
195
+			for json in jsons:
196
+				try:
197
+					pattern_list.append(PatternStatement.from_json(json))
198
+				except PatternError as e:
199
+					self.log(guild, f'Error decoding pattern "{json["name"]}": {e}')
200
+			patterns = { p.name:p for p in pattern_list}
175 201
 			Storage.set_state_value(guild, 'PatternCog.patterns', patterns)
176 202
 		return patterns
177 203
 
178 204
 	@classmethod
179
-	def __save_patterns(cls, guild: Guild, patterns: dict) -> None:
180
-		to_save = []
181
-		for name, statement in patterns.items():
182
-			to_save.append({
183
-				'name': name,
184
-				'statement': statement.original,
185
-			})
186
-		Storage.set_config_value(guild, 'PatternCog.patterns', to_save)
205
+	def __save_patterns(cls, guild: Guild, patterns: dict[str, PatternStatement]) -> None:
206
+		to_save: list[dict] = list(map(PatternStatement.to_json, patterns.values()))
207
+		cls.set_guild_setting(guild, cls.SETTING_PATTERNS, to_save)
187 208
 
188 209
 	@commands.Cog.listener()
189 210
 	async def on_message(self, message: Message) -> None:
@@ -200,12 +221,11 @@ class PatternCog(BaseCog, name='Pattern Matching'):
200 221
 			return
201 222
 
202 223
 		patterns = self.__get_patterns(message.guild)
203
-		for _, statement in patterns.items():
224
+		for statement in patterns.values():
204 225
 			if statement.expression.matches(message):
205 226
 				await self.__trigger_actions(message, statement)
206 227
 				break
207 228
 
208
-
209 229
 	async def __trigger_actions(self, message: Message, statement: PatternStatement) -> None:
210 230
 		context = PatternContext(message, statement)
211 231
 		should_alert_mods = False
@@ -239,7 +259,7 @@ class PatternCog(BaseCog, name='Pattern Matching'):
239 259
 					f'{action.arguments[0]}',
240 260
 					mention_author=False)
241 261
 				action_descriptions.append('Autoreplied')
242
-				self.log(message.guild, f'{message.author.name} autoreplied to')
262
+				self.log(message.guild, f'autoreplied to {message.author.name}')
243 263
 		bm = BotMessage(
244 264
 			message.guild,
245 265
 			f'User {message.author.name} tripped custom pattern ' + \
@@ -247,7 +267,7 @@ class PatternCog(BaseCog, name='Pattern Matching'):
247 267
 				('\n• '.join(action_descriptions)),
248 268
 			type=BotMessage.TYPE_MOD_WARNING if should_alert_mods else BotMessage.TYPE_INFO,
249 269
 			context=context)
250
-		bm.quote = message.content
270
+		bm.quote = discordutils.remove_markdown(message.clean_content)
251 271
 		await bm.set_reactions(BotMessageReaction.standard_set(
252 272
 			did_delete=context.is_deleted,
253 273
 			did_kick=context.is_kicked,
@@ -361,11 +381,14 @@ class PatternCompiler:
361 381
 	TYPE_TIMESPAN = 'timespan'
362 382
 
363 383
 	FIELD_TO_TYPE = {
364
-		'content': TYPE_TEXT,
384
+		'content.plain': TYPE_TEXT,
385
+		'content.markdown': TYPE_TEXT,
365 386
 		'author': TYPE_MEMBER,
366 387
 		'author.id': TYPE_ID,
367 388
 		'author.name': TYPE_TEXT,
368 389
 		'author.joinage': TYPE_TIMESPAN,
390
+
391
+		'content': TYPE_TEXT, # deprecated, use content.markdown or content.plain
369 392
 	}
370 393
 
371 394
 	ACTION_TO_ARGS = {
@@ -675,29 +698,17 @@ class PatternCompiler:
675 698
 		Converts a value token to its Python value. Raises ValueError on failure.
676 699
 		"""
677 700
 		if datatype == cls.TYPE_ID:
678
-			p = re.compile('^[0-9]+$')
679
-			if p.match(value) is None:
680
-				raise ValueError(f'Illegal id value "{value}"')
681
-			# Store it as a str so it can be larger than an int
701
+			if not is_user_id(value):
702
+				raise ValueError(f'Illegal user id value: {value}')
682 703
 			return value
683 704
 		if datatype == cls.TYPE_MEMBER:
684
-			p = re.compile('^<@!?([0-9]+)>$')
685
-			m = p.match(value)
686
-			if m is None:
687
-				raise ValueError('Illegal member value. Must be an @ mention.')
688
-			return m.group(1)
705
+			return user_id_from_mention(value)
689 706
 		if datatype == cls.TYPE_TEXT:
690
-			# Must be quoted.
691
-			if len(value) < 2 or \
692
-					value[0:1] not in cls.STRING_QUOTE_CHARS or \
693
-					value[-1:] not in cls.STRING_QUOTE_CHARS or \
694
-					value[0:1] != value[-1:]:
695
-				raise ValueError(f'Not a quoted string value: {value}')
696
-			return value[1:-1]
707
+			return str_from_quoted_str(value)
697 708
 		if datatype == cls.TYPE_INT:
698 709
 			return int(value)
699 710
 		if datatype == cls.TYPE_FLOAT:
700 711
 			return float(value)
701 712
 		if datatype == cls.TYPE_TIMESPAN:
702
-			return parse_timedelta(value)
713
+			return timedelta_from_str(value)
703 714
 		raise ValueError(f'Unhandled datatype {datatype}')

+ 2
- 2
rocketbot/cogs/urlspamcog.py Voir le fichier

@@ -3,7 +3,7 @@ Cog for detecting URLs posted by new users.
3 3
 """
4 4
 import re
5 5
 from datetime import timedelta
6
-from discord import Member, Message
6
+from discord import Member, Message, utils as discordutils
7 7
 from discord.ext import commands
8 8
 
9 9
 from config import CONFIG
@@ -120,7 +120,7 @@ class URLSpamCog(BaseCog, name='URL Spam'):
120 120
 					f'{join_age_str} after joining.',
121 121
 					type = BotMessage.TYPE_MOD_WARNING if needs_attention else BotMessage.TYPE_INFO,
122 122
 					context = context)
123
-			bm.quote = message.content
123
+			bm.quote = discordutils.remove_markdown(message.clean_content)
124 124
 			await bm.set_reactions(BotMessageReaction.standard_set(
125 125
 				did_delete=context.is_deleted,
126 126
 				did_kick=context.is_kicked,

+ 65
- 7
rocketbot/utils.py Voir le fichier

@@ -6,7 +6,7 @@ from datetime import datetime, timedelta
6 6
 from discord import Guild
7 7
 from discord.ext.commands import Cog, Group
8 8
 
9
-def parse_timedelta(s: str) -> timedelta:
9
+def timedelta_from_str(s: str) -> timedelta:
10 10
 	"""
11 11
 	Parses a timespan. Format examples:
12 12
 	"30m"
@@ -36,6 +36,25 @@ def parse_timedelta(s: str) -> timedelta:
36 36
 			seconds = scalar
37 37
 	return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
38 38
 
39
def str_from_timedelta(td: timedelta) -> str:
	"""
	Encodes a timedelta as a compact str of number/unit pairs. E.g. "3d2h"

	Units are days ("d"), hours ("h"), minutes ("m") and seconds ("s"), in
	that order. Units with a zero value are left out, except that a span
	with no non-zero unit encodes as "0s". Microseconds are not encoded.
	"""
	total_minutes, secs = divmod(td.seconds, 60)
	hrs, mins = divmod(total_minutes, 60)
	leading_units = ((td.days, 'd'), (hrs, 'h'), (mins, 'm'))
	encoded = ''.join(f'{amount}{unit}' for amount, unit in leading_units if amount != 0)
	# Seconds are always written when nothing else was, so the result is never empty.
	if secs != 0 or not encoded:
		encoded += f'{secs}s'
	return encoded
57
+
39 58
 def describe_timedelta(td: timedelta, max_components: int = 2) -> str:
40 59
 	"""
41 60
 	Formats a human-readable description of a time span. E.g. "3 days 2 hours".
@@ -58,9 +77,7 @@ def describe_timedelta(td: timedelta, max_components: int = 2) -> str:
58 77
 	return ' '.join(components)
59 78
 
60 79
 def first_command_group(cog: Cog) -> Group:
61
-	"""
62
-	Returns the first command Group found in a cog.
63
-	"""
80
+	'Returns the first command Group found in a cog.'
64 81
 	for member_name in dir(cog):
65 82
 		member = getattr(cog, member_name)
66 83
 		if isinstance(member, Group):
@@ -68,12 +85,53 @@ def first_command_group(cog: Cog) -> Group:
68 85
 	return None
69 86
 
70 87
 def bot_log(guild: Guild, cog_class, message: str) -> None:
71
-	"""
72
-	Logs a message to stdout with time, cog, and guild info.
73
-	"""
88
+	'Logs a message to stdout with time, cog, and guild info.'
74 89
 	now = datetime.now() # local
75 90
 	s = f'[{now.strftime("%Y-%m-%dT%H:%M:%S")}|'
76 91
 	s += f'{cog_class.__name__}|' if cog_class else '-|'
77 92
 	s += f'{guild.name}] ' if guild else '-] '
78 93
 	s += message
79 94
 	print(s)
95
+
96
__QUOTE_CHARS = '\'"'
__ID_REGEX = re.compile(r'^[0-9]{17,20}$')
# User mentions appear both as <@ID> and as <@!ID>, so the "!" has to be
# optional. Role mentions always carry the "&".
__MENTION_REGEX = re.compile(r'^<@[!&]?([0-9]{17,20})>$')
__USER_MENTION_REGEX = re.compile(r'^<@!?([0-9]{17,20})>$')
__ROLE_MENTION_REGEX = re.compile(r'^<@&([0-9]{17,20})>$')

# NOTE: the helpers below use fullmatch() rather than match() because "$"
# alone would also accept a value followed by a trailing newline.

def is_user_id(val: str) -> bool:
	"""
	Tests if a string is in user/role ID format (17 to 20 decimal digits).
	"""
	return __ID_REGEX.fullmatch(val) is not None

def is_mention(val: str) -> bool:
	"""
	Tests if a string is a user or role mention.
	"""
	return __MENTION_REGEX.fullmatch(val) is not None

def is_role_mention(val: str) -> bool:
	"""
	Tests if a string is a role mention of the `<@&0000000>` form.
	"""
	return __ROLE_MENTION_REGEX.fullmatch(val) is not None

def is_user_mention(val: str) -> bool:
	"""
	Tests if a string is a user mention of the `<@0000000>` or `<@!0000000>`
	form.
	"""
	return __USER_MENTION_REGEX.fullmatch(val) is not None

def user_id_from_mention(mention: str) -> str:
	"""
	Extracts the user ID from a user mention. The ID is returned as a str.
	Raises a ValueError if the mention is malformed.
	"""
	m = __USER_MENTION_REGEX.fullmatch(mention)
	if m is None:
		raise ValueError(f'"{mention}" is not an @ user mention')
	return m.group(1)

def mention_from_user_id(user_id: str) -> str:
	"""
	Returns a markdown user mention from a user id.
	"""
	return f'<@!{user_id}>'

def mention_from_role_id(role_id: str) -> str:
	"""
	Returns a markdown role mention from a role id.
	"""
	return f'<@&{role_id}>'

def str_from_quoted_str(val: str) -> str:
	"""
	Removes the leading and trailing quotes from a string.

	The value must start and end with the same quote character, either `'`
	or `"`. Raises a ValueError otherwise.
	"""
	# The first character must be a quote and the last must be that same
	# quote, so a mismatched pair such as 'abc" is rejected.
	if len(val) < 2 or val[0] not in __QUOTE_CHARS or val[0] != val[-1]:
		raise ValueError(f'Not a quoted string: {val}')
	return val[1:-1]

Chargement…
Annuler
Enregistrer