Explorar el Código

Adding deceptive link detection

master
Rocketsoup hace 1 año
padre
commit
747af4bd0e
Se han modificado 1 ficheros con 107 adiciones y 0 borrados
  1. 107
    0
      rocketbot/cogs/urlspamcog.py

+ 107
- 0
rocketbot/cogs/urlspamcog.py Ver fichero

@@ -5,6 +5,7 @@ import re
5 5
 from datetime import timedelta
6 6
 from discord import Member, Message, utils as discordutils
7 7
 from discord.ext import commands
8
+from discord.utils import escape_markdown
8 9
 
9 10
 from config import CONFIG
10 11
 from rocketbot.cogs.basecog import BaseCog, BotMessage, BotMessageReaction, CogSetting
@@ -43,12 +44,19 @@ class URLSpamCog(BaseCog, name='URL Spam'):
43 44
 				'disables URL spam detection.',
44 45
 			usage='<seconds:int>',
45 46
 			min_value=0)
47
+	SETTING_DECEPTIVE_ACTION = CogSetting('deceptiveaction', str,
48
+			brief='action to take on deceptive link markdown',
49
+			description='The action to take on chat messages with links ' + \
50
+				'where the text looks like a different URL than the actual link.',
51
+			enum_values=set(['nothing', 'modwarn', 'modwarndelete', \
52
+				'chatwarn', 'chatwarndelete', 'delete', 'kick', 'ban']))
46 53
 
47 54
 	def __init__(self, bot):
48 55
 		super().__init__(bot)
49 56
 		self.add_setting(URLSpamCog.SETTING_ENABLED)
50 57
 		self.add_setting(URLSpamCog.SETTING_ACTION)
51 58
 		self.add_setting(URLSpamCog.SETTING_JOIN_AGE)
59
+		self.add_setting(URLSpamCog.SETTING_DECEPTIVE_ACTION)
52 60
 
53 61
 	@commands.group(
54 62
 		brief='Manages URL spam detection',
@@ -71,7 +79,11 @@ class URLSpamCog(BaseCog, name='URL Spam'):
71 79
 			return
72 80
 		if not self.get_guild_setting(message.guild, self.SETTING_ENABLED):
73 81
 			return
82
+		await self.check_message_recency(message);
83
+		await self.check_deceptive_links(message);
74 84
 
85
+	async def check_message_recency(self, message: Message):
86
+		'Checks if the message was sent too recently by a new user'
75 87
 		action = self.get_guild_setting(message.guild, self.SETTING_ACTION)
76 88
 		join_seconds = self.get_guild_setting(message.guild, self.SETTING_JOIN_AGE)
77 89
 		min_join_age = timedelta(seconds=join_seconds)
@@ -129,6 +141,101 @@ class URLSpamCog(BaseCog, name='URL Spam'):
129 141
 			if needs_attention:
130 142
 				self.record_warning(message.author)
131 143
 
144
+	async def check_deceptive_links(self, message: Message):
145
+		"""
146
+		Checks if the message contains deceptive URL markdown, e.g.
147
+		`[nicewebsite.com](https://evilwebsite.com)'`
148
+		"""
149
+		action = self.get_guild_setting(message.guild, self.SETTING_DECEPTIVE_ACTION)
150
+		if action == None or action == 'nothing':
151
+			return
152
+
153
+		if not self.contains_deceptive_links(message.content):
154
+			return
155
+		mod_text = f'User {message.author.name} ({message.author.id}) posted a deceptive link.'
156
+		quoted = '> ' + escape_markdown(message.content).replace('\n', '\n> ')
157
+		mod_text += f'\n\n{quoted}'
158
+		self.log(message.guild, f'{message.author.name} posted deceptive link - action: {action}')
159
+		if 'modwarn' in action:
160
+			if 'delete' in action:
161
+				mod_text += '\n\nMessage deleted'
162
+			else:
163
+				mod_text += f'\n\n{message.jump_url}'
164
+			bm = BotMessage(message.guild, mod_text, BotMessage.TYPE_MOD_WARNING, suppress_embeds=True)
165
+			await self.post_message(bm)
166
+			if 'delete' in action:
167
+				await message.delete()
168
+		elif 'chatwarn' in action:
169
+			if 'delete' in action:
170
+				response = f':warning: Links with deceptive labels are prohibited :warning:'
171
+			else:
172
+				response = f':warning: Message contains a deceptively labeled link! Click carefully. :warning:'
173
+			await message.reply(response, mention_author=False)
174
+			if 'delete' in action:
175
+				await message.delete()
176
+		elif action == 'delete':
177
+			mod_text += f'\n\nDeleting message'
178
+			bm = BotMessage(message.guild, mod_text, BotMessage.TYPE_INFO, suppress_embeds=True)
179
+			await self.post_message(bm)
180
+			await message.delete()
181
+		elif action == 'kick':
182
+			mod_text += f'\n\nUser kicked'
183
+			bm = BotMessage(message.guild, mod_text, BotMessage.TYPE_MOD_WARNING, suppress_embeds=True)
184
+			await self.post_message(bm)
185
+			await message.delete()
186
+			await message.author.kick(
187
+				reason=f'Rocketbot: User posted a deceptive link')
188
+		elif action == 'ban':
189
+			mod_text += f'\n\nUser banned'
190
+			bm = BotMessage(message.guild, mod_text, BotMessage.TYPE_MOD_WARNING, suppress_embeds=True)
191
+			await self.post_message(bm)
192
+			await message.author.ban(
193
+				reason=f'Rocketbot: User posted a deceptive link',
194
+				delete_message_days=1)
195
+
196
+	def contains_deceptive_links(self, content: str) -> bool:
197
+		# Strip markdown that can safely contain URL sequences
198
+		content = re.sub(r'`[^`]+`', '', content)  # `inline code`
199
+		content = re.sub(r'```.+?```', '', content, re.DOTALL)  # ``` code block ```
200
+		matches = re.findall(r'\[([^\]]+)\]\(([^\)]+)\)', content)
201
+		for match in matches:
202
+			original_label: str = match[0].strip()
203
+			original_link: str = match[1].strip()
204
+			label: str = original_label
205
+			link: str = original_link
206
+			if link.startswith('<') and link.endswith('>'):
207
+				link = link[1:-1]
208
+			if self.is_url(label):
209
+				if label != link:
210
+					return True
211
+			elif self.is_casual_url(label):
212
+				# Trim www. for easier comparisons.
213
+				if link.startswith('https://www.'):
214
+					link = 'https://' + link[12:]
215
+				if link.startswith('http://www.'):
216
+					link = 'http://' + link[11:]
217
+				if link.endswith('/'):
218
+					link = link[:-1]
219
+				if label.startswith('www.'):
220
+					label = label[4:]
221
+				if label.endswith('/'):
222
+					label = label[:-1]
223
+				if link.startswith('https://') and 'https://' + label != link:
224
+					return True
225
+				elif link.startswith('http://') and 'http://' + label != link:
226
+					return True
227
+		return False
228
+
229
+	def is_url(self, s: str):
230
+		'Tests if a string is strictly a URL'
231
+		pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
232
+		return re.match(pattern, s, re.IGNORECASE) != None
233
+
234
+	def is_casual_url(self, s: str):
235
+		'Tests if a string is a "casual URL" with no scheme included'
236
+		pattern = r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
237
+		return re.match(pattern, s, re.IGNORECASE) != None
238
+
132 239
 	async def on_mod_react(self,
133 240
 			bot_message: BotMessage,
134 241
 			reaction: BotMessageReaction,

Loading…
Cancelar
Guardar