Parcourir la source

Crosspost cog looks for both duplicate messages and any messages posted in multiple channels

Log cog groups similar events to avoid being throttled by the API
tags/2.0.0
Rocketsoup il y a 2 mois
Parent
révision
c11f61844e
6 fichiers modifiés avec 391 ajouts et 170 suppressions
  1. 3
    1
      config.sample.py
  2. 1
    1
      main.py
  3. 2
    1
      rocketbot/cogs/basecog.py
  4. 160
    75
      rocketbot/cogs/crosspostcog.py
  5. 223
    91
      rocketbot/cogs/logcog.py
  6. 2
    1
      rocketbot/storage.py

+ 3
- 1
config.sample.py Voir le fichier

@@ -59,7 +59,9 @@ CONFIG = {
59 59
 		},
60 60
 		'CrossPostCog': {
61 61
 			'enabled': False,
62
-			'warncount': 3,
62
+			'dupewarncount': 3,
63
+			'warncount': 5,
64
+			'dupebancount': 9999,
63 65
 			'bancount': 9999,
64 66
 			'minlength': 1,
65 67
 			'timespan': 60,

+ 1
- 1
main.py Voir le fichier

@@ -11,7 +11,7 @@ from config import CONFIG
11 11
 from rocketbot.bot import start_bot
12 12
 from rocketbot.utils import bot_log
13 13
 
14
-CURRENT_CONFIG_VERSION = 3
14
+CURRENT_CONFIG_VERSION = 4
15 15
 if (CONFIG.get('__config_version') or 0) < CURRENT_CONFIG_VERSION:
16 16
 	# If you're getting this error, it means something changed in config.py's
17 17
 	# format. Consult config.sample.py and compare it to your own config.py.

+ 2
- 1
rocketbot/cogs/basecog.py Voir le fichier

@@ -9,6 +9,7 @@ from discord.abc import GuildChannel
9 9
 from discord.ext import commands
10 10
 
11 11
 from config import CONFIG
12
+from rocketbot.bot import Rocketbot
12 13
 from rocketbot.botmessage import BotMessage, BotMessageReaction
13 14
 from rocketbot.cogsetting import CogSetting
14 15
 from rocketbot.collections import AgeBoundDict
@@ -28,7 +29,7 @@ class BaseCog(commands.Cog):
28 29
 	common tasks.
29 30
 	"""
30 31
 	def __init__(self, bot):
31
-		self.bot = bot
32
+		self.bot: Rocketbot = bot
32 33
 		self.are_settings_setup = False
33 34
 		self.settings = []
34 35
 

+ 160
- 75
rocketbot/cogs/crosspostcog.py Voir le fichier

@@ -1,7 +1,8 @@
1 1
 """
2 2
 Cog for detecting spam messages posted in multiple channels.
3 3
 """
4
-from datetime import datetime, timedelta
4
+import re
5
+from datetime import datetime, timedelta, timezone
5 6
 from typing import Optional
6 7
 
7 8
 from discord import Member, Message, utils as discordutils, TextChannel
@@ -9,16 +10,17 @@ from discord.ext import commands
9 10
 
10 11
 from config import CONFIG
11 12
 from rocketbot.cogs.basecog import BaseCog, BotMessage, BotMessageReaction, CogSetting
12
-from rocketbot.collections import AgeBoundList, SizeBoundDict
13
+from rocketbot.collections import AgeBoundList, AgeBoundDict
13 14
 from rocketbot.storage import Storage
15
+from rocketbot.utils import str_from_timedelta
16
+
14 17
 
15 18
 class SpamContext:
16 19
 	"""
17 20
 	Data about a set of duplicate messages from a user.
18 21
 	"""
19
-	def __init__(self, member: Member, message_hash: int) -> None:
22
+	def __init__(self, member: Member) -> None:
20 23
 		self.member: Member = member
21
-		self.message_hash: int = message_hash
22 24
 		self.age: datetime = datetime.now()
23 25
 		self.bot_message: Optional[BotMessage] = None
24 26
 		self.is_kicked: bool = False
@@ -27,28 +29,49 @@ class SpamContext:
27 29
 		self.spam_messages: set[Message] = set()
28 30
 		self.deleted_messages: set[Message] = set()
29 31
 		self.unique_channels: set[TextChannel] = set()
32
+		self.duplicate_count: int = 0
30 33
 
31 34
 class CrossPostCog(BaseCog, name='Crosspost Detection'):
32 35
 	"""
33
-	Detects a user posting the same text in multiple channels in a short period
34
-	of time: a common pattern for spammers. Repeated posts in the same channel
35
-	aren't detected, as this can often be for a reason or due to trying a
36
-	failed post when connectivity is poor. Minimum message length can be
37
-	enforced for detection. Minimum is always at least 1 to ignore posts with
38
-	just embeds or images and no text.
36
+	Detects a user posting in multiple channels in a short period
37
+	of time: a common pattern for spammers.
38
+
39
+	These used to be identical text, but more recent attacks have had small
40
+	variations, such as different imgur URLs. It's reasonable to treat
41
+	posting in many channels in a short period as suspicious on its own,
42
+	regardless of whether they are identical.
43
+
44
+	Repeated posts in the same channel aren't currently detected, as this can
45
+	often be for a reason or due to trying a failed post when connectivity is
46
+	poor. Minimum message length can be enforced for detection.
39 47
 	"""
40 48
 	SETTING_ENABLED = CogSetting('enabled', bool,
41 49
 		brief='crosspost detection',
42 50
 		description='Whether crosspost detection is enabled.')
43 51
 	SETTING_WARN_COUNT = CogSetting('warncount', int,
44 52
 		brief='number of messages to trigger a warning',
45
-		description='The number of unique channels the same message is ' + \
53
+		description='The number of unique channels messages are ' + \
54
+			'posted in by the same user to trigger a mod warning. The ' + \
55
+			'messages need not be identical (see dupewarncount).',
56
+		usage='<count:int>',
57
+		min_value=2)
58
+	SETTING_DUPE_WARN_COUNT = CogSetting('dupewarncount', int,
59
+		brief='number of identical messages to trigger a warning',
60
+		description='The number of unique channels identical messages are ' + \
46 61
 			'posted in by the same user to trigger a mod warning.',
47 62
 		usage='<count:int>',
48 63
 		min_value=2)
49 64
 	SETTING_BAN_COUNT = CogSetting('bancount', int,
50 65
 		brief='number of messages to trigger a ban',
51
-		description='The number of unique channels the same message is ' + \
66
+		description='The number of unique channels messages are ' + \
67
+			'posted in by the same user to trigger an automatic ban. The ' + \
68
+			'messages need not be identical (see dupebancount). Set ' + \
69
+			'to a large value to effectively disable, e.g. 9999.',
70
+		usage='<count:int>',
71
+		min_value=2)
72
+	SETTING_DUPE_BAN_COUNT = CogSetting('dupebancount', int,
73
+		brief='number of identical messages to trigger a ban',
74
+		description='The number of unique channels identical messages are ' + \
52 75
 			'posted in by the same user to trigger an automatic ban. Set ' + \
53 76
 			'to a large value to effectively disable, e.g. 9999.',
54 77
 		usage='<count:int>',
@@ -75,7 +98,9 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
75 98
 		super().__init__(bot)
76 99
 		self.add_setting(CrossPostCog.SETTING_ENABLED)
77 100
 		self.add_setting(CrossPostCog.SETTING_WARN_COUNT)
101
+		self.add_setting(CrossPostCog.SETTING_DUPE_WARN_COUNT)
78 102
 		self.add_setting(CrossPostCog.SETTING_BAN_COUNT)
103
+		self.add_setting(CrossPostCog.SETTING_DUPE_BAN_COUNT)
79 104
 		self.add_setting(CrossPostCog.SETTING_MIN_LENGTH)
80 105
 		self.add_setting(CrossPostCog.SETTING_TIMESPAN)
81 106
 		self.max_spam_contexts = 12
@@ -83,99 +108,121 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
83 108
 	async def __record_message(self, message: Message) -> None:
84 109
 		if message.channel.permissions_for(message.author).ban_members:
85 110
 			# User exempt from spam detection
111
+			self.__trace("User exempt from crosspost checks")
86 112
 			return
87 113
 		def compute_message_hash(m: Message) -> int:
88 114
 			to_hash = m.content
115
+			# URLs sometimes differ per spam message, so simplify them
116
+			url_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
117
+			to_hash = re.sub(url_regex, '<url>', to_hash)
118
+			# Add attachment metadata
89 119
 			for attachment in m.attachments:
90 120
 				to_hash += f'\n[[ATT: ct={attachment.content_type} s={attachment.size} w={attachment.width} h={attachment.height}]]'
91 121
 			h = hash(to_hash)
122
+			self.__trace(f"Message hash for {m.id} is {h}")
92 123
 			return h
93
-		compute_message_hash(message)
94
-		if len(message.attachments) == 0 and len(message.content) < self.get_guild_setting(message.guild, self.SETTING_MIN_LENGTH):
124
+
125
+		min_length = self.get_guild_setting(message.guild, self.SETTING_MIN_LENGTH)
126
+		if len(message.attachments) == 0 and len(message.content) < min_length:
95 127
 			# Message too short to count towards spam total
128
+			self.__trace(f"Message len {len(message.content)} < {min_length}")
96 129
 			return
130
+
131
+		# Get config
97 132
 		max_age = timedelta(seconds=self.get_guild_setting(message.guild, self.SETTING_TIMESPAN))
98 133
 		warn_count: int = self.get_guild_setting(message.guild, self.SETTING_WARN_COUNT)
134
+		dupe_warn_count: int = self.get_guild_setting(message.guild, self.SETTING_DUPE_WARN_COUNT)
135
+
136
+		# Record message
99 137
 		recent_messages: AgeBoundList[Message, datetime, timedelta] = Storage.get_state_value(message.guild, self.STATE_KEY_RECENT_MESSAGES)
100 138
 		if recent_messages is None:
101 139
 			recent_messages = AgeBoundList(max_age, lambda index, message : message.created_at)
102 140
 			Storage.set_state_value(message.guild, self.STATE_KEY_RECENT_MESSAGES, recent_messages)
103 141
 		recent_messages.max_age = max_age
104 142
 		recent_messages.append(message)
143
+		self.__trace(f"Recent messages now length {len(recent_messages)}")
105 144
 
106 145
 		# Get all recent messages by user
107 146
 		member_messages = [m for m in recent_messages if m.author.id == message.author.id]
108
-		if len(member_messages) < warn_count:
147
+		message_count = len(member_messages)
148
+		self.__trace(f"Found {message_count} messages for {message.author.name}")
149
+		if message_count < warn_count and message_count < dupe_warn_count:
150
+			self.__trace(f"Bailing because message count {message_count} < warn count {warn_count} and < dupe warn count {dupe_warn_count}")
109 151
 			return
110 152
 
111
-		# Look for repeats
153
+		# Look for identical(ish) messages and unique channels
112 154
 		hash_to_channels: dict[int, set[TextChannel]] = {}
113
-		max_count = 0
155
+		unique_channels: set[TextChannel] = set()
156
+		max_duplicate_count = 0
114 157
 		for m in member_messages:
115 158
 			message_hash = compute_message_hash(m)
116
-			channels: set[TextChannel] = hash_to_channels.get(message_hash)
117
-			if channels is None:
118
-				channels = set()
119
-				hash_to_channels[message_hash] = channels
120
-			channels.add(m.channel)
121
-			max_count = max(max_count, len(channels))
122
-		if max_count < warn_count:
159
+			dupe_message_channels: set[TextChannel] = hash_to_channels.get(message_hash)
160
+			if dupe_message_channels is None:
161
+				dupe_message_channels = set()
162
+				hash_to_channels[message_hash] = dupe_message_channels
163
+			dupe_message_channels.add(m.channel)
164
+			unique_channels.add(m.channel)
165
+			max_duplicate_count = max(max_duplicate_count, len(dupe_message_channels))
166
+		channel_count = len(unique_channels)
167
+		self.__trace(f"Found {len(hash_to_channels)} unique messages, {channel_count} unique channels, {max_duplicate_count} duplicated messages")
168
+		if channel_count < warn_count and max_duplicate_count < dupe_warn_count:
169
+			self.__trace(f"Bailing because channels {channel_count} < warn count {warn_count} and max dupes {max_duplicate_count} < dupe warn count {dupe_warn_count}")
123 170
 			return
124 171
 
125
-		# Handle the spam
126
-		spam_lookup: SizeBoundDict[str, SpamContext, datetime] = Storage.get_state_value(message.guild, self.STATE_KEY_SPAM_CONTEXT)
172
+		# This person is a problem
173
+
174
+		spam_lookup: AgeBoundDict[str, SpamContext, datetime, timedelta] = Storage.get_state_value(message.guild, self.STATE_KEY_SPAM_CONTEXT)
127 175
 		if spam_lookup is None:
128
-			spam_lookup = SizeBoundDict(
129
-				self.max_spam_contexts,
176
+			spam_lookup = AgeBoundDict(
177
+				max_age,
130 178
 				lambda key, context : context.age)
131 179
 			Storage.set_state_value(message.guild, self.STATE_KEY_SPAM_CONTEXT, spam_lookup)
132
-		for message_hash, channels in hash_to_channels.items():
133
-			channel_count = len(channels)
134
-			if channel_count < warn_count:
135
-				continue
136
-			key = f'{message.author.id}|{message_hash}'
137
-			context = spam_lookup.get(key)
138
-			if context is None:
139
-				context = SpamContext(message.author, message_hash)
140
-				spam_lookup[key] = context
141
-				context.age = message.created_at
142
-				self.log(message.guild,
143
-					f'\u0007{message.author.name} ({message.author.id}) ' + \
144
-					f'posted the same message in {channel_count} or more channels.')
145
-			for m in member_messages:
146
-				if compute_message_hash(m) == message_hash:
147
-					context.spam_messages.add(m)
148
-					context.unique_channels.add(m.channel)
149
-			await self.__update_from_context(context)
180
+		key = f'{message.author.id}'
181
+		context = spam_lookup.get(key)
182
+		if context is not None and message.created_at - context.age > max_age:
183
+			context = None
184
+		if context is None:
185
+			context = SpamContext(message.author)
186
+			spam_lookup[key] = context
187
+			self.log(message.guild,
188
+				f'\u0007{message.author.name} ({message.author.id}) ' + \
189
+				f'posted messages in {channel_count} channels.')
190
+		context.age = message.created_at
191
+		context.duplicate_count = max_duplicate_count
192
+		context.spam_messages.update(member_messages)
193
+		context.unique_channels.update(unique_channels)
194
+		await self.__update_from_context(context)
150 195
 
151 196
 	async def __update_from_context(self, context: SpamContext):
152 197
 		ban_count = self.get_guild_setting(context.member.guild, self.SETTING_BAN_COUNT)
198
+		dupe_ban_count = self.get_guild_setting(context.member.guild, self.SETTING_DUPE_BAN_COUNT)
153 199
 		channel_count = len(context.unique_channels)
154
-		if channel_count >= ban_count:
200
+		if channel_count >= ban_count or context.duplicate_count >= dupe_ban_count:
155 201
 			if not context.is_banned:
202
+				max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
203
+				max_age_str = str_from_timedelta(max_age)
156 204
 				await context.member.ban(
157
-					reason='Rocketbot: Posted same message in ' + \
158
-						f'{channel_count} channels. Banned by ' + \
159
-						f'{self.bot.user.name}.',
205
+					reason=f'Rocketbot: Posted in {channel_count} channels within {max_age_str} ' + \
206
+					   f'({context.duplicate_count} identical). Banned by {self.bot.user.name}.',
160 207
 					delete_message_days=1)
161 208
 				context.is_kicked = True
162 209
 				context.is_banned = True
163 210
 				context.is_autobanned = True
164 211
 				context.deleted_messages |= context.spam_messages
165
-				self.log(context.member.guild,
166
-					f'{context.member.name} ({context.member.id}) posted ' + \
167
-					f'same message in {channel_count} channels. Banned by ' + \
168
-					f'{self.bot.user.name}.')
212
+				self.__log_ban(context, self.bot.user.name)
169 213
 			else:
170 214
 				# Already banned. Nothing to update in the message.
171 215
 				return
172 216
 		await self.__update_message_from_context(context)
173 217
 
174 218
 	async def __update_message_from_context(self, context: SpamContext) -> None:
175
-		first_spam_message: Message = next(iter(context.spam_messages))
219
+		first_spam_message: Message = sorted(list(context.spam_messages), key=lambda m: m.created_at)[0]
176 220
 		spam_count = len(context.spam_messages)
177 221
 		channel_count = len(context.unique_channels)
178
-		deleted_count = len(context.spam_messages)
222
+		deleted_count = len(context.deleted_messages)
223
+		duplicate_count = context.duplicate_count
224
+		max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
225
+		max_age_str = str_from_timedelta(max_age)
179 226
 		message = context.bot_message
180 227
 		if message is None:
181 228
 			message_type: int = BotMessage.TYPE_INFO if self.was_warned_recently(context.member) \
@@ -185,15 +232,25 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
185 232
 			self.record_warning(context.member)
186 233
 		if context.is_autobanned:
187 234
 			text = f'User {context.member.mention} auto banned for ' + \
188
-				f'posting the same message in {channel_count} channels. ' + \
189
-				'Messages from past 24 hours deleted.'
235
+				f'posting messages in {channel_count} channels within {max_age_str} ' + \
236
+				f'({duplicate_count} identical). Messages from past 24 hours deleted.'
190 237
 			await message.set_reactions([])
191 238
 			await message.set_text(text)
192 239
 		else:
193
-			body: str = f'User {context.member.mention} posted ' + \
194
-				f'the same message in {channel_count} channels.'
195
-			for msg in context.spam_messages:
240
+			body: str = f'User {context.member.mention} posted '
241
+			if duplicate_count == channel_count:
242
+				body += f'identical messages in {channel_count} channels within {max_age_str} .'
243
+			elif duplicate_count == 1:
244
+				body += f'**different** messages in {channel_count} channels within ' + \
245
+						f'{max_age_str}. (Showing first one).'
246
+			else:
247
+				body += f'messages in {channel_count} channels within {max_age_str} ' + \
248
+						f'({duplicate_count} are identical, showing first one).'
249
+			max_links = 10
250
+			for msg in sorted(list(context.spam_messages), key=lambda m: m.created_at)[:max_links]:
196 251
 				body += f'\n- {msg.jump_url}'
252
+			if len(context.spam_messages) > max_links:
253
+				body += f'\n- ...{len(context.spam_messages) - max_links} more...'
197 254
 			await message.set_text(body)
198 255
 			await message.set_reactions(BotMessageReaction.standard_set(
199 256
 				did_delete = deleted_count >= spam_count,
@@ -218,30 +275,58 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
218 275
 				await message.delete()
219 276
 				context.deleted_messages.add(message)
220 277
 			await self.__update_from_context(context)
221
-			self.log(context.member.guild,
222
-				f'{context.member.name} ({context.member.id}) posted same ' + \
223
-				f'message in {channel_count} channels. Deleted by {reacted_by.name}.')
278
+			self.__log_deletion(context, reacted_by.name)
224 279
 		elif reaction.emoji == CONFIG['kick_emoji']:
225 280
 			await context.member.kick(
226
-				reason=f'Rocketbot: Posted same message in {channel_count} ' + \
281
+				reason=f'Rocketbot: Posted messages in {channel_count} ' + \
227 282
 					f'channels. Kicked by {reacted_by.name}.')
228 283
 			context.is_kicked = True
229 284
 			await self.__update_from_context(context)
230
-			self.log(context.member.guild,
231
-				f'{context.member.name} ({context.member.id}) posted same ' + \
232
-				f'message in {channel_count} channels. Kicked by {reacted_by.name}.')
285
+			self.__log_kick(context, reacted_by.name)
233 286
 		elif reaction.emoji == CONFIG['ban_emoji']:
234 287
 			await context.member.ban(
235
-				reason=f'Rocketbot: Posted same message in {channel_count} ' + \
288
+				reason=f'Rocketbot: Posted messages in {channel_count} ' + \
236 289
 					f'channels. Banned by {reacted_by.name}.',
237 290
 				delete_message_days=1)
238 291
 			context.deleted_messages |= context.spam_messages
239 292
 			context.is_kicked = True
240 293
 			context.is_banned = True
241 294
 			await self.__update_from_context(context)
242
-			self.log(context.member.guild,
243
-				f'{context.member.name} ({context.member.id}) posted same ' + \
244
-				f'message in {channel_count} channels. Kicked by {reacted_by.name}.')
295
+			self.__log_ban(context, reacted_by.name)
296
+
297
+	def __log_deletion(self, context: SpamContext, by_who: str) -> None:
298
+		max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
299
+		max_age_str = str_from_timedelta(max_age)
300
+		channel_count = len(context.unique_channels)
301
+		duplicate_count = context.duplicate_count
302
+		self.log(context.member.guild,
303
+				 f'{context.member.name} ({context.member.id}) posted ' + \
304
+				 f'messages in {channel_count} channels withint {max_age_str} ' + \
305
+				 f'({duplicate_count} identical). Deleted by {by_who}.')
306
+
307
+	def __log_kick(self, context: SpamContext, by_who: str) -> None:
308
+		max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
309
+		max_age_str = str_from_timedelta(max_age)
310
+		channel_count = len(context.unique_channels)
311
+		duplicate_count = context.duplicate_count
312
+		self.log(context.member.guild,
313
+				 f'{context.member.name} ({context.member.id}) posted ' + \
314
+				 f'messages in {channel_count} channels within {max_age_str} ' + \
315
+				 f'({duplicate_count} identical). Kicked by {by_who}.')
316
+
317
+	def __log_ban(self, context: SpamContext, by_who: str) -> None:
318
+		max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
319
+		max_age_str = str_from_timedelta(max_age)
320
+		channel_count = len(context.unique_channels)
321
+		duplicate_count = context.duplicate_count
322
+		self.log(context.member.guild,
323
+				 f'{context.member.name} ({context.member.id}) posted ' + \
324
+				 f'messages in {channel_count} channels within {max_age_str} ' + \
325
+				 f'({duplicate_count} identical). Banned by {by_who}.')
326
+
327
+	def __trace(self, message):
328
+		# print(message)
329
+		pass
245 330
 
246 331
 	@commands.Cog.listener()
247 332
 	async def on_message(self, message: Message):
@@ -249,11 +334,11 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
249 334
 		if message.author is None or \
250 335
 				message.author.bot or \
251 336
 				message.channel is None or \
252
-				message.guild is None or \
253
-				message.content is None:
337
+				message.guild is None:
254 338
 			return
255 339
 		if not self.get_guild_setting(message.guild, self.SETTING_ENABLED):
256 340
 			return
341
+		self.__trace("--ON MESSAGE--")
257 342
 		await self.__record_message(message)
258 343
 
259 344
 	@commands.group(
@@ -262,6 +347,6 @@ class CrossPostCog(BaseCog, name='Crosspost Detection'):
262 347
 	@commands.has_permissions(ban_members=True)
263 348
 	@commands.guild_only()
264 349
 	async def crosspost(self, context: commands.Context):
265
-		"""Crosspost detection command group"""
350
+		"""Detects members posting messages in multiple channels in a short period of time."""
266 351
 		if context.invoked_subcommand is None:
267 352
 			await context.send_help()

+ 223
- 91
rocketbot/cogs/logcog.py Voir le fichier

@@ -2,16 +2,34 @@
2 2
 Cog for detecting large numbers of guild joins in a short period of time.
3 3
 """
4 4
 from collections.abc import Sequence
5
-from datetime import datetime
5
+from datetime import datetime, timezone, timedelta
6
+
6 7
 from discord import AuditLogAction, AuditLogEntry, Emoji, Guild, GuildSticker, Invite, Member, Message, RawBulkMessageDeleteEvent, RawMessageDeleteEvent, RawMessageUpdateEvent, Role, Thread, User
7 8
 from discord.abc import GuildChannel
8
-from discord.ext import commands
9
+from discord.ext import commands, tasks
9 10
 from discord.utils import escape_markdown
10
-from typing import Optional, Tuple, Union
11
+from typing import Optional, Tuple, Union, Callable
11 12
 import difflib
12 13
 import re
13 14
 
14 15
 from rocketbot.cogs.basecog import BaseCog, BotMessage, CogSetting
16
+from rocketbot.storage import Storage
17
+
18
+class BufferedMessageEditEvent:
19
+	def __init__(self, guild: Guild, channel: GuildChannel, before: Optional[Message], after: Message, data = None) -> None:
20
+		self.guild = guild
21
+		self.channel = channel
22
+		self.before = before
23
+		self.after = after
24
+		self.data = data
25
+
26
+class BufferedMessageDeleteEvent:
27
+	def __init__(self, guild: Guild, channel: GuildChannel, message_id: int, message: Optional[Message] = None) -> None:
28
+		self.guild = guild
29
+		self.channel = channel
30
+		self.message_id = message_id
31
+		self.message = message
32
+		self.author = message.author if message is not None else None
15 33
 
16 34
 class LoggingCog(BaseCog, name='Logging'):
17 35
 	"""
@@ -21,9 +39,16 @@ class LoggingCog(BaseCog, name='Logging'):
21 39
 			brief='logging',
22 40
 			description='Whether this cog is enabled for a guild.')
23 41
 
42
+	STATE_EVENT_BUFFER = 'LoggingCog.eventBuffer'
43
+
24 44
 	def __init__(self, bot):
25 45
 		super().__init__(bot)
26 46
 		self.add_setting(LoggingCog.SETTING_ENABLED)
47
+		self.flush_buffers.start()
48
+		self.buffered_guilds: set[Guild] = set()
49
+
50
+	def cog_unload(self) -> None:
51
+		self.flush_buffers.cancel()
27 52
 
28 53
 	@commands.group(
29 54
 		brief='Manages event logging',
@@ -407,6 +432,44 @@ class LoggingCog(BaseCog, name='Logging'):
407 432
 
408 433
 	# Events - Messages
409 434
 
435
+	def __buffer_event(self, guild: Guild, event_type: str, event) -> None:
436
+		buffers: dict[str, list] = Storage.get_state_value(guild, self.STATE_EVENT_BUFFER)
437
+		if buffers is None:
438
+			buffers = {}
439
+			Storage.set_state_value(guild, self.STATE_EVENT_BUFFER, buffers)
440
+		if buffers.get(event_type) is None:
441
+			buffers[event_type] = [ event ]
442
+		else:
443
+			buffers[event_type].append(event)
444
+		self.buffered_guilds.add(guild)
445
+
446
+	@tasks.loop(seconds=3.0)
447
+	async def flush_buffers(self) -> None:
448
+		try:
449
+			if len(self.buffered_guilds) == 0:
450
+				return
451
+			guilds = set(self.buffered_guilds)
452
+			self.buffered_guilds.clear()
453
+			for guild in guilds:
454
+				await self.__flush_buffers_for_guild(guild)
455
+		except Exception as e:
456
+			print(e)
457
+
458
+	async def __flush_buffers_for_guild(self, guild: Guild) -> None:
459
+		buffers: dict[str, list] = Storage.get_state_value(guild, self.STATE_EVENT_BUFFER)
460
+		if buffers is None:
461
+			return
462
+		Storage.set_state_value(guild, self.STATE_EVENT_BUFFER, None)
463
+		for event_type, buffer in buffers.items():
464
+			if event_type == 'edit':
465
+				await self.__flush_edit_buffers(guild, buffer)
466
+			elif event_type == 'delete':
467
+				await self.__flush_delete_buffers(guild, buffer)
468
+
469
+	@flush_buffers.before_loop
470
+	async def before_flush_buffers_start(self) -> None:
471
+		await self.bot.wait_until_ready()
472
+
410 473
 	@commands.Cog.listener()
411 474
 	async def on_message(self, message: Message) -> None:
412 475
 		"""
@@ -443,6 +506,79 @@ class LoggingCog(BaseCog, name='Logging'):
443 506
 			return
444 507
 		if after.author.id == self.bot.user.id:
445 508
 			return
509
+		channel = after.channel
510
+
511
+		self.__buffer_event(guild, 'edit', BufferedMessageEditEvent(guild, channel, before, after))
512
+
513
+	@commands.Cog.listener()
514
+	async def on_raw_message_edit(self, payload: RawMessageUpdateEvent) -> None:
515
+		"""
516
+		Called when a message is edited. Unlike on_message_edit(), this is called
517
+		regardless of the state of the internal message cache.
518
+
519
+		If the message is found in the message cache, it can be accessed via
520
+		RawMessageUpdateEvent.cached_message. The cached message represents the
521
+		message before it has been edited. For example, if the content of a
522
+		message is modified and triggers the on_raw_message_edit() coroutine,
523
+		the RawMessageUpdateEvent.cached_message will return a Message object
524
+		that represents the message before the content was modified.
525
+
526
+		Due to the inherently raw nature of this event, the data parameter
527
+		coincides with the raw data given by the gateway.
528
+
529
+		Since the data payload can be partial, care must be taken when accessing
530
+		stuff in the dictionary. One example of a common case of partial data is
531
+		when the 'content' key is inaccessible. This denotes an “embed” only
532
+		edit, which is an edit in which only the embeds are updated by the
533
+		Discord embed server.
534
+
535
+		https://discordpy.readthedocs.io/en/stable/api.html#discord.on_raw_message_edit
536
+		"""
537
+		if payload.cached_message:
538
+			return  # already handled by on_message_edit
539
+		guild = self.bot.get_guild(payload.guild_id) or await self.bot.fetch_guild(payload.guild_id)
540
+		if not guild:
541
+			return
542
+		if not self.get_guild_setting(guild, self.SETTING_ENABLED):
543
+			return
544
+		channel = guild.get_channel(payload.channel_id) or await guild.fetch_channel(payload.channel_id)
545
+		if not channel:
546
+			return
547
+
548
+		self.__buffer_event(guild, 'edit', BufferedMessageEditEvent(
549
+			guild, channel, None, payload.message, payload.data))
550
+
551
+	async def __flush_edit_buffers(self, guild: Guild, events: list[BufferedMessageEditEvent]) -> None:
552
+		simple_edits: list[BufferedMessageEditEvent] = []
553
+		complex_edits: list[BufferedMessageEditEvent] = []
554
+		old_cutoff = timedelta(days=1)
555
+		now = datetime.now(timezone.utc)
556
+		for event in events:
557
+			if event.before is not None and (now - event.after.created_at) < old_cutoff:
558
+				simple_edits.append(event)
559
+			else:
560
+				complex_edits.append(event)
561
+		if len(simple_edits) <= 3:
562
+			# A small number of edits with full details. Log them individually.
563
+			for event in events:
564
+				await self.__handle_complete_edit_event(event)
565
+		else:
566
+			complex_edits = events
567
+		if len(complex_edits) > 0:
568
+			# These messages are not cached, too old, or too numerous
569
+			text = 'Multiple messages edited' if len(complex_edits) > 1 else 'Message edited'
570
+			for event in complex_edits[:10]:
571
+				text += f'\n- {event.after.jump_url} by {event.after.author.name} ' + \
572
+						f'first posted <t:{int(event.after.created_at.timestamp())}:f>'
573
+			if len(complex_edits) > 10:
574
+				text += f'\n- ...{len(complex_edits) - 10} more...'
575
+			bot_message = BotMessage(guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
576
+			await bot_message.update()
577
+
578
+	async def __handle_complete_edit_event(self, event: BufferedMessageEditEvent) -> None:
579
+		before = event.before
580
+		after = event.after
581
+		guild = after.guild
446 582
 
447 583
 		content_changed = (after.content != before.content)
448 584
 		attachments_changed = (after.attachments != before.attachments)
@@ -495,49 +631,6 @@ class LoggingCog(BaseCog, name='Logging'):
495 631
 		await bot_message.update()
496 632
 
497 633
 	@commands.Cog.listener()
498
-	async def on_raw_message_edit(self, payload: RawMessageUpdateEvent) -> None:
499
-		"""
500
-		Called when a message is edited. Unlike on_message_edit(), this is called
501
-		regardless of the state of the internal message cache.
502
-
503
-		If the message is found in the message cache, it can be accessed via
504
-		RawMessageUpdateEvent.cached_message. The cached message represents the
505
-		message before it has been edited. For example, if the content of a
506
-		message is modified and triggers the on_raw_message_edit() coroutine,
507
-		the RawMessageUpdateEvent.cached_message will return a Message object
508
-		that represents the message before the content was modified.
509
-
510
-		Due to the inherently raw nature of this event, the data parameter
511
-		coincides with the raw data given by the gateway.
512
-
513
-		Since the data payload can be partial, care must be taken when accessing
514
-		stuff in the dictionary. One example of a common case of partial data is
515
-		when the 'content' key is inaccessible. This denotes an “embed” only
516
-		edit, which is an edit in which only the embeds are updated by the
517
-		Discord embed server.
518
-
519
-		https://discordpy.readthedocs.io/en/stable/api.html#discord.on_raw_message_edit
520
-		"""
521
-		if payload.cached_message:
522
-			return  # already handled by on_message_edit
523
-		guild = self.bot.get_guild(payload.guild_id) or await self.bot.fetch_guild(payload.guild_id)
524
-		if not guild:
525
-			return
526
-		if not self.get_guild_setting(guild, self.SETTING_ENABLED):
527
-			return
528
-		channel = guild.get_channel(payload.channel_id) or await guild.fetch_channel(payload.channel_id)
529
-		if not channel:
530
-			return
531
-		message = await channel.fetch_message(payload.message_id)
532
-		if not message:
533
-			return
534
-		text = f'Message {message.jump_url} edited by {self.__describe_user(message.author)}.\n' + \
535
-			'Original markdown unavailable in cache.\n' + \
536
-			f'Updated markdown:\n{self.__quote_markdown(message.content)}'
537
-		bot_message = BotMessage(guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
538
-		await bot_message.update()
539
-
540
-	@commands.Cog.listener()
541 634
 	async def on_raw_message_delete(self, payload: RawMessageDeleteEvent) -> None:
542 635
 		"""
543 636
 		Called when a message is deleted. Unlike on_message_delete(), this is
@@ -548,33 +641,22 @@ class LoggingCog(BaseCog, name='Logging'):
548 641
 
549 642
 		https://discordpy.readthedocs.io/en/stable/api.html#discord.on_raw_message_delete
550 643
 		"""
551
-		if payload.cached_message:
552
-			message = payload.cached_message
553
-			guild = message.guild
554
-			if not self.get_guild_setting(guild, self.SETTING_ENABLED):
555
-				return
556
-			if message.author.id == self.bot.user.id:
557
-				return
558
-			text = f'Message by {self.__describe_user(message.author)} deleted from {message.channel.mention}. ' + \
559
-				f'Markdown:\n{self.__quote_markdown(message.content)}'
560
-			for attachment in message.attachments or []:
561
-				text += f'\n> * 📎 {attachment.url}'
562
-			for embed in message.embeds or []:
563
-				text += f'\n> * 🔗 {embed.url}'
564
-			bot_message = BotMessage(message.guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
565
-			await bot_message.update()
566
-		else:
567
-			guild = self.bot.get_guild(payload.guild_id) or await self.bot.fetch_guild(payload.guild_id)
568
-			if not guild:
569
-				return
570
-			if not self.get_guild_setting(guild, self.SETTING_ENABLED):
571
-				return
572
-			channel = guild.get_channel(payload.channel_id) or await guild.fetch_channel(payload.channel_id)
573
-			if not channel:
574
-				return
575
-			text = f'Message {payload.message_id} deleted in ' + channel.mention + ' but content and author not available in cache.'
576
-			bot_message = BotMessage(guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
577
-			await bot_message.update()
644
+		message = payload.cached_message
645
+		if message and message.author.id == self.bot.user.id:
646
+			return
647
+		guild = (message.guild if message else None) or \
648
+				self.bot.get_guild(payload.guild_id) or \
649
+				await self.bot.fetch_guild(payload.guild_id)
650
+		if guild is None:
651
+			return
652
+		if not self.get_guild_setting(guild, self.SETTING_ENABLED):
653
+			return
654
+		channel = (message.channel if message else None) or \
655
+				  self.bot.get_channel(payload.channel_id) or \
656
+				  await guild.fetch_channel(payload.channel_id)
657
+		if channel is None:
658
+			return
659
+		self.__buffer_event(guild, 'delete', BufferedMessageDeleteEvent(guild, channel, payload.message_id, message))
578 660
 
579 661
 	@commands.Cog.listener()
580 662
 	async def on_raw_bulk_message_delete(self, payload: RawBulkMessageDeleteEvent) -> None:
@@ -594,27 +676,66 @@ class LoggingCog(BaseCog, name='Logging'):
594 676
 		if not self.get_guild_setting(guild, self.SETTING_ENABLED):
595 677
 			return
596 678
 		channel = guild.get_channel(payload.channel_id) or await guild.fetch_channel(payload.channel_id)
597
-		count = len(payload.message_ids)
598
-		cached_count = len(payload.cached_messages)
599
-		uncached_count = count - cached_count
600
-		text = f'Bulk deletion of {count} message(s) from {channel.mention}.'
601
-		if uncached_count == count:
602
-			text += f' No cached content available for any of them.'
603
-		elif uncached_count > 0:
604
-			text += f' No cached content available for {uncached_count} of them.'
605
-		bot_message = BotMessage(guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
606
-		await bot_message.update()
607
-
608
-		for message in payload.cached_messages:
609
-			text = f'Message by {self.__describe_user(message.author)} bulk deleted from {message.channel.mention}. ' + \
610
-				f'Markdown:\n{self.__quote_markdown(message.content)}'
611
-			for attachment in message.attachments or []:
612
-				text += f'\n> * 📎 {attachment.url}'
613
-			for embed in message.embeds or []:
614
-				text += f'\n> * 🔗 {embed.url}'
679
+		for message_id in payload.message_ids:
680
+			message = None
681
+			for cached_message in payload.cached_messages:
682
+				if cached_message.id == message_id:
683
+					message = cached_message
684
+			self.__buffer_event(guild, 'delete', BufferedMessageDeleteEvent(guild, channel, message_id, message))
685
+
686
+	async def __flush_delete_buffers(self, guild: Guild, events: list[BufferedMessageDeleteEvent]) -> None:
687
+		simple_deletes: list[BufferedMessageDeleteEvent] = []
688
+		complex_deletes: list[BufferedMessageDeleteEvent] = []
689
+		for event in events:
690
+			if event.message is not None:
691
+				simple_deletes.append(event)
692
+			else:
693
+				complex_deletes.append(event)
694
+		if len(simple_deletes) <= 3:
695
+			# Small number of deletes with complete info
696
+			for event in simple_deletes:
697
+				await self.__handle_complete_delete_event(event)
698
+		else:
699
+			complex_deletes = events
700
+		if len(complex_deletes) > 0:
701
+			messages_per_author: dict[Optional[User], list[BufferedMessageDeleteEvent]] = self.__groupby(complex_deletes, lambda e: e.author)
702
+			text = 'Multiple messages deleted' if len(complex_deletes) > 1 else 'Message deleted'
703
+			row_count = 0
704
+			for author, messages in messages_per_author.items():
705
+				row_count += 1
706
+				if row_count > 10:
707
+					break
708
+				count = len(messages)
709
+				text += f'\n- {count} {"message" if count == 1 else "messages"} by {author.mention if author else "unavailable user"}'
710
+				if count == 1:
711
+					text += f' in {messages[0].channel.mention}'
712
+				else:
713
+					messages_by_channel: dict[GuildChannel, list[BufferedMessageDeleteEvent]] = self.__groupby(messages, lambda e: e.channel)
714
+					if len(messages_by_channel) == 1:
715
+						text += f' in {messages[0].channel.mention}'
716
+					else:
717
+						for channel, ch_messages in messages_by_channel.items():
718
+							row_count += 1
719
+							if row_count > 10:
720
+								break
721
+							ch_count = len(ch_messages)
722
+							text += f'\n   - {ch_count} in {channel.mention}'
723
+			if row_count > 10:
724
+				text += '- ...more omitted...'
615 725
 			bot_message = BotMessage(guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
616 726
 			await bot_message.update()
617 727
 
728
+	async def __handle_complete_delete_event(self, event: BufferedMessageDeleteEvent) -> None:
729
+		message: Message = event.message
730
+		text = f'Message by {self.__describe_user(message.author)} deleted from {message.channel.mention}. ' + \
731
+			   f'Markdown:\n{self.__quote_markdown(message.content)}'
732
+		for attachment in message.attachments or []:
733
+			text += f'\n> * 📎 {attachment.url}'
734
+		for embed in message.embeds or []:
735
+			text += f'\n> * 🔗 {embed.url}'
736
+		bot_message = BotMessage(message.guild, text, BotMessage.TYPE_LOG, suppress_embeds=True)
737
+		await bot_message.update()
738
+
618 739
 	# Events - Roles
619 740
 
620 741
 	@commands.Cog.listener()
@@ -778,3 +899,14 @@ class LoggingCog(BaseCog, name='Logging'):
778 899
 		markdown_b = re.sub(r'[\ue000-\uefff]', unsub_token, markdown_b)
779 900
 
780 901
 		return markdown_a, markdown_b
902
+
903
+	def __groupby(self, a_list: list, grouper: Callable[[any], any]) -> dict:
904
+		"""itertools.groupby just less annoying"""
905
+		d = {}
906
+		for elem in a_list:
907
+			key = grouper(elem)
908
+			if key in d:
909
+				d[key].append(elem)
910
+			else:
911
+				d[key] = [elem]
912
+		return d

+ 2
- 1
rocketbot/storage.py Voir le fichier

@@ -92,7 +92,7 @@ class Storage:
92 92
 			'load from disk.')
93 93
 		config = cls.__read_guild_config(guild)
94 94
 		if config is None:
95
-			return {}
95
+			config = {}
96 96
 		cls.__guild_id_to_config[guild.id] = config
97 97
 		return config
98 98
 
@@ -154,6 +154,7 @@ class Storage:
154 154
 		path: str = cls.__guild_config_path(guild)
155 155
 		cls.__trace(f'Saving config for guild {guild.id} to {path}')
156 156
 		cls.__trace(f'config = {config}')
157
+		config['_guild_name'] = guild.name  # Just for making JSON files easier to identify
157 158
 		with open(path, 'w', encoding='utf8') as file:
158 159
 			# Pretty printing to make more legible for debugging
159 160
 			# Sorting keys to help with diffs

Chargement…
Annuler
Enregistrer