Experimental Discord bot written in Python
Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

crosspostcog.py 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. """
  2. Cog for detecting spam messages posted in multiple channels.
  3. """
  4. import re
  5. from datetime import datetime, timedelta
  6. from typing import Optional
  7. from discord import Member, Message, utils as discordutils, TextChannel
  8. from discord.ext.commands import Cog
  9. from config import CONFIG
  10. from rocketbot.cogs.basecog import BaseCog, BotMessage, BotMessageReaction, CogSetting
  11. from rocketbot.collections import AgeBoundList, AgeBoundDict
  12. from rocketbot.storage import Storage
  13. from rocketbot.utils import str_from_timedelta
  14. class SpamContext:
  15. """
  16. Data about a set of duplicate messages from a user.
  17. """
  18. def __init__(self, member: Member) -> None:
  19. self.member: Member = member
  20. self.age: datetime = datetime.now()
  21. self.bot_message: Optional[BotMessage] = None
  22. self.is_kicked: bool = False
  23. self.is_banned: bool = False
  24. self.is_autobanned: bool = False
  25. self.spam_messages: set[Message] = set()
  26. self.deleted_messages: set[Message] = set()
  27. self.unique_channels: set[TextChannel] = set()
  28. self.duplicate_count: int = 0
  29. class CrossPostCog(BaseCog, name='Crosspost Detection'):
  30. """
  31. Detects a user posting in multiple channels in a short period
  32. of time: a common pattern for spammers.
  33. """
  34. SETTING_ENABLED = CogSetting(
  35. 'enabled',
  36. bool,
  37. default_value=False,
  38. brief='crosspost detection',
  39. description='Whether crosspost detection is enabled.',
  40. )
  41. SETTING_WARN_COUNT = CogSetting(
  42. 'warncount',
  43. int,
  44. default_value=5,
  45. brief='number of messages to trigger a warning',
  46. description='The number of unique channels messages are ' + \
  47. 'posted in by the same user to trigger a mod warning. The ' + \
  48. 'messages need not be identical (see dupewarncount).',
  49. min_value=2,
  50. )
  51. SETTING_DUPE_WARN_COUNT = CogSetting(
  52. 'dupewarncount',
  53. int,
  54. default_value=3,
  55. brief='number of identical messages to trigger a warning',
  56. description='The number of unique channels identical messages are ' + \
  57. 'posted in by the same user to trigger a mod warning.',
  58. min_value=2,
  59. )
  60. SETTING_BAN_COUNT = CogSetting(
  61. 'bancount',
  62. int,
  63. default_value=9999,
  64. brief='number of messages to trigger a ban',
  65. description='The number of unique channels messages are ' + \
  66. 'posted in by the same user to trigger an automatic ban. The ' + \
  67. 'messages need not be identical (see dupebancount). Set ' + \
  68. 'to a large value to effectively disable, e.g. 9999.',
  69. min_value=2,
  70. )
  71. SETTING_DUPE_BAN_COUNT = CogSetting(
  72. 'dupebancount',
  73. int,
  74. default_value=9999,
  75. brief='number of identical messages to trigger a ban',
  76. description='The number of unique channels identical messages are ' + \
  77. 'posted in by the same user to trigger an automatic ban. Set ' + \
  78. 'to a large value to effectively disable, e.g. 9999.',
  79. min_value=2,
  80. )
  81. SETTING_MIN_LENGTH = CogSetting(
  82. 'minlength',
  83. int,
  84. default_value=1,
  85. brief='minimum message length',
  86. description='The minimum number of characters in a message to be ' + \
  87. 'checked for duplicates. This can help ignore common short ' + \
  88. 'messages like "lol" or a single emoji.',
  89. min_value=1,
  90. )
  91. SETTING_TIMESPAN = CogSetting(
  92. 'timespan',
  93. timedelta,
  94. default_value=timedelta(seconds=60),
  95. brief='time window to look for dupe messages',
  96. description='The number of seconds of message history to look at '
  97. 'when looking for duplicates. Shorter values are preferred, '
  98. 'both to detect bots and avoid excessive memory usage.',
  99. min_value=timedelta(seconds=1),
  100. )
  101. STATE_KEY_RECENT_MESSAGES = "CrossPostCog.recent_messages"
  102. STATE_KEY_SPAM_CONTEXT = "CrossPostCog.spam_context"
  103. def __init__(self, bot):
  104. super().__init__(
  105. bot,
  106. config_prefix='crosspost',
  107. short_description='Manages crosspost detection and handling.',
  108. long_description='Detects a user posting in multiple channels in a short period of '
  109. 'time: a common pattern for spammers.\n'
  110. '\n'
  111. "These used to be identical text, but more recent attacks have had "
  112. "small variations, such as different imgur URLs. It's reasonable to "
  113. "treat posting in many channels in a short period as suspicious on its "
  114. "own, regardless of whether they are identical.\n"
  115. "\n"
  116. "Repeated posts in the same channel aren't currently detected, as "
  117. "this can often be for a reason or due to trying a failed post when "
  118. "connectivity is poor. Minimum message length can be enforced for "
  119. "detection.",
  120. )
  121. self.add_setting(CrossPostCog.SETTING_ENABLED)
  122. self.add_setting(CrossPostCog.SETTING_WARN_COUNT)
  123. self.add_setting(CrossPostCog.SETTING_DUPE_WARN_COUNT)
  124. self.add_setting(CrossPostCog.SETTING_BAN_COUNT)
  125. self.add_setting(CrossPostCog.SETTING_DUPE_BAN_COUNT)
  126. self.add_setting(CrossPostCog.SETTING_MIN_LENGTH)
  127. self.add_setting(CrossPostCog.SETTING_TIMESPAN)
  128. self.max_spam_contexts = 12
  129. async def __record_message(self, message: Message) -> None:
  130. if message.channel.permissions_for(message.author).ban_members:
  131. # User exempt from spam detection
  132. self.__trace("User exempt from crosspost checks")
  133. return
  134. def compute_message_hash(m: Message) -> int:
  135. to_hash = m.content
  136. # URLs sometimes differ per spam message, so simplify them
  137. url_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
  138. to_hash = re.sub(url_regex, '<url>', to_hash)
  139. # Add attachment metadata
  140. for attachment in m.attachments:
  141. to_hash += f'\n[[ATT: ct={attachment.content_type} s={attachment.size} w={attachment.width} h={attachment.height}]]'
  142. h = hash(to_hash)
  143. self.__trace(f"Message hash for {m.id} is {h}")
  144. return h
  145. min_length = self.get_guild_setting(message.guild, self.SETTING_MIN_LENGTH)
  146. if len(message.attachments) == 0 and len(message.content) < min_length:
  147. # Message too short to count towards spam total
  148. self.__trace(f"Message len {len(message.content)} < {min_length}")
  149. return
  150. # Get config
  151. max_age = timedelta(seconds=self.get_guild_setting(message.guild, self.SETTING_TIMESPAN))
  152. warn_count: int = self.get_guild_setting(message.guild, self.SETTING_WARN_COUNT)
  153. dupe_warn_count: int = self.get_guild_setting(message.guild, self.SETTING_DUPE_WARN_COUNT)
  154. # Record message
  155. recent_messages: AgeBoundList[Message, datetime, timedelta] = Storage.get_state_value(message.guild, self.STATE_KEY_RECENT_MESSAGES)
  156. if recent_messages is None:
  157. recent_messages = AgeBoundList(max_age, lambda index, message : message.created_at)
  158. Storage.set_state_value(message.guild, self.STATE_KEY_RECENT_MESSAGES, recent_messages)
  159. recent_messages.max_age = max_age
  160. recent_messages.append(message)
  161. self.__trace(f"Recent messages now length {len(recent_messages)}")
  162. # Get all recent messages by user
  163. member_messages = [m for m in recent_messages if m.author.id == message.author.id]
  164. message_count = len(member_messages)
  165. self.__trace(f"Found {message_count} messages for {message.author.name}")
  166. if message_count < warn_count and message_count < dupe_warn_count:
  167. self.__trace(f"Bailing because message count {message_count} < warn count {warn_count} and < dupe warn count {dupe_warn_count}")
  168. return
  169. # Look for identical(ish) messages and unique channels
  170. hash_to_channels: dict[int, set[TextChannel]] = {}
  171. unique_channels: set[TextChannel] = set()
  172. max_duplicate_count = 0
  173. for m in member_messages:
  174. message_hash = compute_message_hash(m)
  175. dupe_message_channels: set[TextChannel] = hash_to_channels.get(message_hash)
  176. if dupe_message_channels is None:
  177. dupe_message_channels = set()
  178. hash_to_channels[message_hash] = dupe_message_channels
  179. dupe_message_channels.add(m.channel)
  180. unique_channels.add(m.channel)
  181. max_duplicate_count = max(max_duplicate_count, len(dupe_message_channels))
  182. channel_count = len(unique_channels)
  183. self.__trace(f"Found {len(hash_to_channels)} unique messages, {channel_count} unique channels, {max_duplicate_count} duplicated messages")
  184. if channel_count < warn_count and max_duplicate_count < dupe_warn_count:
  185. self.__trace(f"Bailing because channels {channel_count} < warn count {warn_count} and max dupes {max_duplicate_count} < dupe warn count {dupe_warn_count}")
  186. return
  187. # This person is a problem
  188. spam_lookup: AgeBoundDict[str, SpamContext, datetime, timedelta] = Storage.get_state_value(message.guild, self.STATE_KEY_SPAM_CONTEXT)
  189. if spam_lookup is None:
  190. spam_lookup = AgeBoundDict(
  191. max_age,
  192. lambda key, context : context.age)
  193. Storage.set_state_value(message.guild, self.STATE_KEY_SPAM_CONTEXT, spam_lookup)
  194. key = f'{message.author.id}'
  195. context = spam_lookup.get(key)
  196. if context is not None and message.created_at - context.age > max_age:
  197. context = None
  198. if context is None:
  199. context = SpamContext(message.author)
  200. spam_lookup[key] = context
  201. self.log(message.guild,
  202. f'\u0007{message.author.name} ({message.author.id}) ' + \
  203. f'posted messages in {channel_count} channels.')
  204. context.age = message.created_at
  205. context.duplicate_count = max_duplicate_count
  206. context.spam_messages.update(member_messages)
  207. context.unique_channels.update(unique_channels)
  208. await self.__update_from_context(context)
  209. async def __update_from_context(self, context: SpamContext):
  210. ban_count = self.get_guild_setting(context.member.guild, self.SETTING_BAN_COUNT)
  211. dupe_ban_count = self.get_guild_setting(context.member.guild, self.SETTING_DUPE_BAN_COUNT)
  212. channel_count = len(context.unique_channels)
  213. if channel_count >= ban_count or context.duplicate_count >= dupe_ban_count:
  214. if not context.is_banned:
  215. max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
  216. max_age_str = str_from_timedelta(max_age)
  217. await context.member.ban(
  218. reason=f'Rocketbot: Posted in {channel_count} channels within {max_age_str} ' + \
  219. f'({context.duplicate_count} identical). Banned by {self.bot.user.name}.',
  220. delete_message_days=1)
  221. context.is_kicked = True
  222. context.is_banned = True
  223. context.is_autobanned = True
  224. context.deleted_messages |= context.spam_messages
  225. self.__log_ban(context, self.bot.user.name)
  226. else:
  227. # Already banned. Nothing to update in the message.
  228. return
  229. await self.__update_message_from_context(context)
  230. async def __update_message_from_context(self, context: SpamContext) -> None:
  231. first_spam_message: Message = sorted(list(context.spam_messages), key=lambda m: m.created_at)[0]
  232. spam_count = len(context.spam_messages)
  233. channel_count = len(context.unique_channels)
  234. deleted_count = len(context.deleted_messages)
  235. duplicate_count = context.duplicate_count
  236. max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
  237. max_age_str = str_from_timedelta(max_age)
  238. message = context.bot_message
  239. if message is None:
  240. message_type: int = BotMessage.TYPE_INFO if self.was_warned_recently(context.member) \
  241. else BotMessage.TYPE_MOD_WARNING
  242. message = BotMessage(context.member.guild, '', message_type, context)
  243. message.quote = discordutils.remove_markdown(first_spam_message.clean_content)
  244. self.record_warning(context.member)
  245. if context.is_autobanned:
  246. text = f'User {context.member.mention} auto banned for ' + \
  247. f'posting messages in {channel_count} channels within {max_age_str} ' + \
  248. f'({duplicate_count} identical). Messages from past 24 hours deleted.'
  249. await message.set_reactions([])
  250. await message.set_text(text)
  251. else:
  252. body: str = f'User {context.member.mention} posted '
  253. if duplicate_count == channel_count:
  254. body += f'identical messages in {channel_count} channels within {max_age_str} .'
  255. elif duplicate_count == 1:
  256. body += f'**different** messages in {channel_count} channels within ' + \
  257. f'{max_age_str}. (Showing first one).'
  258. else:
  259. body += f'messages in {channel_count} channels within {max_age_str} ' + \
  260. f'({duplicate_count} are identical, showing first one).'
  261. max_links = 10
  262. for msg in sorted(list(context.spam_messages), key=lambda m: m.created_at)[:max_links]:
  263. body += f'\n- {msg.jump_url}'
  264. if len(context.spam_messages) > max_links:
  265. body += f'\n- ...{len(context.spam_messages) - max_links} more...'
  266. await message.set_text(body)
  267. await message.set_reactions(BotMessageReaction.standard_set(
  268. did_delete = deleted_count >= spam_count,
  269. message_count = spam_count,
  270. did_kick = context.is_kicked,
  271. did_ban = context.is_banned))
  272. if context.bot_message is None:
  273. await self.post_message(message)
  274. context.bot_message = message
  275. async def on_mod_react(self,
  276. bot_message: BotMessage,
  277. reaction: BotMessageReaction,
  278. reacted_by: Member) -> None:
  279. context: SpamContext = bot_message.context
  280. if context is None:
  281. return
  282. channel_count = len(context.unique_channels)
  283. if reaction.emoji == CONFIG['trash_emoji']:
  284. for message in context.spam_messages - context.deleted_messages:
  285. await message.delete()
  286. context.deleted_messages.add(message)
  287. await self.__update_from_context(context)
  288. self.__log_deletion(context, reacted_by.name)
  289. elif reaction.emoji == CONFIG['kick_emoji']:
  290. await context.member.kick(
  291. reason=f'Rocketbot: Posted messages in {channel_count} ' + \
  292. f'channels. Kicked by {reacted_by.name}.')
  293. context.is_kicked = True
  294. await self.__update_from_context(context)
  295. self.__log_kick(context, reacted_by.name)
  296. elif reaction.emoji == CONFIG['ban_emoji']:
  297. await context.member.ban(
  298. reason=f'Rocketbot: Posted messages in {channel_count} ' + \
  299. f'channels. Banned by {reacted_by.name}.',
  300. delete_message_days=1)
  301. context.deleted_messages |= context.spam_messages
  302. context.is_kicked = True
  303. context.is_banned = True
  304. await self.__update_from_context(context)
  305. self.__log_ban(context, reacted_by.name)
  306. def __log_deletion(self, context: SpamContext, by_who: str) -> None:
  307. max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
  308. max_age_str = str_from_timedelta(max_age)
  309. channel_count = len(context.unique_channels)
  310. duplicate_count = context.duplicate_count
  311. self.log(context.member.guild,
  312. f'{context.member.name} ({context.member.id}) posted ' + \
  313. f'messages in {channel_count} channels withint {max_age_str} ' + \
  314. f'({duplicate_count} identical). Deleted by {by_who}.')
  315. def __log_kick(self, context: SpamContext, by_who: str) -> None:
  316. max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
  317. max_age_str = str_from_timedelta(max_age)
  318. channel_count = len(context.unique_channels)
  319. duplicate_count = context.duplicate_count
  320. self.log(context.member.guild,
  321. f'{context.member.name} ({context.member.id}) posted ' + \
  322. f'messages in {channel_count} channels within {max_age_str} ' + \
  323. f'({duplicate_count} identical). Kicked by {by_who}.')
  324. def __log_ban(self, context: SpamContext, by_who: str) -> None:
  325. max_age = timedelta(seconds=self.get_guild_setting(context.member.guild, self.SETTING_TIMESPAN))
  326. max_age_str = str_from_timedelta(max_age)
  327. channel_count = len(context.unique_channels)
  328. duplicate_count = context.duplicate_count
  329. self.log(context.member.guild,
  330. f'{context.member.name} ({context.member.id}) posted ' + \
  331. f'messages in {channel_count} channels within {max_age_str} ' + \
  332. f'({duplicate_count} identical). Banned by {by_who}.')
  333. def __trace(self, message):
  334. # print(message)
  335. pass
  336. @Cog.listener()
  337. async def on_message(self, message: Message):
  338. """Event handler"""
  339. if message.author is None or \
  340. message.author.bot or \
  341. message.channel is None or \
  342. message.guild is None:
  343. return
  344. if not self.get_guild_setting(message.guild, self.SETTING_ENABLED):
  345. return
  346. self.__trace("--ON MESSAGE--")
  347. await self.__record_message(message)