Experimental Discord bot written in Python
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

urlspamcog.py 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. """
  2. Cog for detecting URLs posted by new users.
  3. """
  4. import re
  5. from datetime import timedelta
  6. from typing import Literal
  7. from discord import Member, Message
  8. from discord import utils as discordutils
  9. from discord.ext.commands import Cog
  10. from discord.utils import escape_markdown
  11. from config import CONFIG
  12. from rocketbot.cogs.basecog import BaseCog, BotMessage, BotMessageReaction, CogSetting
  13. from rocketbot.utils import describe_timedelta
  class URLSpamContext:
      """
      Bookkeeping for one suspected spam message that contains a URL.

      Stores the message itself together with three flags recording which
      moderation steps (delete, kick, ban) have already been carried out.
      """

      def __init__(self, spam_message: Message):
          # No moderation step has been taken when the context is created.
          self.is_banned: bool = False
          self.is_kicked: bool = False
          self.is_deleted: bool = False
          # The message that triggered the detection.
          self.spam_message = spam_message
  23. class URLSpamCog(BaseCog, name='URL Spam'):
  24. """
  25. Detects users posting URLs who just joined recently: a common spam pattern.
  26. Can be configured to take immediate action or just warn the mods.
  27. """
  # Master switch: on_message does nothing for a guild unless this is true.
  SETTING_ENABLED = CogSetting(
      'enabled',
      bool,
      default_value=False,
      brief='URL spam detection',
      description='Whether URLs posted soon after joining are flagged.',
  )

  # What to do when a recently joined member posts a URL.
  # enum_values lists the same choices as the Literal type.
  SETTING_ACTION = CogSetting(
      'action',
      Literal['nothing', 'modwarn', 'delete', 'kick', 'ban'],
      default_value='nothing',
      brief='action to take on spam',
      description='The action to take on detected URL spam.',
      enum_values={'ban', 'kick', 'delete', 'modwarn', 'nothing'},
  )

  # How long a member must have been in the guild before URLs are allowed.
  # NOTE(review): the help text speaks of "seconds" while the declared type
  # and default are timedelta values - confirm which form users enter.
  SETTING_JOIN_AGE = CogSetting(
      'joinage',
      timedelta,
      default_value=timedelta(minutes=15),
      brief='seconds since member joined',
      description=(
          'The minimum seconds since the user joined the '
          'server before they can post URLs. URLs posted by users '
          'who joined too recently will be flagged. Keep in mind '
          'many servers have a minimum 10 minute cooldown before '
          'new members can say anything. Setting to 0 effectively '
          'disables URL spam detection.'
      ),
      min_value=timedelta(seconds=0),
  )

  # What to do with Markdown links whose label looks like a different URL
  # than their target. enum_values lists the same choices as the Literal.
  SETTING_DECEPTIVE_ACTION = CogSetting(
      'deceptiveaction',
      Literal[
          'nothing', 'modwarn', 'modwarndelete', 'chatwarn',
          'chatwarndelete', 'delete', 'kick', 'ban',
      ],
      default_value='nothing',
      brief='action to take on deceptive link markdown',
      description=(
          'The action to take on chat messages with links '
          'where the text looks like a different URL than the actual link.'
      ),
      enum_values={
          'ban', 'kick', 'delete',
          'chatwarndelete', 'chatwarn',
          'modwarndelete', 'modwarn', 'nothing',
      },
  )
  def __init__(self, bot):
      """
      Initialises the base cog with the `urlspam` config prefix and
      registers the four settings declared on this class.
      """
      super().__init__(
          bot,
          config_prefix='urlspam',
          short_description='Manages URL spam detection.',
      )
      # Registered in declaration order.
      for setting in (
          URLSpamCog.SETTING_ENABLED,
          URLSpamCog.SETTING_ACTION,
          URLSpamCog.SETTING_JOIN_AGE,
          URLSpamCog.SETTING_DECEPTIVE_ACTION,
      ):
          self.add_setting(setting)
  @Cog.listener()
  async def on_message(self, message: Message):
      """
      Event listener. Runs both URL checks on a message once it has passed
      the basic guards and detection is enabled for its guild.
      """
      # Guards run one at a time so `.bot` is never read from a missing author.
      if message.author is None:
          return
      if message.author.bot:
          return
      if message.guild is None or message.channel is None:
          return
      if message.content is None:
          return

      enabled = self.get_guild_setting(message.guild, self.SETTING_ENABLED)
      if not enabled:
          return

      # NOTE(review): the second check still runs after the first one may
      # have deleted the message or banned its author - confirm this is
      # intended.
      await self.check_message_recency(message)
      await self.check_deceptive_links(message)
  async def check_message_recency(self, message: Message) -> None:
      """
      Checks if the message contains a URL and was sent by a user who joined
      the guild more recently than the configured minimum join age.

      When that is the case the guild's `action` setting decides what
      happens: `modwarn` only reports, `delete` removes the message, `kick`
      removes the message and kicks the author, `ban` bans the author. In
      every case the event is logged and a BotMessage is posted through
      `post_message`.

      Parameters:
          message: the guild message to inspect.
      """
      action = self.get_guild_setting(message.guild, self.SETTING_ACTION)
      # Treat a missing action like 'nothing', as check_deceptive_links does.
      if action is None or action == 'nothing':
          return
      if not self.__contains_url(message.content):
          return

      # SETTING_JOIN_AGE is declared with a timedelta type and default, and
      # timedelta(seconds=<timedelta>) raises TypeError, so a timedelta is
      # used as-is. A bare number is still read as seconds, which is what
      # the previous code expected.
      min_join_age = self.get_guild_setting(message.guild, self.SETTING_JOIN_AGE)
      if not isinstance(min_join_age, timedelta):
          min_join_age = timedelta(seconds=min_join_age)

      # discord.py documents that the author can be a plain User (no
      # `joined_at` attribute) and that Member.joined_at can be None.
      # Without a join time the age cannot be judged, so nothing is flagged.
      joined_at = getattr(message.author, 'joined_at', None)
      if joined_at is None:
          return

      join_age = message.created_at - joined_at
      if join_age >= min_join_age:
          return
      join_age_str = describe_timedelta(join_age)

      context = URLSpamContext(message)
      needs_attention = False
      log_prefix = (
          f'New user {message.author.name} ({message.author.id}) '
          f'posted URL {join_age_str} after joining.'
      )
      if action == 'modwarn':
          # Only raise a mod warning if this user was not warned recently.
          needs_attention = not self.was_warned_recently(message.author)
          self.log(message.guild,
              log_prefix + (' Mods alerted.' if needs_attention else ''))
      elif action == 'delete':
          await message.delete()
          context.is_deleted = True
          self.log(message.guild, log_prefix + ' Message deleted.')
      elif action == 'kick':
          await message.delete()
          context.is_deleted = True
          await message.author.kick(
              reason=f'Rocketbot: Posted a link {join_age_str} after joining')
          context.is_kicked = True
          self.log(message.guild, log_prefix + ' User kicked.')
      elif action == 'ban':
          # No separate delete here: the ban call is passed
          # delete_message_days=1 and the context is marked as deleted.
          await message.author.ban(
              reason=f'Rocketbot: User posted a link {join_age_str} after joining',
              delete_message_days=1)
          context.is_deleted = True
          context.is_kicked = True
          context.is_banned = True
          self.log(message.guild, log_prefix + ' User banned.')

      bm = BotMessage(
          message.guild,
          f'User {message.author.mention} posted a URL '
          f'{join_age_str} after joining: {message.jump_url}',
          type=BotMessage.TYPE_MOD_WARNING if needs_attention else BotMessage.TYPE_INFO,
          context=context)
      bm.quote = discordutils.remove_markdown(message.clean_content)
      # The reaction set reflects the actions that have already been taken.
      await bm.set_reactions(BotMessageReaction.standard_set(
          did_delete=context.is_deleted,
          did_kick=context.is_kicked,
          did_ban=context.is_banned))
      await self.post_message(bm)
      if needs_attention:
          self.record_warning(message.author)
  async def check_deceptive_links(self, message: Message):
      """
      Looks for deceptive URL Markdown, e.g.
      `[nicewebsite.com](https://evilwebsite.com)`, and applies the guild's
      `deceptiveaction` setting when such a link is found.
      """
      action = self.get_guild_setting(message.guild, self.SETTING_DECEPTIVE_ACTION)
      if action is None or action == 'nothing':
          return
      if not self.contains_deceptive_links(message.content):
          return

      author = message.author
      # Quote the escaped message by prefixing every line with "> ".
      quoted = '> ' + '\n> '.join(escape_markdown(message.content).split('\n'))
      mod_text = (
          f'User {author.name} ({author.id}) posted a deceptive link. {message.jump_url}'
          f'\n\n{quoted}'
      )
      self.log(message.guild, f'{author.name} posted deceptive link - action: {action}')

      # 'modwarndelete', 'chatwarndelete' and 'delete' all contain 'delete'.
      should_delete = 'delete' in action

      async def notify_mods(footer: str, message_type) -> None:
          """Posts the moderator report followed by a note on the outcome."""
          report = BotMessage(
              message.guild,
              f'{mod_text}\n\n{footer}',
              message_type,
              suppress_embeds=True)
          await self.post_message(report)

      if 'modwarn' in action:
          footer = 'Message deleted' if should_delete else message.jump_url
          await notify_mods(footer, BotMessage.TYPE_MOD_WARNING)
          if should_delete:
              await message.delete()
      elif 'chatwarn' in action:
          # Warn in the channel itself instead of reporting to the mods.
          response = (
              ':warning: Links with deceptive labels are prohibited :warning:'
              if should_delete else
              ':warning: Message contains a deceptively labeled link! Click carefully. :warning:'
          )
          await message.reply(response, mention_author=False)
          if should_delete:
              await message.delete()
      elif action == 'delete':
          await notify_mods('Deleting message', BotMessage.TYPE_INFO)
          await message.delete()
      elif action == 'kick':
          await notify_mods('User kicked', BotMessage.TYPE_MOD_WARNING)
          await message.delete()
          await author.kick(
              reason='Rocketbot: User posted a deceptive link')
      elif action == 'ban':
          await notify_mods('User banned', BotMessage.TYPE_MOD_WARNING)
          await author.ban(
              reason='Rocketbot: User posted a deceptive link',
              delete_message_days=1)
  def contains_deceptive_links(self, content: str) -> bool:
      """
      Tests whether `content` has a Markdown link `[label](target)` whose
      label looks like a URL that differs from the target.

      Labels that are full URLs (per `is_url`) must equal the target
      exactly. Labels that are scheme-less URLs (per `is_casual_url`) are
      compared after dropping a leading `www.` and a trailing `/` from both
      sides. Labels that do not look like URLs are never flagged.

      Parameters:
          content: raw message text.

      Returns:
          True if at least one deceptive link is found, otherwise False.
      """
      # Strip Markdown that can safely contain URL sequences. Fenced blocks
      # go first so inline-code stripping cannot eat part of a fence and
      # leave its contents behind. The flag is passed by keyword because
      # the fourth positional argument of re.sub is `count`, not `flags`.
      content = re.sub(r'```.+?```', '', content, flags=re.DOTALL)  # ``` code block ```
      content = re.sub(r'`[^`]+`', '', content)  # `inline code`

      for raw_label, raw_link in re.findall(r'\[([^]]+)]\(([^)]+)\)', content):
          label: str = raw_label.strip()
          link: str = raw_link.strip()
          # Remove the angle brackets of a `<url>` target.
          if link.startswith('<') and link.endswith('>'):
              link = link[1:-1]
          if self.is_url(label):
              if label != link:
                  return True
          elif self.is_casual_url(label):
              # Trim www. for easier comparisons.
              if link.startswith('https://www.'):
                  link = 'https://' + link[12:]
              if link.startswith('http://www.'):
                  link = 'http://' + link[11:]
              if link.endswith('/'):
                  link = link[:-1]
              if label.startswith('www.'):
                  label = label[4:]
              if label.endswith('/'):
                  label = label[:-1]
              # The label is compared under the target's own scheme.
              if link.startswith('https://') and 'https://' + label != link:
                  return True
              if link.startswith('http://') and 'http://' + label != link:
                  return True
      return False
  def is_url(self, s: str) -> bool:
      """
      Tests if a string is strictly a URL: an http/https scheme, a host,
      then an optional port and an optional path. Matching ignores case.
      """
      host_alternatives = (
          r'\[[0-9a-fA-F:]+\]',               # bracketed IPv6 literal
          r'[0-9\.]+',                        # run of digits and dots
          r'[a-zA-Z0-9-]+\.[a-zA-Z0-9-\.]+',  # hostname with at least one dot
      )
      pattern = ''.join((
          r'^http[s]?://',
          r'(?:' + '|'.join(host_alternatives) + ')',
          '(?::[0-9]+)?',      # optional :port
          r'(?:/[^ \]\)]*)?',  # optional path; stops at space, ']' or ')'
          '$',
      ))
      matched = re.match(pattern, s, re.IGNORECASE)
      return matched is not None
  def is_casual_url(self, s: str) -> bool:
      """
      Tests if a string is a "casual URL" with no scheme included: a host,
      then an optional port and an optional path. Matching ignores case.
      """
      host_alternatives = (
          r'\[[0-9a-fA-F:]+\]',               # bracketed IPv6 literal
          r'[0-9\.]+',                        # run of digits and dots
          r'[a-zA-Z0-9-]+\.[a-zA-Z0-9-\.]+',  # hostname with at least one dot
      )
      pattern = ''.join((
          r'^',
          r'(?:' + '|'.join(host_alternatives) + ')',
          '(?::[0-9]+)?',      # optional :port
          r'(?:/[^ \]\)]*)?',  # optional path; stops at space, ']' or ')'
          '$',
      ))
      matched = re.match(pattern, s, re.IGNORECASE)
      return matched is not None
  async def on_mod_react(self,
          bot_message: BotMessage,
          reaction: BotMessageReaction,
          reacted_by: Member) -> None:
      """
      Applies the action matching the emoji a moderator reacted with to the
      spam message stored in the bot message's context.

      The trash emoji deletes the message, the kick emoji deletes it and
      kicks the author, the ban emoji bans the author. Steps already
      recorded on the context are not repeated. Any other emoji is ignored.
      Afterwards the bot message's reactions are refreshed from the flags.
      """
      context: URLSpamContext = bot_message.context
      if context is None:
          return
      sm: Message = context.spam_message
      emoji = reaction.emoji

      if emoji == CONFIG['trash_emoji']:
          if not context.is_deleted:
              await sm.delete()
              context.is_deleted = True
              self.log(sm.guild,
                  f'URL spam by {sm.author.name} deleted by {reacted_by.name}')
      elif emoji == CONFIG['kick_emoji']:
          # A kick also removes the offending message if it is still there.
          if not context.is_deleted:
              await sm.delete()
              context.is_deleted = True
          if not context.is_kicked:
              await sm.author.kick(
                  reason=f'Rocketbot: Kicked for URL spam by {reacted_by.name}')
              context.is_kicked = True
              self.log(sm.guild,
                  f'URL spammer {sm.author.name} kicked by {reacted_by.name}')
      elif emoji == CONFIG['ban_emoji']:
          if not context.is_banned:
              await sm.author.ban(
                  reason=f'Rocketbot: Banned for URL spam by {reacted_by.name}',
                  delete_message_days=1)
              # A ban marks all three flags as done.
              context.is_deleted = True
              context.is_kicked = True
              context.is_banned = True
              self.log(sm.guild,
                  f'URL spammer {sm.author.name} banned by {reacted_by.name}')
      else:
          return

      await bot_message.set_reactions(BotMessageReaction.standard_set(
          did_delete=context.is_deleted,
          did_kick=context.is_kicked,
          did_ban=context.is_banned))
  289. @classmethod
  290. def __contains_url(cls, text: str) -> bool:
  291. p = re.compile(r'https?://\S+')
  292. return p.search(text) is not None