Experimental Discord bot written in Python
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

videopreviewcog.py 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. import asyncio
  2. import json
  3. import re
  4. import subprocess
  5. from datetime import timedelta
  6. from discord import Message
  7. from discord.ext.commands import Cog
  8. from rocketbot.cogs.basecog import BaseCog
  9. from rocketbot.cogsetting import CogSetting
  10. from rocketbot.utils import (
  11. blockquote_markdown,
  12. format_bytes,
  13. suppress_markdown_url_previews,
  14. )
  15. def filter_video_format(format: dict) -> bool:
  16. if format.get('resolution') == 'audio only':
  17. return False
  18. if format.get('format_note') == 'DASH audio':
  19. return False
  20. return True
  21. def rank_video_format(format: dict) -> tuple:
  22. content = 0
  23. if format.get('resolution') == 'audio only':
  24. content = 1
  25. elif format.get('format_note') == 'DASH audio':
  26. content = 1
  27. elif format.get('format_note') == 'DASH video':
  28. content = 2
  29. else:
  30. content = 3 # both I guess! multiplexed formats don't seem clearly labeled
  31. res = (format.get('width') or 0) + (format.get('height') or 0)
  32. return (content, res)
  33. class MessageLink:
  34. url: str
  35. spoiler: bool = False
  36. link_type: str = 'unknown'
  37. def __init__(self, url: str, link_type: str, spoiler: bool = False):
  38. self.url = url
  39. self.link_type = link_type
  40. self.spoiler = spoiler
  41. class VideoPreviewCog(BaseCog, name='Video Link Previews'):
  42. SETTING_ENABLED = CogSetting(
  43. 'enabled',
  44. bool,
  45. default_value=False,
  46. brief='Video link previews',
  47. description='Whether links to certain social media videos should show previews.',
  48. )
  49. SETTING_DELAY = CogSetting(
  50. 'delay',
  51. timedelta,
  52. default_value=3,
  53. brief='delay before attempting to fetch a preview',
  54. description='How long to wait after a message is posted to see if Discord successfully loads a video preview',
  55. )
  56. SETTING_INSTAGRAM = CogSetting(
  57. 'instagram',
  58. bool,
  59. default_value=False,
  60. brief='whether to show video previews for Instagram links',
  61. description='For both regular posts and reels',
  62. )
  63. SETTING_FACEBOOK = CogSetting(
  64. 'facebook',
  65. bool,
  66. default_value=False,
  67. brief='whether to show video previews for Facebook links',
  68. )
  69. SETTING_TWITTER = CogSetting(
  70. 'twitter',
  71. bool,
  72. default_value=False,
  73. brief='whether to show video previews for Twitter links',
  74. )
  75. REGEX_INSTAGRAM_POST = r'(https?:\/\/(?:www\.)?)\w*(instagram\.com\/\w+/\w+\/?)'
  76. REGEX_FACEBOOK_POST = r'(https?:\/\/(?:www\.)?)\w*(facebook\.com(?:\/\w+)+\/\w+\/?)'
  77. REGEX_TWITTER_POST = r'(https?:\/\/(?:www\.)?)\w*((?:twitter|x)\.com\/\w+\/status\/[0-9]+)'
  78. REGEX_SPOILERS = '\|\|.+\|\|'
  79. # Best video and best audio, mp4 format with m4a audio
  80. FORMATS = 'bv*[ext=mp4]+ba[ext=m4a]/' \
  81. 'b[ext=mp4]/' \
  82. 'bv*[ext=mp4]+ba[ext=m4a]/' \
  83. 'b[ext=mp4]/' \
  84. 'bv*+ba/' \
  85. 'b'
  86. def __init__(self, bot):
  87. super().__init__(
  88. bot,
  89. config_prefix='linkpreview',
  90. short_description='Manages video link preview behavior.',
  91. )
  92. Self = VideoPreviewCog
  93. self.add_setting(Self.SETTING_ENABLED)
  94. self.add_setting(Self.SETTING_DELAY)
  95. self.add_setting(Self.SETTING_INSTAGRAM)
  96. self.add_setting(Self.SETTING_FACEBOOK)
  97. self.add_setting(Self.SETTING_TWITTER)
  98. @Cog.listener()
  99. async def on_message(self, message: Message):
  100. """Event listener"""
  101. if message.author is None or \
  102. message.author.bot or \
  103. message.guild is None or \
  104. message.channel is None or \
  105. message.content is None:
  106. return
  107. if not self.get_guild_setting(message.guild, self.SETTING_ENABLED):
  108. return
  109. links = self._get_previewable_links(message)
  110. if len(links) == 0:
  111. return
  112. await self._wait_for_preview(message, links)
  113. # TODO: Make this just link to the raw video file if possible (yt-dlp --get-url)
  114. def _get_previewable_links(self, message: Message) -> list[MessageLink]:
  115. Self = VideoPreviewCog
  116. links: list[MessageLink] = []
  117. content: str = message.content
  118. has_spoilers = re.match(Self.REGEX_SPOILERS, content) is not None
  119. if self.get_guild_setting(message.guild, Self.SETTING_INSTAGRAM):
  120. for link in re.findall(Self.REGEX_INSTAGRAM_POST, content):
  121. url = link[0] + link[1]
  122. links.append(MessageLink(url, 'instagram', has_spoilers))
  123. if self.get_guild_setting(message.guild, Self.SETTING_FACEBOOK):
  124. for link in re.findall(Self.REGEX_FACEBOOK_POST, content):
  125. url = link[0] + link[1]
  126. links.append(MessageLink(url, 'facebook', has_spoilers))
  127. if self.get_guild_setting(message.guild, Self.SETTING_TWITTER):
  128. for link in re.findall(Self.REGEX_TWITTER_POST, content):
  129. url = link[0] + link[1]
  130. links.append(MessageLink(url, 'twitter', has_spoilers))
  131. # TODO: Custom patterns
  132. return links
  133. async def _wait_for_preview(self, message: Message, links: list[MessageLink]):
  134. Self = VideoPreviewCog
  135. delay: timedelta = self.get_guild_setting(message.guild, Self.SETTING_DELAY)
  136. await asyncio.sleep(delay.total_seconds)
  137. # Look for embeds already showing the video
  138. self.log(message.guild, "Checking message for embeds")
  139. for embed in message.embeds:
  140. if embed.video.url:
  141. # If there's any video, skip downloading any previews
  142. self.log(message.guild, "Message already has a video. Skipping this message.")
  143. return
  144. await self._fetch_previews(message, links)
  145. async def _fetch_previews(self, message: Message, links: list[MessageLink]):
  146. promises = []
  147. for link in links:
  148. promises.append(self._fetch_preview(message, link))
  149. await asyncio.gather(*promises)
  150. async def _fetch_preview(self, message: Message, link: MessageLink):
  151. result = subprocess.run(
  152. [
  153. 'yt-dlp',
  154. '--skip-download',
  155. '--dump-single-json',
  156. link.url,
  157. ],
  158. stdout=subprocess.PIPE,
  159. stderr=subprocess.PIPE,
  160. universal_newlines=True
  161. )
  162. if result.returncode != 0:
  163. self.log(message.guild, "Fetching link info JSON failed. Skipping preview.")
  164. self.log(message.guild, result.stderr)
  165. return
  166. try:
  167. info: dict = json.loads(result.stdout)
  168. except Exception as e:
  169. self.log(message.guild, f"Error parsing info.json. Skipping preview. {e}")
  170. return
  171. description = info.get('description') or ''
  172. formats: list[dict] = info.get('formats') or []
  173. self.log(message.guild, f"Found {len(formats)} formats")
  174. formats = list(filter(filter_video_format, formats))
  175. self.log(message.guild, f"Filtered to {len(formats)} formats")
  176. sorted_formats: list[dict] = sorted(formats, key=rank_video_format, reverse=True)
  177. if len(sorted_formats) == 0:
  178. self.log(message.guild, f"No eligible formats for URL {link.url}")
  179. return
  180. best_format: dict = sorted_formats[0]
  181. self.log(message.guild, f"Best format is id {best_format.get('format_id')}")
  182. video_url: str = best_format.get('url')
  183. link_description: str = "video"
  184. if (best_format.get('width') or 0) > 0 and (best_format.get('height') or 0) > 0:
  185. link_description += f", {best_format.get('width')}×{best_format.get('height')}"
  186. if (best_format.get('filesize') or 0) > 0:
  187. link_description += f", {format_bytes(best_format.get('filesize'))}"
  188. elif (best_format.get('filesize_approx') or 0) > 0:
  189. link_description += f", {format_bytes(best_format.get('filesize_approx'))}"
  190. content = "Hmm, video preview didn't load. Let's try this."
  191. if len(description) > 0:
  192. content += "\n\n" + blockquote_markdown(suppress_markdown_url_previews(description)) + "\n"
  193. if link.spoiler:
  194. content += f"\n||[{link_description}]({video_url})||"
  195. else:
  196. content += f"\n[{link_description}]({video_url})"
  197. await message.reply(
  198. content,
  199. mention_author=False
  200. )