Experimental Discord bot written in Python
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

videopreviewcog.py 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. import asyncio
  2. import json
  3. import re
  4. import subprocess
  5. from discord import Message
  6. from discord.ext.commands import Cog
  7. from rocketbot.cogs.basecog import BaseCog
  8. from rocketbot.cogsetting import CogSetting
  9. from rocketbot.utils import (
  10. blockquote_markdown,
  11. format_bytes,
  12. suppress_markdown_url_previews,
  13. )
  def filter_video_format(format: dict) -> bool:
      """
      Decide whether a format entry should be kept as a video candidate.

      Returns False when the entry's 'resolution' is 'audio only' or its
      'format_note' is 'DASH audio'; True for everything else, including
      entries that lack both keys.
      """
      # (key, value) pairs that mark an entry as carrying audio only.
      audio_markers = (
          ('resolution', 'audio only'),
          ('format_note', 'DASH audio'),
      )
      return not any(format.get(key) == marker for key, marker in audio_markers)
  def rank_video_format(format: dict) -> tuple:
      """
      Build a sort key for a format entry; larger keys are better.

      The key is (content, res):
        - content is 1 for audio-only entries, 2 for entries noted as
          'DASH video', and 3 for anything else (treated as carrying both
          audio and video, since multiplexed formats are not clearly
          labeled).
        - res is width + height, with missing or None values counted as 0.
      """
      resolution = format.get('resolution')
      note = format.get('format_note')

      if resolution == 'audio only' or note == 'DASH audio':
          content = 1
      elif note == 'DASH video':
          content = 2
      else:
          content = 3

      width = format.get('width') or 0
      height = format.get('height') or 0
      return (content, width + height)
  class MessageLink:
      """A single URL found in a message, tagged with where it came from."""

      # The matched URL text.
      url: str
      # Whether the link should be wrapped in spoiler markup when re-posted.
      spoiler: bool = False
      # Short site tag supplied by the caller (class-level default: 'unknown').
      link_type: str = 'unknown'

      def __init__(self, url: str, link_type: str, spoiler: bool = False):
          """Store the URL, its site tag, and the spoiler flag on the instance."""
          self.url, self.link_type, self.spoiler = url, link_type, spoiler
  class VideoPreviewCog(BaseCog, name='Video Link Previews'):
      """
      Cog that replies with a direct video link for certain social media posts.

      For each non-bot guild message it looks for Instagram, Facebook, and
      Twitter/X post links (each site individually enabled per guild). It
      then waits a few seconds and, if none of the message's embeds carries
      a video, asks `yt-dlp` for the post's metadata and replies with a link
      to the highest-ranked video format.
      """

      SETTING_ENABLED = CogSetting(
          'enabled',
          bool,
          default_value=False,
          brief='Video link previews',
          description='Whether links to certain social media videos should show previews.',
      )
      SETTING_INSTAGRAM = CogSetting(
          'instagram',
          bool,
          default_value=False,
          brief='whether to show video previews for Instagram links',
          description='For both regular posts and reels',
      )
      SETTING_FACEBOOK = CogSetting(
          'facebook',
          bool,
          default_value=False,
          brief='whether to show video previews for Facebook links',
      )
      SETTING_TWITTER = CogSetting(
          'twitter',
          bool,
          default_value=False,
          brief='whether to show video previews for Twitter links',
      )

      REGEX_INSTAGRAM_POST = r'https?:\/\/(?:www\.)?instagram\.com\/(?:p|reel)\/[a-zA-Z0-9_-]+\/?'
      REGEX_FACEBOOK_POST = r'https?:\/\/(?:www\.)?facebook\.com\/share\/[rv]\/[a-zA-Z0-9_-]+\/?'
      REGEX_TWITTER_POST = r'https?:\/\/(?:twitter|x)\.com\/[a-zA-Z0-9_-]+/status/[0-9]+'
      # Raw string so the escaped pipes are not treated as (invalid) string escapes.
      REGEX_SPOILERS = r'\|\|.+\|\|'

      # Best video and best audio, mp4 format with m4a audio
      # NOTE(review): not referenced by any method in this class, and the
      # first two alternatives are repeated. Left unchanged in case other
      # code reads this value.
      FORMATS = 'bv*[ext=mp4]+ba[ext=m4a]/' \
          'b[ext=mp4]/' \
          'bv*[ext=mp4]+ba[ext=m4a]/' \
          'b[ext=mp4]/' \
          'bv*+ba/' \
          'b'

      def __init__(self, bot):
          """Register the cog under the 'linkpreview' config prefix and add its settings."""
          super().__init__(
              bot,
              config_prefix='linkpreview',
              short_description='Manages video link preview behavior.',
          )
          Self = VideoPreviewCog
          self.add_setting(Self.SETTING_ENABLED)
          self.add_setting(Self.SETTING_INSTAGRAM)
          self.add_setting(Self.SETTING_FACEBOOK)
          self.add_setting(Self.SETTING_TWITTER)

      @Cog.listener()
      async def on_message(self, message: Message):
          """Event listener: start the preview flow for eligible guild messages."""
          if (
              message.author is None
              or message.author.bot
              or message.guild is None
              or message.channel is None
              or message.content is None
          ):
              return
          if not self.get_guild_setting(message.guild, self.SETTING_ENABLED):
              return
          links = self._get_previewable_links(message)
          if not links:
              return
          await self._wait_for_preview(message, links)

      # TODO: Make this just link to the raw video file if possible (yt-dlp --get-url)
      def _get_previewable_links(self, message: Message) -> list[MessageLink]:
          """
          Collect every supported post link in the message content.

          Only sites whose per-guild setting is enabled are scanned. If the
          message contains spoiler markup anywhere, every returned link is
          flagged as a spoiler (the check is per message, not per link).
          """
          Self = VideoPreviewCog
          content: str = message.content
          # re.search, not re.match: spoiler markup may appear after other text.
          has_spoilers = re.search(Self.REGEX_SPOILERS, content) is not None

          sources = (
              (Self.SETTING_INSTAGRAM, Self.REGEX_INSTAGRAM_POST, 'instagram'),
              (Self.SETTING_FACEBOOK, Self.REGEX_FACEBOOK_POST, 'facebook'),
              (Self.SETTING_TWITTER, Self.REGEX_TWITTER_POST, 'twitter'),
          )
          links: list[MessageLink] = []
          for setting, pattern, link_type in sources:
              if not self.get_guild_setting(message.guild, setting):
                  continue
              links.extend(
                  MessageLink(url, link_type, has_spoilers)
                  for url in re.findall(pattern, content)
              )
          # TODO: Custom patterns
          return links

      async def _wait_for_preview(self, message: Message, links: list[MessageLink]):
          """Wait 3 seconds, then fetch previews unless an embed already has a video."""
          await asyncio.sleep(3)
          # Look for embeds already showing the video
          self.log(message.guild, "Checking message for embeds")
          for embed in message.embeds:
              if embed.video.url:
                  # If there's any video, skip downloading any previews
                  self.log(message.guild, "Message already has a video. Skipping this message.")
                  return
          await self._fetch_previews(message, links)

      async def _fetch_previews(self, message: Message, links: list[MessageLink]):
          """Fetch and post a preview for each link concurrently."""
          await asyncio.gather(*(self._fetch_preview(message, link) for link in links))

      @staticmethod
      def _describe_format(video_format: dict) -> str:
          """Build the reply's link text: "video" plus dimensions and file size when known."""
          parts = ["video"]
          width = video_format.get('width') or 0
          height = video_format.get('height') or 0
          if width > 0 and height > 0:
              parts.append(f"{width}×{height}")
          # Prefer the exact size; fall back to the approximate one.
          if (video_format.get('filesize') or 0) > 0:
              parts.append(format_bytes(video_format.get('filesize')))
          elif (video_format.get('filesize_approx') or 0) > 0:
              parts.append(format_bytes(video_format.get('filesize_approx')))
          return ", ".join(parts)

      async def _fetch_preview(self, message: Message, link: MessageLink):
          """
          Query `yt-dlp` for one link and reply with the best video format's URL.

          Every failure (yt-dlp cannot be started, non-zero exit, unparseable
          JSON, no eligible format, format without a URL) is logged and the
          preview is skipped; nothing is raised for those cases.
          """
          command = [
              'yt-dlp',
              '--skip-download',
              '--dump-single-json',
              link.url,
          ]
          try:
              # subprocess.run blocks until yt-dlp exits, so run it in a worker
              # thread to keep the event loop (and sibling previews) responsive.
              result = await asyncio.to_thread(
                  subprocess.run,
                  command,
                  stdout=subprocess.PIPE,
                  stderr=subprocess.PIPE,
                  universal_newlines=True,
              )
          except OSError as e:
              # Raised e.g. when the yt-dlp executable cannot be found or started.
              self.log(message.guild, f"Could not run yt-dlp. Skipping preview. {e}")
              return
          if result.returncode != 0:
              self.log(message.guild, "Fetching link info JSON failed. Skipping preview.")
              self.log(message.guild, result.stderr)
              return
          try:
              info: dict = json.loads(result.stdout)
          except json.JSONDecodeError as e:
              self.log(message.guild, f"Error parsing info.json. Skipping preview. {e}")
              return

          description = info.get('description') or ''
          formats: list[dict] = info.get('formats') or []
          self.log(message.guild, f"Found {len(formats)} formats")
          formats = [f for f in formats if filter_video_format(f)]
          self.log(message.guild, f"Filtered to {len(formats)} formats")
          if not formats:
              self.log(message.guild, f"No eligible formats for URL {link.url}")
              return

          # Highest rank first; sorted() is stable, so ties keep yt-dlp's order.
          best_format: dict = sorted(formats, key=rank_video_format, reverse=True)[0]
          self.log(message.guild, f"Best format is id {best_format.get('format_id')}")
          video_url = best_format.get('url')
          if not video_url:
              # Without a URL the reply would contain a dead "(None)" link.
              self.log(message.guild, f"Best format has no URL for {link.url}. Skipping preview.")
              return

          link_description = self._describe_format(best_format)
          content = "Hmm, video preview didn't load. Let's try this."
          if description:
              content += "\n\n" + blockquote_markdown(suppress_markdown_url_previews(description)) + "\n"
          if link.spoiler:
              content += f"\n||[{link_description}]({video_url})||"
          else:
              content += f"\n[{link_description}]({video_url})"
          await message.reply(
              content,
              mention_author=False
          )