瀏覽代碼

Better URL detection regex

master
Rocketsoup 1 年之前
父節點
當前提交
a647eb8cca
共有 1 個文件被更改,包括 14 次插入和 2 次刪除
  1. 14
    2
      rocketbot/cogs/urlspamcog.py

+ 14
- 2
rocketbot/cogs/urlspamcog.py 查看文件

@@ -228,12 +228,24 @@ class URLSpamCog(BaseCog, name='URL Spam'):
228 228
 
229 229
 	def is_url(self, s: str):
230 230
 		'Tests if a string is strictly a URL'
231
-		pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
231
+		ipv6_host_pattern = r'\[[0-9a-fA-F:]+\]'
232
+		ipv4_host_pattern = r'[0-9\.]+'
233
+		hostname_pattern = r'[a-zA-Z0-9-]+\.[a-zA-Z0-9-\.]+'
234
+		host_pattern = r'(?:' + ipv6_host_pattern + '|' + ipv4_host_pattern + '|' + hostname_pattern + ')'
235
+		port_pattern = '(?::[0-9]+)?'
236
+		path_pattern = r'(?:/[^ \]\)]*)?'
237
+		pattern = r'^http[s]?://' + host_pattern + port_pattern + path_pattern + '$'
232 238
 		return re.match(pattern, s, re.IGNORECASE) != None
233 239
 
234 240
 	def is_casual_url(self, s: str):
235 241
 		'Tests if a string is a "casual URL" with no scheme included'
236
-		pattern = r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
242
+		ipv6_host_pattern = r'\[[0-9a-fA-F:]+\]'
243
+		ipv4_host_pattern = r'[0-9\.]+'
244
+		hostname_pattern = r'[a-zA-Z0-9-]+\.[a-zA-Z0-9-\.]+'
245
+		host_pattern = r'(?:' + ipv6_host_pattern + '|' + ipv4_host_pattern + '|' + hostname_pattern + ')'
246
+		port_pattern = '(?::[0-9]+)?'
247
+		path_pattern = r'(?:/[^ \]\)]*)?'
248
+		pattern = r'^' + host_pattern + port_pattern + path_pattern + '$'
237 249
 		return re.match(pattern, s, re.IGNORECASE) != None
238 250
 
239 251
 	async def on_mod_react(self,

Loading…
取消
儲存