Experimental Discord bot written in Python
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. """
  2. Statements that match messages based on an expression and have a list of actions
  3. to take on them.
  4. """
  5. import re
  6. from abc import ABCMeta, abstractmethod
  7. from datetime import datetime, timezone
  8. from typing import Any, Literal, Union
  9. from discord import Message
  10. from discord import utils as discordutils
  11. from discord.ext.commands import Context
  12. from rocketbot.utils import (
  13. is_user_id,
  14. str_from_quoted_str,
  15. timedelta_from_str,
  16. user_id_from_mention,
  17. )
  # Field names that may appear on the left-hand side of a simple expression.
  PatternField = Literal[
      'content.markdown',
      'content',
      'content.plain',
      'author',
      'author.id',
      'author.joinage',
      'author.name',
      'lastmatched',
  ]

  # Operators that compare a field against a value. A leading "!" negates.
  PatternComparisonOperator = Literal[
      '==',
      '!=',
      '<',
      '>',
      '<=',
      '>=',
      'contains',
      '!contains',
      'matches',
      '!matches',
      'containsword',
      '!containsword',
  ]

  # Operators that combine or negate whole expressions.
  PatternBooleanOperator = Literal[
      '!',
      'and',
      'or',
  ]

  # Names of the actions a statement can request.
  PatternActionType = Literal[
      'ban',
      'delete',
      'kick',
      'modinfo',
      'modwarn',
      'reply',
  ]
  class PatternError(RuntimeError):
      """
      Raised when a pattern statement cannot be parsed.
      """
  class PatternDeprecationError(PatternError):
      """
      Raised by PatternStatement.check_deprecations when a statement uses
      deprecated syntax.
      """
  class PatternAction:
      """
      Describes one action to take on a matched message or its author.

      An action is an action name (one of the TYPE_* constants) plus a list of
      arguments for it.
      """

      TYPE_BAN: PatternActionType = 'ban'
      TYPE_DELETE: PatternActionType = 'delete'
      TYPE_KICK: PatternActionType = 'kick'
      TYPE_INFORM_MODS: PatternActionType = 'modinfo'
      TYPE_WARN_MODS: PatternActionType = 'modwarn'
      TYPE_REPLY: PatternActionType = 'reply'

      def __init__(self, action: str, args: list[Any]):
          """
          Creates an action. The argument list is copied, so later changes to
          the caller's list do not affect this action.
          """
          self.action = action
          self.arguments = list(args)

      def __str__(self) -> str:
          """
          Renders the action as "name(arg1, arg2)".
          """
          # Arguments are typed as Any, so each one is stringified before
          # joining; str.join alone raises TypeError on non-string items.
          arg_str = ', '.join(str(arg) for arg in self.arguments)
          return f'{self.action}({arg_str})'
  class PatternExpression(metaclass=ABCMeta):
      """
      Base class for every message matching expression. Subclasses must
      implement matches().
      """

      def __init__(self):
          """
          Nothing to set up; exists so subclasses can chain to it.
          """

      @abstractmethod
      def matches(self, message: Message, other_fields: dict[str, Any]) -> bool:
          """
          Tells whether the given message satisfies this expression.

          other_fields carries extra queryable values that are not part of the
          message itself. This base implementation always answers False.
          """
          return False
  class PatternSimpleExpression(PatternExpression):
      """
      Leaf expression of the form "<field> <operator> <value>".

      The field is read from the message (or from the extra fields passed to
      matches()) and compared against the stored value with the operator.
      """

      # Preferred field names
      FIELD_CONTENT_MARKDOWN: PatternField = 'content.markdown'
      FIELD_CONTENT_PLAIN: PatternField = 'content.plain'
      FIELD_AUTHOR_ID: PatternField = 'author.id'
      FIELD_AUTHOR_JOINAGE: PatternField = 'author.joinage'
      FIELD_AUTHOR_NAME: PatternField = 'author.name'
      FIELD_LAST_MATCHED: PatternField = 'lastmatched'

      # Less preferred but recognized field aliases
      ALIAS_FIELD_CONTENT_MARKDOWN: PatternField = 'content'
      ALIAS_FIELD_AUTHOR_ID: PatternField = 'author'

      # Comparison operators
      OP_EQUALS: PatternComparisonOperator = '=='
      OP_NOT_EQUALS: PatternComparisonOperator = '!='
      OP_LESS_THAN: PatternComparisonOperator = '<'
      OP_GREATER_THAN: PatternComparisonOperator = '>'
      OP_LESS_THAN_OR_EQUALS: PatternComparisonOperator = '<='
      OP_GREATER_THAN_OR_EQUALS: PatternComparisonOperator = '>='
      OP_CONTAINS: PatternComparisonOperator = 'contains'
      OP_NOT_CONTAINS: PatternComparisonOperator = '!contains'
      OP_MATCHES: PatternComparisonOperator = 'matches'
      OP_NOT_MATCHES: PatternComparisonOperator = '!matches'
      OP_CONTAINS_WORD: PatternComparisonOperator = 'containsword'
      OP_NOT_CONTAINS_WORD: PatternComparisonOperator = '!containsword'

      def __init__(self, field: PatternField, operator: PatternComparisonOperator, value: Any):
          super().__init__()
          self.field: PatternField = field
          self.operator: PatternComparisonOperator = operator
          self.value: Any = value

      def __field_value(self, message: Message, other_fields: dict[str, Any]) -> Any:
          """
          Looks up the value this expression's field refers to.

          Raises ValueError when the field name is not recognized.
          """
          cls = PatternSimpleExpression
          name = self.field

          if name == cls.FIELD_CONTENT_MARKDOWN or name == cls.ALIAS_FIELD_CONTENT_MARKDOWN:
              return message.content

          if name == cls.FIELD_CONTENT_PLAIN:
              return discordutils.remove_markdown(message.clean_content)

          if name == cls.FIELD_AUTHOR_ID or name == cls.ALIAS_FIELD_AUTHOR_ID:
              # IDs are compared as strings
              return str(message.author.id)

          if name == cls.FIELD_AUTHOR_JOINAGE:
              # Time between the author joining and this message being sent
              return message.created_at - message.author.joined_at

          if name == cls.FIELD_AUTHOR_NAME:
              return message.author.name

          if name == cls.FIELD_LAST_MATCHED:
              # With no recorded previous match, measure from a far-past date
              # so the elapsed time comes out very large.
              fallback = datetime(1900, 1, 1, tzinfo=timezone.utc)
              previous = other_fields.get('last_matched') or fallback
              return message.created_at - previous

          raise ValueError(f'Bad field name "{self.field}"')

      def matches(self, message: Message, other_fields: dict[str, Any]) -> bool:
          """
          Compares the message's field value against this expression's value.

          Text comparisons ignore case. Raises ValueError for an unknown field
          or operator.
          """
          cls = PatternSimpleExpression
          actual = self.__field_value(message, other_fields)
          expected = self.value
          op = self.operator
          both_text = isinstance(actual, str) and isinstance(expected, str)

          # Identity operators: case-insensitive only when both sides are text
          if op == cls.OP_EQUALS:
              return actual.lower() == expected.lower() if both_text else actual == expected
          if op == cls.OP_NOT_EQUALS:
              return actual.lower() != expected.lower() if both_text else actual != expected

          # Ordering operators
          if op == cls.OP_LESS_THAN:
              return actual < expected
          if op == cls.OP_GREATER_THAN:
              return actual > expected
          if op == cls.OP_LESS_THAN_OR_EQUALS:
              return actual <= expected
          if op == cls.OP_GREATER_THAN_OR_EQUALS:
              return actual >= expected

          # Substring operators
          if op == cls.OP_CONTAINS or op == cls.OP_NOT_CONTAINS:
              needle = expected.lower()
              haystack = actual.lower()
              if op == cls.OP_CONTAINS:
                  return needle in haystack
              return needle not in haystack

          # Regex operators: the value is a compiled pattern searched against
          # the lowercased field value
          if op == cls.OP_MATCHES or op == cls.OP_CONTAINS_WORD:
              return expected.search(actual.lower()) is not None
          if op == cls.OP_NOT_MATCHES or op == cls.OP_NOT_CONTAINS_WORD:
              return expected.search(actual.lower()) is None

          raise ValueError(f'Bad operator {self.operator}')

      def __str__(self) -> str:
          """
          Renders the expression as "(field operator value)".
          """
          return '({} {} {})'.format(self.field, self.operator, self.value)
  class PatternCompoundExpression(PatternExpression):
      """
      Expression built from child expressions joined by a boolean operator:
      negation of a single child, or "and" / "or" over several.
      """

      OP_NOT = '!'
      OP_AND = 'and'
      OP_OR = 'or'

      def __init__(self, operator: PatternBooleanOperator, operands: list[PatternExpression]):
          super().__init__()
          self.operator: PatternBooleanOperator = operator
          # Copy so later edits to the caller's list don't leak in
          self.operands = list(operands)

      def matches(self, message: Message, other_fields: dict[str, Any]) -> bool:
          """
          Evaluates the children against the message and combines the results.

          "and" and "or" stop at the first child that decides the outcome.
          Raises ValueError for an unknown operator.
          """
          kind = self.operator
          if kind == PatternCompoundExpression.OP_NOT:
              # Only the first operand is negated
              return not self.operands[0].matches(message, other_fields)
          if kind == PatternCompoundExpression.OP_AND:
              return all(child.matches(message, other_fields) for child in self.operands)
          if kind == PatternCompoundExpression.OP_OR:
              return any(child.matches(message, other_fields) for child in self.operands)
          raise ValueError(f'Bad operator "{self.operator}"')

      def __str__(self) -> str:
          """
          Renders the expression with its children wrapped in parentheses.
          """
          if self.operator == PatternCompoundExpression.OP_NOT:
              return f'(!( {self.operands[0]} ))'
          separator = f' {self.operator} '
          body = separator.join(str(child) for child in self.operands)
          return f'( {body} )'
  class PatternStatement:
      """
      A complete message match statement: an expression to test messages with
      and the actions to perform on a match.
      """

      DEFAULT_PRIORITY: int = 100

      def __init__(self,
              name: str,
              actions: list[PatternAction],
              expression: PatternExpression,
              original: str,
              priority: int = DEFAULT_PRIORITY):
          """
          Stores the parts of a statement. original is the source text the
          statement was parsed from; the action list is copied.
          """
          self.name: str = name
          self.actions: list[PatternAction] = list(actions)
          self.expression: PatternExpression = expression
          self.original: str = original
          self.priority: int = priority

      def check_deprecations(self) -> None:
          """
          Raises a PatternDeprecationError if this statement's expression
          uses any deprecated syntax; otherwise returns nothing.
          """
          self.__check_deprecations(self.expression)

      @classmethod
      def __check_deprecations(cls, expression: PatternExpression) -> None:
          """
          Walks an expression tree looking for deprecated field names.
          """
          if isinstance(expression, PatternSimpleExpression):
              if expression.field in PatternCompiler.DEPRECATED_FIELDS:
                  raise PatternDeprecationError(f'"{expression.field}" field is deprecated')
              return
          if not isinstance(expression, PatternCompoundExpression):
              return
          # Compound: check every child in order
          for child in expression.operands:
              cls.__check_deprecations(child)

      def to_json(self) -> dict[str, Any]:
          """
          Builds the JSON-compatible dict for this statement. Only the name,
          priority, and original source text are stored.
          """
          as_json: dict[str, Any] = {}
          as_json['name'] = self.name
          as_json['priority'] = self.priority
          as_json['statement'] = self.original
          return as_json

      @classmethod
      def from_json(cls, json: dict[str, Any]):
          """
          Rebuilds a PatternStatement from the dict produced by to_json by
          re-parsing the stored statement text. A missing priority falls back
          to DEFAULT_PRIORITY.
          """
          statement = PatternCompiler.parse_statement(json['name'], json['statement'])
          statement.priority = json.get('priority', cls.DEFAULT_PRIORITY)
          return statement
  class PatternCompiler:
      """
      Parses a user-provided message filter statement into a PatternStatement.

      A statement is a comma-separated list of actions, the word "if", and
      then an expression. Expressions are "<field> <operator> <value>"
      comparisons that can be negated with "!", grouped with parentheses, and
      joined with "and" / "or". Mixed "and" / "or" are grouped left to right
      with no operator precedence.
      """

      DATATYPE_FLOAT: str = 'float'
      DATATYPE_ID: str = 'id'
      DATATYPE_INT: str = 'int'
      DATATYPE_MEMBER: str = 'Member'
      DATATYPE_REGEX: str = 'regex'
      DATATYPE_TEXT: str = 'text'
      DATATYPE_TIMESPAN: str = 'timespan'

      # Datatype of the value each field is compared against
      FIELD_TO_DATATYPE: dict[PatternField, str] = {
          PatternSimpleExpression.ALIAS_FIELD_AUTHOR_ID: DATATYPE_MEMBER,
          PatternSimpleExpression.FIELD_AUTHOR_ID: DATATYPE_ID,
          PatternSimpleExpression.FIELD_AUTHOR_JOINAGE: DATATYPE_TIMESPAN,
          PatternSimpleExpression.FIELD_AUTHOR_NAME: DATATYPE_TEXT,
          PatternSimpleExpression.ALIAS_FIELD_CONTENT_MARKDOWN: DATATYPE_TEXT, # deprecated, use content.markdown or content.plain
          PatternSimpleExpression.FIELD_CONTENT_MARKDOWN: DATATYPE_TEXT,
          PatternSimpleExpression.FIELD_CONTENT_PLAIN: DATATYPE_TEXT,
          PatternSimpleExpression.FIELD_LAST_MATCHED: DATATYPE_TIMESPAN,
      }
      DEPRECATED_FIELDS: set[PatternField] = { 'content' }

      # Datatypes of the arguments each action takes, in order
      ACTION_TO_ARGS: dict[PatternActionType, list[str]] = {
          PatternAction.TYPE_BAN: [],
          PatternAction.TYPE_DELETE: [],
          PatternAction.TYPE_KICK: [],
          PatternAction.TYPE_INFORM_MODS: [],
          PatternAction.TYPE_WARN_MODS: [],
          PatternAction.TYPE_REPLY: [ DATATYPE_TEXT ],
      }

      OPERATORS_IDENTITY: set[PatternComparisonOperator] = {
          PatternSimpleExpression.OP_EQUALS,
          PatternSimpleExpression.OP_NOT_EQUALS,
      }
      OPERATORS_COMPARISON: set[PatternComparisonOperator] = {
          PatternSimpleExpression.OP_LESS_THAN,
          PatternSimpleExpression.OP_GREATER_THAN,
          PatternSimpleExpression.OP_LESS_THAN_OR_EQUALS,
          PatternSimpleExpression.OP_GREATER_THAN_OR_EQUALS,
      }
      OPERATORS_NUMERIC: set[PatternComparisonOperator] = OPERATORS_IDENTITY | OPERATORS_COMPARISON
      OPERATORS_TEXT: set[PatternComparisonOperator] = OPERATORS_IDENTITY | {
          PatternSimpleExpression.OP_CONTAINS,
          PatternSimpleExpression.OP_NOT_CONTAINS,
          PatternSimpleExpression.OP_CONTAINS_WORD,
          PatternSimpleExpression.OP_NOT_CONTAINS_WORD,
          PatternSimpleExpression.OP_MATCHES,
          PatternSimpleExpression.OP_NOT_MATCHES,
      }
      OPERATORS_ALL: set[str] = OPERATORS_IDENTITY | OPERATORS_COMPARISON | OPERATORS_TEXT

      # Operators that are legal for each datatype
      DATATYPE_TO_OPERATORS: dict[str, set[PatternComparisonOperator]] = {
          DATATYPE_ID: OPERATORS_IDENTITY,
          DATATYPE_MEMBER: OPERATORS_IDENTITY,
          DATATYPE_TEXT: OPERATORS_TEXT,
          DATATYPE_INT: OPERATORS_NUMERIC,
          DATATYPE_FLOAT: OPERATORS_NUMERIC,
          DATATYPE_TIMESPAN: OPERATORS_NUMERIC,
      }

      # Character classes used by the tokenizer
      WHITESPACE_CHARS: str = ' \t\n\r'
      STRING_QUOTE_CHARS: str = '\'"'
      SYMBOL_CHARS: str = 'abcdefghijklmnopqrstuvwxyz.'
      VALUE_CHARS: str = '0123456789dhms<@!>'
      OP_CHARS: str = '<=>!(),'

      MAX_EXPRESSION_NESTING: int = 8

      @classmethod
      def expression_str_from_context(cls, context: Context, name: str) -> str:
          """
          Extracts the statement string from an "add" command context.

          Strips the command prefix, each command name in the invoked command
          chain, and finally the pattern name (bare or double-quoted) from the
          front of the message content, and returns what is left.
          """
          pattern_str: str = context.message.content
          # Build [root command, ..., leaf command, name] by walking up parents
          command_chain = [ name ]
          command = context.command
          while command:
              command_chain.insert(0, command.name)
              command = command.parent
          # Only the first word carries the command prefix
          command_chain[0] = f'{context.prefix}{command_chain[0]}'
          for part in command_chain:
              if pattern_str.startswith(part):
                  pattern_str = pattern_str[len(part):].lstrip()
              elif pattern_str.startswith(f'"{part}"'):
                  # + 2 skips the surrounding double quotes
                  pattern_str = pattern_str[len(part) + 2:].lstrip()
          return pattern_str

      @classmethod
      def parse_statement(cls, name: str, statement: str) -> PatternStatement:
          """
          Parses a user-provided message filter statement into a PatternStatement.
          Raises PatternError on failure.
          """
          tokens: list[str] = cls.__tokenize(statement)
          actions, token_index = cls.__read_actions(tokens, 0)
          expression, token_index = cls.__read_expression(tokens, token_index)
          # The top-level expression reader only stops early on an unmatched
          # ")". Reject it rather than silently dropping the rest of the
          # statement.
          if token_index < len(tokens):
              raise PatternError(f'Unexpected "{tokens[token_index]}"')
          return PatternStatement(name, actions, expression, statement)

      @classmethod
      def __tokenize(cls, statement: str) -> list[str]:
          """
          Converts a message filter statement into a list of tokens.

          Quoted strings become single tokens that keep their quote characters.
          Inside quotes, backslash escapes the next character ("\\n" and "\\t"
          become newline and tab). Outside quotes, characters are grouped into
          a token for as long as they can all belong to one of the symbol,
          value, or operator character classes. Raises PatternError on a
          backslash outside a quoted string.
          """
          tokens: list[str] = []
          in_quote: Union[bool, str] = False  # the open quote char, or False
          in_escape: bool = False
          all_token_types: set[str] = { 'sym', 'op', 'val' }
          # Token types the token being built could still turn out to be
          possible_token_types: set[str] = set(all_token_types)
          current_token: str = ''
          for ch in statement:
              if in_quote:
                  if in_escape:
                      if ch == 'n':
                          current_token += '\n'
                      elif ch == 't':
                          current_token += '\t'
                      else:
                          current_token += ch
                      in_escape = False
                  elif ch == '\\':
                      in_escape = True
                  elif ch == in_quote:
                      # Closing quote ends the token
                      current_token += ch
                      tokens.append(current_token)
                      current_token = ''
                      possible_token_types |= all_token_types
                      in_quote = False
                  else:
                      current_token += ch
              elif ch in cls.STRING_QUOTE_CHARS:
                  # Opening quote: flush whatever was being built first
                  if current_token:
                      tokens.append(current_token)
                      possible_token_types |= all_token_types
                  in_quote = ch
                  current_token = ch
              elif ch == '\\':
                  raise PatternError("Unexpected \\ outside quoted string")
              elif ch in cls.WHITESPACE_CHARS:
                  if current_token:
                      tokens.append(current_token)
                      current_token = ''
                      possible_token_types |= all_token_types
              else:
                  possible_ch_types = set()
                  if ch in cls.SYMBOL_CHARS:
                      possible_ch_types.add('sym')
                  if ch in cls.VALUE_CHARS:
                      possible_ch_types.add('val')
                  if ch in cls.OP_CHARS:
                      possible_ch_types.add('op')
                  # Start a new token when this char cannot belong to any type
                  # the current token could still be
                  if current_token and possible_ch_types.isdisjoint(possible_token_types):
                      tokens.append(current_token)
                      current_token = ''
                      possible_token_types |= all_token_types
                  possible_token_types &= possible_ch_types
                  current_token += ch
          if current_token:
              tokens.append(current_token)

          # Some symbols might be glommed onto other tokens. Split 'em up.
          # Note this also splits "!=" into "!" and "="; the simple expression
          # reader glues a leading "!" back onto the operator that follows.
          prefixes_to_split = [ '!', '(', ',' ]
          suffixes_to_split = [ ')', ',' ]
          i = 0
          while i < len(tokens):
              token = tokens[i]
              mutated = False
              for prefix in prefixes_to_split:
                  if token.startswith(prefix) and len(token) > len(prefix):
                      tokens.insert(i, prefix)
                      tokens[i + 1] = token[len(prefix):]
                      # Move on to the remainder, which may need more splitting
                      i += 1
                      mutated = True
                      break
              if mutated:
                  continue
              for suffix in suffixes_to_split:
                  if token.endswith(suffix) and len(token) > len(suffix):
                      tokens[i] = token[0:-len(suffix)]
                      tokens.insert(i + 1, suffix)
                      mutated = True
                      break
              if mutated:
                  # Re-examine the shortened token at the same index
                  continue
              i += 1
          return tokens

      @classmethod
      def __build_action(cls, action_tokens: list[str]) -> PatternAction:
          """
          Builds and validates one action from its name token followed by its
          argument tokens.
          """
          action = PatternAction(action_tokens[0], action_tokens[1:])
          cls.__validate_action(action)
          return action

      @classmethod
      def __read_actions(cls,
              tokens: list[str],
              token_index: int) -> tuple[list[PatternAction], int]:
          """
          Reads the actions from a list of statement tokens. Returns a tuple
          containing a list of PatternActions and the token index this method
          left off at (the token after the "if").

          Raises PatternError on a comma with no action before it, on an
          invalid action, or when the tokens run out before an "if".
          """
          actions: list[PatternAction] = []
          pending: list[str] = []
          while token_index < len(tokens):
              token = tokens[token_index]
              token_index += 1
              if token == 'if':
                  if pending:
                      actions.append(cls.__build_action(pending))
                  return actions, token_index
              if token == ',':
                  if not pending:
                      raise PatternError('Unexpected ,')
                  actions.append(cls.__build_action(pending))
                  pending = []
              else:
                  pending.append(token)
          raise PatternError('Unexpected end of line in action list')

      @classmethod
      def __validate_action(cls, action: PatternAction) -> None:
          """
          Checks that an action is known and has the right number of
          arguments, then replaces each argument token with its parsed value.
          Raises PatternError if anything is wrong.
          """
          expected: list[str] = cls.ACTION_TO_ARGS.get(action.action)
          if expected is None:
              raise PatternError(f'Unknown action "{action.action}"')
          if len(action.arguments) != len(expected):
              if not expected:
                  raise PatternError(f'Action "{action.action}" expects no ' + \
                      f'arguments, got {len(action.arguments)}.')
              raise PatternError(f'Action "{action.action}" expects ' + \
                  f'{len(expected)} arguments, got {len(action.arguments)}.')
          for i, datatype in enumerate(expected):
              raw = action.arguments[i]
              try:
                  action.arguments[i] = cls.__parse_value(raw, datatype)
              except ValueError as cause:
                  # Report bad arguments the same way bad expression values are
                  raise PatternError(f'Bad value {raw}') from cause

      @classmethod
      def __read_expression(cls,
              tokens: list[str],
              token_index: int,
              depth: int = 0,
              one_subexpression: bool = False) -> tuple[PatternExpression, int]:
          """
          Reads an expression from a list of statement tokens. Returns a tuple
          containing the PatternExpression and the token index it left off at.
          If one_subexpression is True then it will return after reading a
          single expression instead of joining multiples (for reading the
          subject of a NOT expression).

          Reading stops at the end of the tokens or at a ")", which is left
          unconsumed for the caller. Raises PatternError on malformed input.
          """
          # Checked here as well as for simple expressions so that a long run
          # of "(" or "!" cannot recurse without limit.
          if depth > cls.MAX_EXPRESSION_NESTING:
              raise PatternError('Expression nests too deeply')

          compound_operators = (PatternCompoundExpression.OP_AND, PatternCompoundExpression.OP_OR)
          subexpressions: list[PatternExpression] = []
          last_compound_operator = None
          while token_index < len(tokens):
              if one_subexpression and subexpressions:
                  return subexpressions[0], token_index

              token = tokens[token_index]
              if token == ')':
                  if not subexpressions:
                      raise PatternError('No subexpressions')
                  if len(subexpressions) == 1:
                      return subexpressions[0], token_index
                  return (PatternCompoundExpression(last_compound_operator,
                      subexpressions), token_index)

              if token in compound_operators:
                  if last_compound_operator and token != last_compound_operator:
                      # Operator changed: fold everything read so far into one
                      # operand of the new operator.
                      subexpressions = [
                          PatternCompoundExpression(last_compound_operator,
                              subexpressions),
                      ]
                  last_compound_operator = token
                  token_index += 1
                  if token_index >= len(tokens):
                      raise PatternError('Expected expression, found EOL')
                  token = tokens[token_index]
              elif subexpressions:
                  # Two operands in a row would otherwise be joined with no
                  # operator and only fail later, at match time.
                  raise PatternError(f'Expected "and" or "or", found "{token}"')

              if token == PatternCompoundExpression.OP_NOT:
                  (exp, next_index) = cls.__read_expression(tokens,
                      token_index + 1, depth + 1, one_subexpression=True)
                  subexpressions.append(PatternCompoundExpression('!', [exp]))
                  token_index = next_index
              elif token == '(':
                  (exp, next_index) = cls.__read_expression(tokens,
                      token_index + 1, depth + 1)
                  if next_index >= len(tokens) or tokens[next_index] != ')':
                      raise PatternError('Expected )')
                  subexpressions.append(exp)
                  token_index = next_index + 1
              else:
                  (simple, next_index) = cls.__read_simple_expression(tokens,
                      token_index, depth)
                  subexpressions.append(simple)
                  token_index = next_index

          if not subexpressions:
              raise PatternError('No subexpressions')
          if len(subexpressions) == 1:
              return subexpressions[0], token_index
          return PatternCompoundExpression(last_compound_operator,
              subexpressions), token_index

      @classmethod
      def __read_simple_expression(cls,
              tokens: list[str],
              token_index: int,
              depth: int = 0) -> tuple[PatternExpression, int]:
          """
          Reads a simple expression consisting of a field name, operator, and
          comparison value. Returns a tuple of the PatternSimpleExpression and
          the token index it left off at.

          Raises PatternError if the expression nests too deeply, ends early,
          names an unknown field, uses an operator the field's datatype does
          not allow, or has a value that cannot be parsed.
          """
          if depth > cls.MAX_EXPRESSION_NESTING:
              raise PatternError('Expression nests too deeply')

          if token_index >= len(tokens):
              raise PatternError('Expected field name, found EOL')
          field: PatternField = tokens[token_index]
          token_index += 1
          datatype = cls.FIELD_TO_DATATYPE.get(field, None)
          if datatype is None:
              raise PatternError(f'No such field "{field}"')

          if token_index >= len(tokens):
              raise PatternError('Expected operator, found EOL')
          op = tokens[token_index]
          token_index += 1
          if op == PatternCompoundExpression.OP_NOT:
              # The tokenizer splits a leading "!" off; rejoin it with the
              # operator that follows (e.g. "!" + "contains", "!" + "=").
              if token_index >= len(tokens):
                  raise PatternError('Expected operator, found EOL')
              op = '!' + tokens[token_index]
              token_index += 1
          allowed_ops = cls.DATATYPE_TO_OPERATORS[datatype]
          if op not in allowed_ops:
              if op in cls.OPERATORS_ALL:
                  raise PatternError(f'Operator {op} cannot be used with ' + \
                      f'field "{field}"')
              raise PatternError(f'Unrecognized operator "{op}" - allowed: ' + \
                  f'{sorted(allowed_ops)}')

          if token_index >= len(tokens):
              raise PatternError('Expected value, found EOL')
          value_str = tokens[token_index]
          try:
              value = cls.__parse_value(value_str, datatype, op)
          except ValueError as cause:
              raise PatternError(f'Bad value {value_str}') from cause
          token_index += 1

          return PatternSimpleExpression(field, op, value), token_index

      @classmethod
      def __parse_value(cls, value: str, datatype: str, op: Union[str, None] = None) -> Any:
          """
          Converts a value token to its Python value. Raises ValueError on failure.

          For text values the operator decides the result: "matches" and
          "containsword" (and their negations) yield a compiled regex, and
          anything else yields the unquoted string.
          """
          if datatype == cls.DATATYPE_ID:
              if not is_user_id(value):
                  raise ValueError(f'Illegal user id value: {value}')
              return value
          if datatype == cls.DATATYPE_MEMBER:
              return user_id_from_mention(value)
          if datatype == cls.DATATYPE_TEXT:
              s = str_from_quoted_str(value)
              if op in ('matches', '!matches'):
                  try:
                      # Lowercasing the pattern text would corrupt escapes
                      # such as \S, \W, \D, \B and (?P<name>...), so make the
                      # pattern case-insensitive with a flag instead.
                      return re.compile(s, re.IGNORECASE)
                  except re.error as e:
                      raise ValueError(f'Invalid regex: {e}') from e
              if op in ('containsword', '!containsword'):
                  # The word is escaped, so this pattern is always valid
                  return re.compile(rf'\b{re.escape(s.lower())}\b')
              return s
          if datatype == cls.DATATYPE_INT:
              return int(value)
          if datatype == cls.DATATYPE_FLOAT:
              return float(value)
          if datatype == cls.DATATYPE_TIMESPAN:
              return timedelta_from_str(value)
          raise ValueError(f'Unhandled datatype {datatype}')