Experimental Discord bot written in Python
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

pattern.py 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. """
  2. Statements that match messages based on an expression and have a list of actions
  3. to take on them.
  4. """
  5. import re
  6. from abc import ABCMeta, abstractmethod
  7. from datetime import datetime, timezone
  8. from typing import Any, Union, Literal
  9. from discord import Message, utils as discordutils
  10. from discord.ext.commands import Context
  11. from rocketbot.utils import is_user_id, str_from_quoted_str, timedelta_from_str, \
  12. user_id_from_mention
  13. PatternField = Literal['content.markdown', 'content', 'content.plain', 'author', 'author.id', 'author.joinage', 'author.name', 'lastmatched']
  14. PatternComparisonOperator = Literal['==', '!=', '<', '>', '<=', '>=', 'contains', '!contains', 'matches', '!matches', 'containsword', '!containsword']
  15. PatternBooleanOperator = Literal['!', 'and', 'or']
  16. PatternActionType = Literal['ban', 'delete', 'kick', 'modinfo', 'modwarn', 'reply']
  17. class PatternError(RuntimeError):
  18. """
  19. Error thrown when parsing a pattern statement.
  20. """
  21. class PatternDeprecationError(PatternError):
  22. """
  23. Error raised by PatternStatement.check_deprecated_syntax.
  24. """
  25. class PatternAction:
  26. """
  27. Describes one action to take on a matched message or its author.
  28. """
  29. TYPE_BAN: PatternActionType = 'ban'
  30. TYPE_DELETE: PatternActionType = 'delete'
  31. TYPE_KICK: PatternActionType = 'kick'
  32. TYPE_INFORM_MODS: PatternActionType = 'modinfo'
  33. TYPE_WARN_MODS: PatternActionType = 'modwarn'
  34. TYPE_REPLY: PatternActionType = 'reply'
  35. def __init__(self, action: str, args: list[Any]):
  36. self.action = action
  37. self.arguments = list(args)
  38. def __str__(self) -> str:
  39. arg_str = ', '.join(self.arguments)
  40. return f'{self.action}({arg_str})'
  41. class PatternExpression(metaclass=ABCMeta):
  42. """
  43. Abstract message matching expression.
  44. """
  45. def __init__(self):
  46. pass
  47. @abstractmethod
  48. def matches(self, message: Message, other_fields: dict[str, Any]) -> bool:
  49. """
  50. Whether a message matches this expression. other_fields are additional
  51. fields that can be queried not contained in the message itself.
  52. """
  53. return False
  54. class PatternSimpleExpression(PatternExpression):
  55. """
  56. Message matching expression with a simple "<field> <operator> <value>"
  57. structure.
  58. """
  59. FIELD_CONTENT_MARKDOWN: PatternField = 'content.markdown'
  60. FIELD_CONTENT_PLAIN: PatternField = 'content.plain'
  61. FIELD_AUTHOR_ID: PatternField = 'author.id'
  62. FIELD_AUTHOR_JOINAGE: PatternField = 'author.joinage'
  63. FIELD_AUTHOR_NAME: PatternField = 'author.name'
  64. FIELD_LAST_MATCHED: PatternField = 'lastmatched'
  65. # Less preferred but recognized field aliases
  66. ALIAS_FIELD_CONTENT_MARKDOWN: PatternField = 'content'
  67. ALIAS_FIELD_AUTHOR_ID: PatternField = 'author'
  68. OP_EQUALS: PatternComparisonOperator = '=='
  69. OP_NOT_EQUALS: PatternComparisonOperator = '!='
  70. OP_LESS_THAN: PatternComparisonOperator = '<'
  71. OP_GREATER_THAN: PatternComparisonOperator = '>'
  72. OP_LESS_THAN_OR_EQUALS: PatternComparisonOperator = '<='
  73. OP_GREATER_THAN_OR_EQUALS: PatternComparisonOperator = '>='
  74. OP_CONTAINS: PatternComparisonOperator = 'contains'
  75. OP_NOT_CONTAINS: PatternComparisonOperator = '!contains'
  76. OP_MATCHES: PatternComparisonOperator = 'matches'
  77. OP_NOT_MATCHES: PatternComparisonOperator = '!matches'
  78. OP_CONTAINS_WORD: PatternComparisonOperator = 'containsword'
  79. OP_NOT_CONTAINS_WORD: PatternComparisonOperator = '!containsword'
  80. def __init__(self, field: PatternField, operator: PatternComparisonOperator, value: Any):
  81. super().__init__()
  82. self.field: PatternField = field
  83. self.operator: PatternComparisonOperator = operator
  84. self.value: Any = value
  85. def __field_value(self, message: Message, other_fields: dict[str, Any]) -> Any:
  86. cls = PatternSimpleExpression
  87. if self.field in (cls.FIELD_CONTENT_MARKDOWN, cls.ALIAS_FIELD_CONTENT_MARKDOWN):
  88. return message.content
  89. if self.field == cls.FIELD_CONTENT_PLAIN:
  90. return discordutils.remove_markdown(message.clean_content)
  91. if self.field in (cls.FIELD_AUTHOR_ID, cls.ALIAS_FIELD_AUTHOR_ID):
  92. return str(message.author.id)
  93. if self.field == cls.FIELD_AUTHOR_JOINAGE:
  94. return message.created_at - message.author.joined_at
  95. if self.field == cls.FIELD_AUTHOR_NAME:
  96. return message.author.name
  97. if self.field == cls.FIELD_LAST_MATCHED:
  98. long_ago = datetime(year=1900, month=1, day=1, hour=0, minute=0, second=0, tzinfo=timezone.utc)
  99. last_matched = other_fields.get('last_matched') or long_ago
  100. return message.created_at - last_matched
  101. raise ValueError(f'Bad field name "{self.field}"')
  102. def matches(self, message: Message, other_fields: dict[str, Any]) -> bool:
  103. cls = PatternSimpleExpression
  104. field_value = self.__field_value(message, other_fields)
  105. if self.operator == cls.OP_EQUALS:
  106. if isinstance(field_value, str) and isinstance(self.value, str):
  107. return field_value.lower() == self.value.lower()
  108. return field_value == self.value
  109. if self.operator == cls.OP_NOT_EQUALS:
  110. if isinstance(field_value, str) and isinstance(self.value, str):
  111. return field_value.lower() != self.value.lower()
  112. return field_value != self.value
  113. if self.operator == cls.OP_LESS_THAN:
  114. return field_value < self.value
  115. if self.operator == cls.OP_GREATER_THAN:
  116. return field_value > self.value
  117. if self.operator == cls.OP_LESS_THAN_OR_EQUALS:
  118. return field_value <= self.value
  119. if self.operator == cls.OP_GREATER_THAN_OR_EQUALS:
  120. return field_value >= self.value
  121. if self.operator == cls.OP_CONTAINS:
  122. return self.value.lower() in field_value.lower()
  123. if self.operator == cls.OP_NOT_CONTAINS:
  124. return self.value.lower() not in field_value.lower()
  125. if self.operator in (cls.OP_MATCHES, cls.OP_CONTAINS_WORD):
  126. return self.value.search(field_value.lower()) is not None
  127. if self.operator in (cls.OP_NOT_MATCHES, cls.OP_NOT_CONTAINS_WORD):
  128. return self.value.search(field_value.lower()) is None
  129. raise ValueError(f'Bad operator {self.operator}')
  130. def __str__(self) -> str:
  131. return f'({self.field} {self.operator} {self.value})'
  132. class PatternCompoundExpression(PatternExpression):
  133. """
  134. Message matching expression that combines several child expressions with
  135. a boolean operator.
  136. """
  137. OP_NOT = '!'
  138. OP_AND = 'and'
  139. OP_OR = 'or'
  140. def __init__(self, operator: PatternBooleanOperator, operands: list[PatternExpression]):
  141. super().__init__()
  142. self.operator: PatternBooleanOperator = operator
  143. self.operands = list(operands)
  144. def matches(self, message: Message, other_fields: dict[str, Any]) -> bool:
  145. if self.operator == PatternCompoundExpression.OP_NOT:
  146. return not self.operands[0].matches(message, other_fields)
  147. if self.operator == PatternCompoundExpression.OP_AND:
  148. for op in self.operands:
  149. if not op.matches(message, other_fields):
  150. return False
  151. return True
  152. if self.operator == PatternCompoundExpression.OP_OR:
  153. for op in self.operands:
  154. if op.matches(message, other_fields):
  155. return True
  156. return False
  157. raise ValueError(f'Bad operator "{self.operator}"')
  158. def __str__(self) -> str:
  159. if self.operator == PatternCompoundExpression.OP_NOT:
  160. return f'(!( {self.operands[0]} ))'
  161. strs = map(str, self.operands)
  162. joined = f' {self.operator} '.join(strs)
  163. return f'( {joined} )'
  164. class PatternStatement:
  165. """
  166. A full message match statement. If a message matches the given expression,
  167. the given actions should be performed.
  168. """
  169. DEFAULT_PRIORITY: int = 100
  170. def __init__(self,
  171. name: str,
  172. actions: list[PatternAction],
  173. expression: PatternExpression,
  174. original: str,
  175. priority: int = DEFAULT_PRIORITY):
  176. self.name: str = name
  177. self.actions: list[PatternAction] = list(actions) # PatternAction[]
  178. self.expression: PatternExpression = expression
  179. self.original: str = original
  180. self.priority: int = priority
  181. def check_deprecations(self) -> None:
  182. """
  183. Tests whether this statement uses any deprecated syntax. Will raise a
  184. PatternDeprecationError if one is found.
  185. """
  186. self.__check_deprecations(self.expression)
  187. @classmethod
  188. def __check_deprecations(cls, expression: PatternExpression) -> None:
  189. if isinstance(expression, PatternSimpleExpression):
  190. s: PatternSimpleExpression = expression
  191. if s.field in PatternCompiler.DEPRECATED_FIELDS:
  192. raise PatternDeprecationError(f'"{s.field}" field is deprecated')
  193. elif isinstance(expression, PatternCompoundExpression):
  194. c: PatternCompoundExpression = expression
  195. for oper in c.operands:
  196. cls.__check_deprecations(oper)
  197. def to_json(self) -> dict[str, Any]:
  198. """
  199. Returns a JSON representation of this statement.
  200. """
  201. return {
  202. 'name': self.name,
  203. 'priority': self.priority,
  204. 'statement': self.original,
  205. }
  206. @classmethod
  207. def from_json(cls, json: dict[str, Any]):
  208. """
  209. Gets a PatternStatement from its JSON representation.
  210. """
  211. ps = PatternCompiler.parse_statement(json['name'], json['statement'])
  212. ps.priority = json.get('priority', cls.DEFAULT_PRIORITY)
  213. return ps
  214. class PatternCompiler:
  215. """
  216. Parses a user-provided message filter statement into a PatternStatement.
  217. """
  218. DATATYPE_FLOAT: str = 'float'
  219. DATATYPE_ID: str = 'id'
  220. DATATYPE_INT: str = 'int'
  221. DATATYPE_MEMBER: str = 'Member'
  222. DATATYPE_REGEX: str = 'regex'
  223. DATATYPE_TEXT: str = 'text'
  224. DATATYPE_TIMESPAN: str = 'timespan'
  225. FIELD_TO_DATATYPE: dict[PatternField, str] = {
  226. PatternSimpleExpression.ALIAS_FIELD_AUTHOR_ID: DATATYPE_MEMBER,
  227. PatternSimpleExpression.FIELD_AUTHOR_ID: DATATYPE_ID,
  228. PatternSimpleExpression.FIELD_AUTHOR_JOINAGE: DATATYPE_TIMESPAN,
  229. PatternSimpleExpression.FIELD_AUTHOR_NAME: DATATYPE_TEXT,
  230. PatternSimpleExpression.ALIAS_FIELD_CONTENT_MARKDOWN: DATATYPE_TEXT, # deprecated, use content.markdown or content.plain
  231. PatternSimpleExpression.FIELD_CONTENT_MARKDOWN: DATATYPE_TEXT,
  232. PatternSimpleExpression.FIELD_CONTENT_PLAIN: DATATYPE_TEXT,
  233. PatternSimpleExpression.FIELD_LAST_MATCHED: DATATYPE_TIMESPAN,
  234. }
  235. DEPRECATED_FIELDS: set[PatternField] = { 'content' }
  236. ACTION_TO_ARGS: dict[PatternActionType, list[str]] = {
  237. PatternAction.TYPE_BAN: [],
  238. PatternAction.TYPE_DELETE: [],
  239. PatternAction.TYPE_KICK: [],
  240. PatternAction.TYPE_INFORM_MODS: [],
  241. PatternAction.TYPE_WARN_MODS: [],
  242. PatternAction.TYPE_REPLY: [ DATATYPE_TEXT ],
  243. }
  244. OPERATORS_IDENTITY: set[PatternComparisonOperator] = {
  245. PatternSimpleExpression.OP_EQUALS,
  246. PatternSimpleExpression.OP_NOT_EQUALS,
  247. }
  248. OPERATORS_COMPARISON: set[PatternComparisonOperator] = {
  249. PatternSimpleExpression.OP_LESS_THAN,
  250. PatternSimpleExpression.OP_GREATER_THAN,
  251. PatternSimpleExpression.OP_LESS_THAN_OR_EQUALS,
  252. PatternSimpleExpression.OP_GREATER_THAN_OR_EQUALS,
  253. }
  254. OPERATORS_NUMERIC: set[PatternComparisonOperator] = OPERATORS_IDENTITY | OPERATORS_COMPARISON
  255. OPERATORS_TEXT: set[PatternComparisonOperator] = OPERATORS_IDENTITY | {
  256. PatternSimpleExpression.OP_CONTAINS,
  257. PatternSimpleExpression.OP_NOT_CONTAINS,
  258. PatternSimpleExpression.OP_CONTAINS_WORD,
  259. PatternSimpleExpression.OP_NOT_CONTAINS_WORD,
  260. PatternSimpleExpression.OP_MATCHES,
  261. PatternSimpleExpression.OP_NOT_MATCHES,
  262. }
  263. OPERATORS_ALL: set[str] = OPERATORS_IDENTITY | OPERATORS_COMPARISON | OPERATORS_TEXT
  264. DATATYPE_TO_OPERATORS: dict[str, set[PatternComparisonOperator]] = {
  265. DATATYPE_ID: OPERATORS_IDENTITY,
  266. DATATYPE_MEMBER: OPERATORS_IDENTITY,
  267. DATATYPE_TEXT: OPERATORS_TEXT,
  268. DATATYPE_INT: OPERATORS_NUMERIC,
  269. DATATYPE_FLOAT: OPERATORS_NUMERIC,
  270. DATATYPE_TIMESPAN: OPERATORS_NUMERIC,
  271. }
  272. WHITESPACE_CHARS: str = ' \t\n\r'
  273. STRING_QUOTE_CHARS: str = '\'"'
  274. SYMBOL_CHARS: str = 'abcdefghijklmnopqrstuvwxyz.'
  275. VALUE_CHARS: str = '0123456789dhms<@!>'
  276. OP_CHARS: str = '<=>!(),'
  277. MAX_EXPRESSION_NESTING: int = 8
  278. @classmethod
  279. def expression_str_from_context(cls, context: Context, name: str) -> str:
  280. """
  281. Extracts the statement string from an "add" command context.
  282. """
  283. pattern_str: str = context.message.content
  284. command_chain = [ name ]
  285. cmd = context.command
  286. while cmd:
  287. command_chain.insert(0, cmd.name)
  288. cmd = cmd.parent
  289. command_chain[0] = f'{context.prefix}{command_chain[0]}'
  290. for cmd in command_chain:
  291. if pattern_str.startswith(cmd):
  292. pattern_str = pattern_str[len(cmd):].lstrip()
  293. elif pattern_str.startswith(f'"{cmd}"'):
  294. pattern_str = pattern_str[len(cmd) + 2:].lstrip()
  295. return pattern_str
  296. @classmethod
  297. def parse_statement(cls, name: str, statement: str) -> PatternStatement:
  298. """
  299. Parses a user-provided message filter statement into a PatternStatement.
  300. Raises PatternError on failure.
  301. """
  302. tokens: list[str] = cls.__tokenize(statement)
  303. token_index: int = 0
  304. actions, token_index = cls.__read_actions(tokens, token_index)
  305. expression, token_index = cls.__read_expression(tokens, token_index)
  306. return PatternStatement(name, actions, expression, statement)
  307. @classmethod
  308. def __tokenize(cls, statement: str) -> list[str]:
  309. """
  310. Converts a message filter statement into a list of tokens.
  311. """
  312. tokens: list[str] = []
  313. in_quote: Union[bool, str] = False
  314. in_escape: bool = False
  315. all_token_types: set[str] = { 'sym', 'op', 'val' }
  316. possible_token_types: set[str] = set(all_token_types)
  317. current_token: str = ''
  318. for ch in statement:
  319. if in_quote:
  320. if in_escape:
  321. if ch == 'n':
  322. current_token += '\n'
  323. elif ch == 't':
  324. current_token += '\t'
  325. else:
  326. current_token += ch
  327. in_escape = False
  328. elif ch == '\\':
  329. in_escape = True
  330. elif ch == in_quote:
  331. current_token += ch
  332. tokens.append(current_token)
  333. current_token = ''
  334. possible_token_types |= all_token_types
  335. in_quote = False
  336. else:
  337. current_token += ch
  338. else:
  339. if ch in cls.STRING_QUOTE_CHARS:
  340. if len(current_token) > 0:
  341. tokens.append(current_token)
  342. possible_token_types |= all_token_types
  343. in_quote = ch
  344. current_token = ch
  345. elif ch == '\\':
  346. raise PatternError("Unexpected \\ outside quoted string")
  347. elif ch in cls.WHITESPACE_CHARS:
  348. if len(current_token) > 0:
  349. tokens.append(current_token)
  350. current_token = ''
  351. possible_token_types |= all_token_types
  352. else:
  353. possible_ch_types = set()
  354. if ch in cls.SYMBOL_CHARS:
  355. possible_ch_types.add('sym')
  356. if ch in cls.VALUE_CHARS:
  357. possible_ch_types.add('val')
  358. if ch in cls.OP_CHARS:
  359. possible_ch_types.add('op')
  360. if len(current_token) > 0 and \
  361. possible_ch_types.isdisjoint(possible_token_types):
  362. if len(current_token) > 0:
  363. tokens.append(current_token)
  364. current_token = ''
  365. possible_token_types |= all_token_types
  366. possible_token_types &= possible_ch_types
  367. current_token += ch
  368. if len(current_token) > 0:
  369. tokens.append(current_token)
  370. # Some symbols might be glommed onto other tokens. Split 'em up.
  371. prefixes_to_split = [ '!', '(', ',' ]
  372. suffixes_to_split = [ ')', ',' ]
  373. i = 0
  374. while i < len(tokens):
  375. token = tokens[i]
  376. mutated = False
  377. for prefix in prefixes_to_split:
  378. if token.startswith(prefix) and len(token) > len(prefix):
  379. tokens.insert(i, prefix)
  380. tokens[i + 1] = token[len(prefix):]
  381. i += 1
  382. mutated = True
  383. break
  384. if mutated:
  385. continue
  386. for suffix in suffixes_to_split:
  387. if token.endswith(suffix) and len(token) > len(suffix):
  388. tokens[i] = token[0:-len(suffix)]
  389. tokens.insert(i + 1, suffix)
  390. mutated = True
  391. break
  392. if mutated:
  393. continue
  394. i += 1
  395. return tokens
  396. @classmethod
  397. def __read_actions(cls,
  398. tokens: list[str],
  399. token_index: int) -> tuple[list[PatternAction], int]:
  400. """
  401. Reads the actions from a list of statement tokens. Returns a tuple
  402. containing a list of PatternActions and the token index this method
  403. left off at (the token after the "if").
  404. """
  405. actions: list[PatternAction] = []
  406. current_action_tokens = []
  407. while token_index < len(tokens):
  408. token = tokens[token_index]
  409. if token == 'if':
  410. if len(current_action_tokens) > 0:
  411. a = PatternAction(current_action_tokens[0],
  412. current_action_tokens[1:])
  413. cls.__validate_action(a)
  414. actions.append(a)
  415. token_index += 1
  416. return actions, token_index
  417. elif token == ',':
  418. if len(current_action_tokens) < 1:
  419. raise PatternError('Unexpected ,')
  420. a = PatternAction(current_action_tokens[0],
  421. current_action_tokens[1:])
  422. cls.__validate_action(a)
  423. actions.append(a)
  424. current_action_tokens = []
  425. else:
  426. current_action_tokens.append(token)
  427. token_index += 1
  428. raise PatternError('Unexpected end of line in action list')
  429. @classmethod
  430. def __validate_action(cls, action: PatternAction) -> None:
  431. args: list[str] = cls.ACTION_TO_ARGS.get(action.action)
  432. if args is None:
  433. raise PatternError(f'Unknown action "{action.action}"')
  434. if len(action.arguments) != len(args):
  435. if len(args) == 0:
  436. raise PatternError(f'Action "{action.action}" expects no ' + \
  437. f'arguments, got {len(action.arguments)}.')
  438. raise PatternError(f'Action "{action.action}" expects ' + \
  439. f'{len(args)} arguments, got {len(action.arguments)}.')
  440. for i, datatype in enumerate(args):
  441. action.arguments[i] = cls.__parse_value(action.arguments[i], datatype)
  442. @classmethod
  443. def __read_expression(cls,
  444. tokens: list[str],
  445. token_index: int,
  446. depth: int = 0,
  447. one_subexpression: bool = False) -> tuple[PatternExpression, int]:
  448. """
  449. Reads an expression from a list of statement tokens. Returns a tuple
  450. containing the PatternExpression and the token index it left off at.
  451. If one_subexpression is True then it will return after reading a
  452. single expression instead of joining multiples (for reading the
  453. subject of a NOT expression).
  454. """
  455. subexpressions = []
  456. last_compound_operator = None
  457. while token_index < len(tokens):
  458. if one_subexpression:
  459. if len(subexpressions) == 1:
  460. return subexpressions[0], token_index
  461. if len(subexpressions) > 1:
  462. raise PatternError('Too many subexpressions')
  463. if tokens[token_index] == ')':
  464. if len(subexpressions) == 0:
  465. raise PatternError('No subexpressions')
  466. if len(subexpressions) == 1:
  467. return subexpressions[0], token_index
  468. return (PatternCompoundExpression(last_compound_operator,
  469. subexpressions), token_index)
  470. if tokens[token_index] in { PatternCompoundExpression.OP_AND, PatternCompoundExpression.OP_OR }:
  471. compound_operator = tokens[token_index]
  472. if last_compound_operator and \
  473. compound_operator != last_compound_operator:
  474. subexpressions = [
  475. PatternCompoundExpression(last_compound_operator,
  476. subexpressions),
  477. ]
  478. last_compound_operator = compound_operator
  479. token_index += 1
  480. if tokens[token_index] == PatternCompoundExpression.OP_NOT:
  481. (exp, next_index) = cls.__read_expression(tokens,
  482. token_index + 1, depth + 1, one_subexpression=True)
  483. subexpressions.append(PatternCompoundExpression('!', [exp]))
  484. token_index = next_index
  485. elif tokens[token_index] == '(':
  486. (exp, next_index) = cls.__read_expression(tokens,
  487. token_index + 1, depth + 1)
  488. if tokens[next_index] != ')':
  489. raise PatternError('Expected )')
  490. subexpressions.append(exp)
  491. token_index = next_index + 1
  492. else:
  493. (simple, next_index) = cls.__read_simple_expression(tokens,
  494. token_index, depth)
  495. subexpressions.append(simple)
  496. token_index = next_index
  497. if len(subexpressions) == 0:
  498. raise PatternError('No subexpressions')
  499. elif len(subexpressions) == 1:
  500. return subexpressions[0], token_index
  501. else:
  502. return PatternCompoundExpression(last_compound_operator,
  503. subexpressions), token_index
  504. @classmethod
  505. def __read_simple_expression(cls,
  506. tokens: list[str],
  507. token_index: int,
  508. depth: int = 0) -> tuple[PatternExpression, int]:
  509. """
  510. Reads a simple expression consisting of a field name, operator, and
  511. comparison value. Returns a tuple of the PatternSimpleExpression and
  512. the token index it left off at.
  513. """
  514. if depth > cls.MAX_EXPRESSION_NESTING:
  515. raise PatternError('Expression nests too deeply')
  516. if token_index >= len(tokens):
  517. raise PatternError('Expected field name, found EOL')
  518. field: PatternField = tokens[token_index]
  519. token_index += 1
  520. datatype = cls.FIELD_TO_DATATYPE.get(field, None)
  521. if datatype is None:
  522. raise PatternError(f'No such field "{field}"')
  523. if token_index >= len(tokens):
  524. raise PatternError('Expected operator, found EOL')
  525. op = tokens[token_index]
  526. token_index += 1
  527. if op == PatternCompoundExpression.OP_NOT:
  528. if token_index >= len(tokens):
  529. raise PatternError('Expected operator, found EOL')
  530. op = '!' + tokens[token_index]
  531. token_index += 1
  532. allowed_ops = cls.DATATYPE_TO_OPERATORS[datatype]
  533. if op not in allowed_ops:
  534. if op in cls.OPERATORS_ALL:
  535. raise PatternError(f'Operator {op} cannot be used with ' + \
  536. f'field "{field}"')
  537. raise PatternError(f'Unrecognized operator "{op}" - allowed: ' + \
  538. f'{sorted(list(allowed_ops))}')
  539. if token_index >= len(tokens):
  540. raise PatternError('Expected value, found EOL')
  541. value_str = tokens[token_index]
  542. try:
  543. value = cls.__parse_value(value_str, datatype, op)
  544. except ValueError as cause:
  545. raise PatternError(f'Bad value {value_str}') from cause
  546. token_index += 1
  547. exp = PatternSimpleExpression(field, op, value)
  548. return exp, token_index
  549. @classmethod
  550. def __parse_value(cls, value: str, datatype: str, op: str = None) -> Any:
  551. """
  552. Converts a value token to its Python value. Raises ValueError on failure.
  553. """
  554. if datatype == cls.DATATYPE_ID:
  555. if not is_user_id(value):
  556. raise ValueError(f'Illegal user id value: {value}')
  557. return value
  558. if datatype == cls.DATATYPE_MEMBER:
  559. return user_id_from_mention(value)
  560. if datatype == cls.DATATYPE_TEXT:
  561. s = str_from_quoted_str(value)
  562. if op in ('matches', '!matches'):
  563. try:
  564. return re.compile(s.lower())
  565. except re.error as e:
  566. raise ValueError(f'Invalid regex: {e}') from e
  567. if op in ('containsword', '!containsword'):
  568. try:
  569. return re.compile(f'\\b{re.escape(s.lower())}\\b')
  570. except re.error as e:
  571. raise ValueError(f'Invalid regex: {e}') from e
  572. return s
  573. if datatype == cls.DATATYPE_INT:
  574. return int(value)
  575. if datatype == cls.DATATYPE_FLOAT:
  576. return float(value)
  577. if datatype == cls.DATATYPE_TIMESPAN:
  578. return timedelta_from_str(value)
  579. raise ValueError(f'Unhandled datatype {datatype}')