Rocketsoup 1 gadu atpakaļ
vecāks
revīzija
7c8c875593
2 mainīti faili ar 21 papildinājumu un 37 dzēšanām
  1. 1
    1
      .pylintrc
  2. 20
    36
      search.py

+ 1
- 1
.pylintrc Parādīt failu

@@ -1,5 +1,5 @@
1 1
 [MESSAGES CONTROL]
2
-disable=bad-indentation, invalid-name
2
+disable=bad-indentation, bare-except, global-statement, invalid-name
3 3
 
4 4
 [FORMAT]
5 5
 

+ 20
- 36
search.py Parādīt failu

@@ -1,16 +1,17 @@
1
+"""Script for searching email messages in a collection of zip files of raw email message files."""
1 2
 import argparse
2
-import re
3
+import os
3 4
 import platform
5
+import re
4 6
 import subprocess
7
+import sys
8
+from email.message import EmailMessage
9
+from email.parser import BytesParser
10
+from email.utils import parsedate
11
+from enum import Enum
5 12
 from tempfile import TemporaryDirectory
6 13
 from typing import List, Optional, Union
7
-from enum import Enum
8
-from email.utils import parsedate
9
-from email.parser import BytesParser
10
-from email.message import EmailMessage
11 14
 from zipfile import ZipFile
12
-import sys
13
-import os
14 15
 
15 16
 class BooleanOperator(Enum):
16 17
 	"""Boolean combinatory operator enum."""
@@ -138,7 +139,6 @@ class Options:
138 139
 		self.subject: Optional[str] = None
139 140
 		self.before: Optional[List[int]] = None
140 141
 		self.after: Optional[List[int]] = None
141
-		self.raw: bool = False
142 142
 
143 143
 args: Options = Options()
144 144
 message_filter: Filter = None
@@ -186,11 +186,11 @@ def filename_from_email(email: EmailMessage) -> str:
186 186
 	filename += '.eml'
187 187
 	return filename
188 188
 
189
-def walk_directory(path: str) -> None:
189
+def walk_directory(directory: str) -> None:
190 190
 	"""Spiders a directory looking for subdirectories and email zip archives."""
191 191
 	global zip_count
192
-	for f in os.listdir(path):
193
-		full_path = path + os.sep + f
192
+	for f in os.listdir(directory):
193
+		full_path = directory + os.sep + f
194 194
 		if f.lower().endswith('.zip'):
195 195
 			zip_count += 1
196 196
 			process_zip_file(full_path)
@@ -199,14 +199,14 @@ def walk_directory(path: str) -> None:
199 199
 
200 200
 def process_zip_file(zip_path: str) -> None:
201 201
 	"""Processes a zip file of email messages."""
202
-	global zip_result_count
202
+	global parser, zip_result_count
203 203
 	print('Searching ' + zip_path + '...')
204 204
 	zip_result_count = 0
205
-	with ZipFile(zip_path, mode='r') as zip:
206
-		for entry in zip.filelist:
205
+	with ZipFile(zip_path, mode='r') as z:
206
+		for entry in z.filelist:
207 207
 			if entry.is_dir():
208 208
 				continue
209
-			data = zip.read(entry)
209
+			data = z.read(entry)
210 210
 			parser = BytesParser()
211 211
 			try:
212 212
 				email = parser.parsebytes(data)
@@ -221,12 +221,11 @@ def process_zip_file(zip_path: str) -> None:
221 221
 
222 222
 def search_content(email: EmailMessage) -> None:
223 223
 	"""Processes an email message in a zip file."""
224
-	global result_count, zip_result_count
225 224
 	if message_filter.matches(email):
226 225
 		save_message(email)
227 226
 
228 227
 def search_raw_content(raw_bytes: bytes) -> None:
229
-	global result_count, zip_result_count
228
+	"""Searches an unparsed email message."""
230 229
 	encodings = [ 'ascii', 'iso-8859-1', 'utf-8' ]
231 230
 	content = None
232 231
 	for encoding in encodings:
@@ -266,14 +265,11 @@ def save_raw_message(content: bytes) -> None:
266 265
 def parse_arguments():
267 266
 	"""Parses command-line arguments to `args`."""
268 267
 	global args, parser
269
-	# TODO: Revisit raw mode and how unparseable emails should be handled
270 268
 	parser = argparse.ArgumentParser(
271 269
 		prog='search.py',
272 270
 		description='Searches a directory of zipped email messages. ' + \
273 271
 			'Messages are assumed to be stored one per file within the zip files (Maildir format). ' + \
274
-			'Input directories are searched recursively for any zip files contained within.',
275
-		epilog='If raw mode is enabled, any messages that cannot be decoded ' + \
276
-			'will be searched as raw text.'
272
+			'Input directories are searched recursively for any zip files contained within.'
277 273
 	)
278 274
 	parser.add_argument(
279 275
 		'keywords',
@@ -327,17 +323,10 @@ def parse_arguments():
327 323
 		metavar='YYYY-MM-DD',
328 324
 		help='date to search on or before'
329 325
 	)
330
-	parser.add_argument(
331
-		'-r', '--raw',
332
-		default=False,
333
-		action='store_true',
334
-		help='allows searching unparseable messages as raw text'
335
-	)
336 326
 	args = parser.parse_args()
337 327
 
338 328
 def validate_arguments():
339 329
 	"""Validate and parse special field types"""
340
-	global args
341 330
 	args.keywords = args.keywords[0]  # no idea why it nests it 2D
342 331
 	if args.before is not None:
343 332
 		m = re.match('^([0-9]{4})-([0-9]{2})-([0-9]{2})$', args.before)
@@ -349,13 +338,6 @@ def validate_arguments():
349 338
 		if m is None:
350 339
 			parser.error('after date must be in YYYY-MM-DD format (e.g. 2015-03-28)')
351 340
 		args.after = [ int(m.group(1)), int(m.group(2)), int(m.group(3)) ]
352
-	if args.raw:
353
-		if getattr(args, 'from') is not None or \
354
-			getattr(args, 'to') is not None or \
355
-			args.subject is not None or \
356
-			args.before is not None or \
357
-			args.after is not None:
358
-			print('Warning: Cannot search header fields in raw mode. Ignoring.', file=sys.stderr)
359 341
 	if args.dir is None:
360 342
 		args.dir = [ '.' ]
361 343
 	else:
@@ -369,6 +351,7 @@ def validate_arguments():
369 351
 			parser.error(f'output path \'{args.output}\' does not exist or is not a directory')
370 352
 
371 353
 def construct_filter():
354
+	"""Sets `message_filter` from parsed command line arguments."""
372 355
 	global message_filter
373 356
 	criteria: List[Filter] = []
374 357
 	keyword_filters = []
@@ -377,7 +360,8 @@ def construct_filter():
377 360
 		if len(k) > 0:
378 361
 			keyword_filters.append(BodyKeywordFilter(k, case_sensitive=args.casesensitive))
379 362
 	if len(keyword_filters) > 0:
380
-		criteria.append(BooleanFilter(BooleanOperator.or_op if args.any else BooleanOperator.and_op, keyword_filters))
363
+		op = BooleanOperator.or_op if args.any else BooleanOperator.and_op
364
+		criteria.append(BooleanFilter(op, keyword_filters))
381 365
 	if getattr(args, 'from') is not None:
382 366
 		criteria.append(HeaderFilter('from', getattr(args, 'from')))
383 367
 	if getattr(args, 'to') is not None:

Notiek ielāde…
Atcelt
Saglabāt