Bladeren bron

Cleanup

master
Rocketsoup 1 jaar geleden
bovenliggende
commit
7c8c875593
2 gewijzigde bestanden met toevoegingen van 21 en 37 verwijderingen
  1. 1
    1
      .pylintrc
  2. 20
    36
      search.py

+ 1
- 1
.pylintrc Bestand weergeven

1
 [MESSAGES CONTROL]
1
 [MESSAGES CONTROL]
2
-disable=bad-indentation, invalid-name
2
+disable=bad-indentation, bare-except, global-statement, invalid-name
3
 
3
 
4
 [FORMAT]
4
 [FORMAT]
5
 
5
 

+ 20
- 36
search.py Bestand weergeven

1
+"""Script for searching email messages in a collection of zip files of raw email message files."""
1
 import argparse
2
 import argparse
2
-import re
3
+import os
3
 import platform
4
 import platform
5
+import re
4
 import subprocess
6
 import subprocess
7
+import sys
8
+from email.message import EmailMessage
9
+from email.parser import BytesParser
10
+from email.utils import parsedate
11
+from enum import Enum
5
 from tempfile import TemporaryDirectory
12
 from tempfile import TemporaryDirectory
6
 from typing import List, Optional, Union
13
 from typing import List, Optional, Union
7
-from enum import Enum
8
-from email.utils import parsedate
9
-from email.parser import BytesParser
10
-from email.message import EmailMessage
11
 from zipfile import ZipFile
14
 from zipfile import ZipFile
12
-import sys
13
-import os
14
 
15
 
15
 class BooleanOperator(Enum):
16
 class BooleanOperator(Enum):
16
 	"""Boolean combinatory operator enum."""
17
 	"""Boolean combinatory operator enum."""
138
 		self.subject: Optional[str] = None
139
 		self.subject: Optional[str] = None
139
 		self.before: Optional[List[int]] = None
140
 		self.before: Optional[List[int]] = None
140
 		self.after: Optional[List[int]] = None
141
 		self.after: Optional[List[int]] = None
141
-		self.raw: bool = False
142
 
142
 
143
 args: Options = Options()
143
 args: Options = Options()
144
 message_filter: Filter = None
144
 message_filter: Filter = None
186
 	filename += '.eml'
186
 	filename += '.eml'
187
 	return filename
187
 	return filename
188
 
188
 
189
-def walk_directory(path: str) -> None:
189
+def walk_directory(directory: str) -> None:
190
 	"""Spiders a directory looking for subdirectories and email zip archives."""
190
 	"""Spiders a directory looking for subdirectories and email zip archives."""
191
 	global zip_count
191
 	global zip_count
192
-	for f in os.listdir(path):
193
-		full_path = path + os.sep + f
192
+	for f in os.listdir(directory):
193
+		full_path = directory + os.sep + f
194
 		if f.lower().endswith('.zip'):
194
 		if f.lower().endswith('.zip'):
195
 			zip_count += 1
195
 			zip_count += 1
196
 			process_zip_file(full_path)
196
 			process_zip_file(full_path)
199
 
199
 
200
 def process_zip_file(zip_path: str) -> None:
200
 def process_zip_file(zip_path: str) -> None:
201
 	"""Processes a zip file of email messages."""
201
 	"""Processes a zip file of email messages."""
202
-	global zip_result_count
202
+	global parser, zip_result_count
203
 	print('Searching ' + zip_path + '...')
203
 	print('Searching ' + zip_path + '...')
204
 	zip_result_count = 0
204
 	zip_result_count = 0
205
-	with ZipFile(zip_path, mode='r') as zip:
206
-		for entry in zip.filelist:
205
+	with ZipFile(zip_path, mode='r') as z:
206
+		for entry in z.filelist:
207
 			if entry.is_dir():
207
 			if entry.is_dir():
208
 				continue
208
 				continue
209
-			data = zip.read(entry)
209
+			data = z.read(entry)
210
 			parser = BytesParser()
210
 			parser = BytesParser()
211
 			try:
211
 			try:
212
 				email = parser.parsebytes(data)
212
 				email = parser.parsebytes(data)
221
 
221
 
222
 def search_content(email: EmailMessage) -> None:
222
 def search_content(email: EmailMessage) -> None:
223
 	"""Processes an email message in a zip file."""
223
 	"""Processes an email message in a zip file."""
224
-	global result_count, zip_result_count
225
 	if message_filter.matches(email):
224
 	if message_filter.matches(email):
226
 		save_message(email)
225
 		save_message(email)
227
 
226
 
228
 def search_raw_content(raw_bytes: bytes) -> None:
227
 def search_raw_content(raw_bytes: bytes) -> None:
229
-	global result_count, zip_result_count
228
+	"""Searches an unparsed email message."""
230
 	encodings = [ 'ascii', 'iso-8859-1', 'utf-8' ]
229
 	encodings = [ 'ascii', 'iso-8859-1', 'utf-8' ]
231
 	content = None
230
 	content = None
232
 	for encoding in encodings:
231
 	for encoding in encodings:
266
 def parse_arguments():
265
 def parse_arguments():
267
 	"""Parses command-line arguments to `args`."""
266
 	"""Parses command-line arguments to `args`."""
268
 	global args, parser
267
 	global args, parser
269
-	# TODO: Revisit raw mode and how unparseable emails should be handled
270
 	parser = argparse.ArgumentParser(
268
 	parser = argparse.ArgumentParser(
271
 		prog='search.py',
269
 		prog='search.py',
272
 		description='Searches a directory of zipped email messages. ' + \
270
 		description='Searches a directory of zipped email messages. ' + \
273
 			'Messages are assumed to be stored one per file within the zip files (Maildir format). ' + \
271
 			'Messages are assumed to be stored one per file within the zip files (Maildir format). ' + \
274
-			'Input directories are searched recursively for any zip files contained within.',
275
-		epilog='If raw mode is enabled, any messages that cannot be decoded ' + \
276
-			'will be searched as raw text.'
272
+			'Input directories are searched recursively for any zip files contained within.'
277
 	)
273
 	)
278
 	parser.add_argument(
274
 	parser.add_argument(
279
 		'keywords',
275
 		'keywords',
327
 		metavar='YYYY-MM-DD',
323
 		metavar='YYYY-MM-DD',
328
 		help='date to search on or before'
324
 		help='date to search on or before'
329
 	)
325
 	)
330
-	parser.add_argument(
331
-		'-r', '--raw',
332
-		default=False,
333
-		action='store_true',
334
-		help='allows searching unparseable messages as raw text'
335
-	)
336
 	args = parser.parse_args()
326
 	args = parser.parse_args()
337
 
327
 
338
 def validate_arguments():
328
 def validate_arguments():
339
 	"""Validate and parse special field types"""
329
 	"""Validate and parse special field types"""
340
-	global args
341
 	args.keywords = args.keywords[0]  # no idea why it nests it 2D
330
 	args.keywords = args.keywords[0]  # no idea why it nests it 2D
342
 	if args.before is not None:
331
 	if args.before is not None:
343
 		m = re.match('^([0-9]{4})-([0-9]{2})-([0-9]{2})$', args.before)
332
 		m = re.match('^([0-9]{4})-([0-9]{2})-([0-9]{2})$', args.before)
349
 		if m is None:
338
 		if m is None:
350
 			parser.error('after date must be in YYYY-MM-DD format (e.g. 2015-03-28)')
339
 			parser.error('after date must be in YYYY-MM-DD format (e.g. 2015-03-28)')
351
 		args.after = [ int(m.group(1)), int(m.group(2)), int(m.group(3)) ]
340
 		args.after = [ int(m.group(1)), int(m.group(2)), int(m.group(3)) ]
352
-	if args.raw:
353
-		if getattr(args, 'from') is not None or \
354
-			getattr(args, 'to') is not None or \
355
-			args.subject is not None or \
356
-			args.before is not None or \
357
-			args.after is not None:
358
-			print('Warning: Cannot search header fields in raw mode. Ignoring.', file=sys.stderr)
359
 	if args.dir is None:
341
 	if args.dir is None:
360
 		args.dir = [ '.' ]
342
 		args.dir = [ '.' ]
361
 	else:
343
 	else:
369
 			parser.error(f'output path \'{args.output}\' does not exist or is not a directory')
351
 			parser.error(f'output path \'{args.output}\' does not exist or is not a directory')
370
 
352
 
371
 def construct_filter():
353
 def construct_filter():
354
+	"""Sets `filter` from parsed command line arguments."""
372
 	global message_filter
355
 	global message_filter
373
 	criteria: List[Filter] = []
356
 	criteria: List[Filter] = []
374
 	keyword_filters = []
357
 	keyword_filters = []
377
 		if len(k) > 0:
360
 		if len(k) > 0:
378
 			keyword_filters.append(BodyKeywordFilter(k, case_sensitive=args.casesensitive))
361
 			keyword_filters.append(BodyKeywordFilter(k, case_sensitive=args.casesensitive))
379
 	if len(keyword_filters) > 0:
362
 	if len(keyword_filters) > 0:
380
-		criteria.append(BooleanFilter(BooleanOperator.or_op if args.any else BooleanOperator.and_op, keyword_filters))
363
+		op = BooleanOperator.or_op if args.any else BooleanOperator.and_op
364
+		criteria.append(BooleanFilter(op, keyword_filters))
381
 	if getattr(args, 'from') is not None:
365
 	if getattr(args, 'from') is not None:
382
 		criteria.append(HeaderFilter('from', getattr(args, 'from')))
366
 		criteria.append(HeaderFilter('from', getattr(args, 'from')))
383
 	if getattr(args, 'to') is not None:
367
 	if getattr(args, 'to') is not None:

Laden…
Annuleren
Opslaan