forked from DidierStevens/Beta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
instantiation.py
585 lines (511 loc) · 23.4 KB
/
instantiation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
#!/usr/bin/env python
from __future__ import print_function
__description__ = "Tool to instantiate variables"
__author__ = 'Didier Stevens'
__version__ = '0.0.1'
__date__ = '2018/07/29'
"""
Source code put in the public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
History:
2018/07/29: start
Todo:
"""
import optparse
import glob
import collections
import time
import sys
import textwrap
import os
import gzip
import re
import fnmatch
from contextlib import contextmanager
def PrintManual():
    """Print the built-in manual to standard output.

    The manual text is split on newlines and every resulting line is wrapped
    with textwrap.fill before being printed.
    """
    manual = '''
Manual:
Errors occuring when opening a file are reported (and logged if logging is turned on), and the program moves on to the next file.
Errors occuring when reading & processing a file are reported (and logged if logging is turned on), and the program stops unless option ignoreprocessingerrors is used.
Option --grep can be used to select (grep) lines that have to be processed.
If this option is not used, all lines will be processed.
To select particular lines to be processed, used option --grep and provide a regular expression. All lines matching this regular expression will be processed.
You can also use a capture group in your regular expression. The line to be processed will become the content of the first capture group (and not the complete line).
The regular expression matching operation is case sensitive. Use option --grepoptions i to make the matching operation case insensitive.
Use option --grepoptions v to invert the selection.
Use option --grepoptions F to match a fixed string in stead of a regular expression.
Option --begingrep can be used to select the first line from which on lines have to be processed.
If this option is not used, all lines will be processed.
To select the first line to be processed, used option --begingrep and provide a regular expression. The line matching this regular expression and all following lines will be processed (depending on --grep and --endgrep).
The regular expression matching operation is case sensitive. Use option --begingrepoptions i to make the matching operation case insensitive.
Use option --begingrepoptions v to invert the selection.
Use option --begingrepoptions F to match a fixed string in stead of a regular expression.
Option --endgrep can be used to select the last line to be processed.
If this option is not used, all lines will be processed.
To select the last line to be processed, used option --endgrep and provide a regular expression. The line matching this regular expression will be the last line to be processed (depending on --grep).
The regular expression matching operation is case sensitive. Use option --endgrepoptions i to make the matching operation case insensitive.
Use option --endgrepoptions v to invert the selection.
Use option --endgrepoptions F to match a fixed string in stead of a regular expression.
When combining --begingrep and --endgrep, make sure that --endgrep does not match a line before --begingrep does.
The lines are written to standard output, except when option -o is used. When option -o is used, the lines are written to the filename specified by option -o.
Filenames used with option -o starting with # have special meaning.
#c#example.txt will write output both to the console (stdout) and file example.txt.
#g# will write output to a file with a filename generated by the tool like this: toolname-date-time.txt.
#g#KEYWORD will write output to a file with a filename generated by the tool like this: toolname-KEYWORD-date-time.txt.
Use #p#filename to display execution progress.
To process several files while creating seperate output files for each input file, use -o #s#%f%.result *.
This will create output files with the name of the inputfile and extension .result.
There are several variables available when creating separate output files:
%f% is the full filename (with directory if present)
%b% is the base name: the filename without directory
%d% is the directory
%r% is the root: the filename without extension
%e% is the extension
Most options can be combined, like #ps# for example.
#l# is used for literal filenames: if the output filename has to start with # (#example.txt for example), use filename #l##example.txt for example.
'''
    # Wrap every source line separately, then emit the whole manual at once.
    wrapped = [textwrap.fill(paragraph) for paragraph in manual.split('\n')]
    print('\n'.join(wrapped))
# Separator used when building CSV log lines and as the default separator
# for splitting a variables string in cVariables.
DEFAULT_SEPARATOR = ','
# Character placed around CSV fields that are empty or contain the separator.
QUOTE = '"'
def PrintError(*args, **kwargs):
    """Print to standard error.

    Takes the same positional and keyword arguments as print(), except for
    file, which is always sys.stderr.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def File2Strings(filename):
    """Read a text file and return its lines as a list of strings.

    Trailing newline characters are removed from every line.

    Returns None when the file cannot be opened or read; callers test for
    None instead of handling exceptions.
    """
    try:
        with open(filename, 'r') as f:
            # Build a real list: on Python 3 map() is lazy and its result
            # cannot be concatenated to a list by callers.
            return [line.rstrip('\n') for line in f]
    except Exception:
        # Best-effort contract: any failure is reported as None.
        return None
def ProcessAt(argument):
    """Expand an @file argument into the strings listed in that file.

    An argument that does not start with '@' is returned unchanged as a
    one-element list. Otherwise the remainder of the argument is used as a
    filename and its lines (as read by File2Strings) are returned.

    Raises Exception when the file cannot be read.
    """
    if not argument.startswith('@'):
        return [argument]
    strings = File2Strings(argument[1:])
    if strings is None:
        raise Exception('Error reading %s' % argument)
    return strings
# CIC: Call If Callable
def CIC(expression):
    """Call If Callable: return expression() when it is callable, else expression itself."""
    return expression() if callable(expression) else expression
# IFF: IF Function
def IFF(expression, valueTrue, valueFalse):
    """IF Function: pick valueTrue or valueFalse depending on expression.

    The selected value is passed through CIC, so a callable is only invoked
    when its branch is the one chosen.
    """
    chosen = valueTrue if expression else valueFalse
    return CIC(chosen)
class cVariables():
    """Dictionary of named values that can be substituted into strings.

    Placeholders have the form %name%.
    """

    def __init__(self, variablesstring='', separator=DEFAULT_SEPARATOR):
        """Initialise the variables from a string of name=value pairs.

        variablesstring: pairs joined by separator; an empty string creates
            no variables.
        separator: string that separates the pairs.

        Raises Exception when a pair does not contain '='.
        """
        self.dVariables = {}
        if variablesstring == '':
            return
        for variable in variablesstring.split(separator):
            name, value = self._SplitNameValue(variable)
            self.dVariables[name] = value

    @staticmethod
    def _SplitNameValue(variable):
        """Split 'name=value' on the first '=' and return (name, value)."""
        # The module-level helper this constructor used to call is not defined
        # in this file, so the parsing is done here.
        # NOTE(review): splitting on the first '=' is an assumption - confirm.
        name, found, value = variable.partition('=')
        if not found:
            raise Exception('Variable format error: %s' % variable)
        return name, value

    def SetVariable(self, name, value):
        """Create or overwrite variable name with value."""
        self.dVariables[name] = value

    def Instantiate(self, astring):
        """Return astring with every %name% replaced by that variable's value."""
        for key, value in self.dVariables.items():
            astring = astring.replace('%' + key + '%', value)
        return astring
class cOutput():
    """Line-oriented output to the console and/or a file.

    The filename option may start with '#switches#' where each switch is one
    of: s (separate output file per input file), p (report progress on
    stderr), c (also write to the console), l (literal filename, no effect
    on flags) and g (generate the filename from the tool name, an optional
    keyword and a timestamp).
    """

    def __init__(self, filenameOption=None):
        """Parse filenameOption and open the output file when one is needed."""
        self.filenameOption = filenameOption
        self.separateFiles = False
        self.progress = False
        self.console = False
        self.fOut = None
        if not self.filenameOption:
            return
        if self.ParseHash(self.filenameOption):
            # With separate files, the file is opened later by Filename().
            if self.filename != '' and not self.separateFiles:
                self.fOut = open(self.filename, 'w')
        elif self.filenameOption != '':
            self.fOut = open(self.filenameOption, 'w')

    def ParseHash(self, option):
        """Interpret a '#switches#filename' option.

        Sets self.filename and the flags selected by the switches. Returns
        True when option has that form and all switches are known, otherwise
        False.
        """
        if not option.startswith('#'):
            return False
        position = self.filenameOption.find('#', 1)
        if position <= 1:
            return False
        switches = self.filenameOption[1:position]
        self.filename = self.filenameOption[position + 1:]
        flagSwitches = {'s': 'separateFiles', 'p': 'progress', 'c': 'console'}
        for switch in switches:
            if switch in flagSwitches:
                setattr(self, flagSwitches[switch], True)
            elif switch == 'g':
                # The text after the switches becomes a keyword in the name.
                extra = (self.filename + '-') if self.filename != '' else ''
                toolname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
                self.filename = '%s-%s%s.txt' % (toolname, extra, self.FormatTime())
            elif switch != 'l':
                return False
        return True

    @staticmethod
    def FormatTime(epoch=None):
        """Format epoch (default: now) in local time as YYYYMMDD-HHMMSS."""
        moment = time.time() if epoch is None else epoch
        return '%04d%02d%02d-%02d%02d%02d' % time.localtime(moment)[0:6]

    def Line(self, line):
        """Write line to the console (no file, or console switch set) and to the open file, if any."""
        toConsole = self.fOut is None or self.console
        if toConsole:
            try:
                print(line)
            except UnicodeEncodeError:
                # Fall back to backslash escapes for characters the console
                # encoding cannot represent.
                encoding = sys.stdout.encoding
                print(line.encode(encoding, errors='backslashreplace').decode(encoding))
        if self.fOut is not None:
            self.fOut.write(line + '\n')
            self.fOut.flush()

    def LineTimestamped(self, line):
        """Write line prefixed with the current timestamp."""
        self.Line('%s: %s' % (self.FormatTime(), line))

    def Filename(self, filename, index, total):
        """Register the input file that is about to be processed.

        Reports progress on stderr when enabled. When separate output files
        are requested, closes the current output file and opens a new one
        whose name is the template with %f%, %b%, %d%, %r% and %e% filled in.
        """
        self.separateFilename = filename
        if self.progress:
            PrintError('%d/%d %s' % (index + 1, total, self.separateFilename))
        if not (self.separateFiles and self.filename != ''):
            return
        basename = os.path.basename(self.separateFilename)
        root, extension = os.path.splitext(basename)
        oFilenameVariables = cVariables()
        for name, value in (
            ('f', self.separateFilename),
            ('b', basename),
            ('d', os.path.dirname(self.separateFilename)),
            ('r', root),
            ('e', extension),
        ):
            oFilenameVariables.SetVariable(name, value)
        self.Close()
        self.fOut = open(oFilenameVariables.Instantiate(self.filename), 'w')

    def Close(self):
        """Close the output file, if one is open."""
        if self.fOut is None:
            return
        self.fOut.close()
        self.fOut = None
class cExpandFilenameArguments():
    """Expand command-line filename arguments into [filename, expression] pairs.

    Depending on the options, arguments are taken literally, expanded
    recursively through directories, or expanded through @here-files and
    shell-style wildcards. Arguments that start with expressionprefix are not
    filenames: they set the expression attached to the filenames that follow.

    After construction, warning tells whether a problem was detected and
    message holds the text to show to the user.
    """

    def __init__(self, filenames, literalfilenames=False, recursedir=False, checkfilenames=False, expressionprefix=None):
        """Build the list of [filename, expression] pairs.

        filenames: the raw command-line arguments; an empty list yields one
            pair with an empty filename.
        literalfilenames: use every argument as a filename, without expansion.
        recursedir: walk directories and match files against the wildcard.
        checkfilenames: drop entries that do not exist or are not regular
            files and report them in message.
        expressionprefix: prefix that marks an argument as an expression, or
            None when expressions are not used.
        """
        self.containsUnixShellStyleWildcards = False
        self.warning = False
        self.message = ''
        self.filenameexpressions = []
        self.expressionprefix = expressionprefix
        self.literalfilenames = literalfilenames

        expression = ''
        if len(filenames) == 0:
            self.filenameexpressions = [['', '']]
        elif literalfilenames:
            self.filenameexpressions = [[filename, ''] for filename in filenames]
        elif recursedir:
            for dirwildcard in filenames:
                if expressionprefix is not None and dirwildcard.startswith(expressionprefix):
                    expression = dirwildcard[len(expressionprefix):]
                elif dirwildcard.startswith('@'):
                    for filename in ProcessAt(dirwildcard):
                        self.filenameexpressions.append([filename, expression])
                elif os.path.isfile(dirwildcard):
                    self.filenameexpressions.append([dirwildcard, expression])
                else:
                    if os.path.isdir(dirwildcard):
                        dirname = dirwildcard
                        basename = '*'
                    else:
                        dirname, basename = os.path.split(dirwildcard)
                        if dirname == '':
                            dirname = '.'
                    for path, _, files in os.walk(dirname):
                        for filename in fnmatch.filter(files, basename):
                            self.filenameexpressions.append([os.path.join(path, filename), expression])
        else:
            # Flatten with comprehensions rather than sum(..., []): this
            # accepts any iterable from ProcessAt (not only lists) and avoids
            # repeated list copying.
            expanded = [name for argument in filenames for name in ProcessAt(argument)]
            globbed = [match for name in expanded for match in self.Glob(name)]
            # OrderedDict.fromkeys removes duplicates and keeps first-seen order.
            for filename in collections.OrderedDict.fromkeys(globbed):
                if expressionprefix is not None and filename.startswith(expressionprefix):
                    expression = filename[len(expressionprefix):]
                else:
                    self.filenameexpressions.append([filename, expression])
            self.warning = self.containsUnixShellStyleWildcards and len(self.filenameexpressions) == 0
            if self.warning:
                self.message = "Your filename argument(s) contain Unix shell-style wildcards, but no files were matched.\nCheck your wildcard patterns or use option literalfilenames if you don't want wildcard pattern matching."
                return
        # Only an expression was given: apply it to the empty filename.
        if self.filenameexpressions == [] and expression != '':
            self.filenameexpressions = [['', expression]]
        if checkfilenames:
            self.CheckIfFilesAreValid()

    def Glob(self, filename):
        """Return the files matching filename when it contains wildcards, else [filename].

        Records in containsUnixShellStyleWildcards that a wildcard was seen.
        """
        if not ('?' in filename or '*' in filename or ('[' in filename and ']' in filename)):
            return [filename]
        self.containsUnixShellStyleWildcards = True
        return glob.glob(filename)

    def CheckIfFilesAreValid(self):
        """Keep only usable entries and describe the rejected ones in message.

        An entry is kept when its filename is empty, is recognised as a hash
        file, or names an existing regular file.
        """
        valid = []
        doesnotexist = []
        isnotafile = []
        for filename, expression in self.filenameexpressions:
            hashfile = False
            try:
                hashfile = FilenameCheckHash(filename, self.literalfilenames)[0] == FCH_DATA
            except Exception:
                # Deliberate best effort: FilenameCheckHash / FCH_DATA are not
                # defined in this file, so a failed check means "not a hash
                # file".
                pass
            if filename == '' or hashfile:
                valid.append([filename, expression])
            elif not os.path.exists(filename):
                doesnotexist.append(filename)
            elif not os.path.isfile(filename):
                isnotafile.append(filename)
            else:
                valid.append([filename, expression])
        self.filenameexpressions = valid
        if len(doesnotexist) > 0:
            self.warning = True
            self.message += 'The following files do not exist and will be skipped: ' + ' '.join(doesnotexist) + '\n'
        if len(isnotafile) > 0:
            self.warning = True
            self.message += 'The following files are not regular files and will be skipped: ' + ' '.join(isnotafile) + '\n'

    def Filenames(self):
        """Return the filenames, or [filename, expression] pairs when an expression prefix is in use."""
        if self.expressionprefix is None:
            return [filename for filename, expression in self.filenameexpressions]
        return self.filenameexpressions
def ToString(value):
    """Return value unchanged when it is a str, otherwise str(value)."""
    return value if isinstance(value, str) else str(value)
def Quote(value, separator, quote):
    """Convert value to a string and quote it when needed for a CSV field.

    The string is wrapped in quote when it contains separator or is empty;
    otherwise it is returned as is.
    """
    text = ToString(value)
    needsquoting = separator in text or text == ''
    if needsquoting:
        return quote + text + quote
    return text
def MakeCSVLine(row, separator, quote):
    """Join the values of row into one line, quoting each field with Quote."""
    fields = [Quote(field, separator, quote) for field in row]
    return separator.join(fields)
class cLogfile():
    """Optional CSV log of a run.

    With an empty keyword nothing is written, but errors are still counted.
    """

    def __init__(self, keyword, comment):
        """Open the log (when keyword is not empty) and write the header lines.

        keyword: becomes part of the log filename, together with the tool
            name and a timestamp.
        comment: free text stored in the log header.
        """
        self.starttime = time.time()
        self.errors = 0
        self.oOutput = None
        if keyword != '':
            toolname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
            self.oOutput = cOutput('%s-%s-%s.log' % (toolname, keyword, self.FormatTime()))
        self.Line('Start')
        self.Line('UTC', '%04d%02d%02d-%02d%02d%02d' % time.gmtime(time.time())[0:6])
        self.Line('Comment', comment)
        self.Line('Args', repr(sys.argv))
        self.Line('Version', __version__)
        self.Line('Python', repr(sys.version_info))
        self.Line('Platform', sys.platform)
        self.Line('CWD', repr(os.getcwd()))

    @staticmethod
    def FormatTime(epoch=None):
        """Format epoch (default: now) in local time as YYYYMMDD-HHMMSS."""
        moment = time.time() if epoch is None else epoch
        return '%04d%02d%02d-%02d%02d%02d' % time.localtime(moment)[0:6]

    def Line(self, *line):
        """Write one CSV log line: a timestamp followed by the given fields."""
        if self.oOutput is None:
            return
        fields = (self.FormatTime(),) + line
        self.oOutput.Line(MakeCSVLine(fields, DEFAULT_SEPARATOR, QUOTE))

    def LineError(self, *line):
        """Write an 'Error' log line and increment the error counter."""
        self.Line('Error', *line)
        self.errors += 1

    def Close(self):
        """Write the 'Finish' line with error count and elapsed seconds, then close the log."""
        if self.oOutput is None:
            return
        elapsed = time.time() - self.starttime
        self.Line('Finish', '%d error(s)' % self.errors, '%d second(s)' % elapsed)
        self.oOutput.Close()
class cGrep():
    """Line selector based on a regular expression or a fixed string.

    Options is a string of one-letter flags: i (case insensitive),
    v (invert the selection) and F (treat the expression as a fixed string).
    """

    def __init__(self, expression, options):
        """Prepare the matcher.

        expression: regular expression or fixed string; an empty string
            disables selection (dogrep is False).
        options: combination of the letters i, v and F.

        Raises Exception when options are given without an expression or
        when an unknown option letter is used. An invalid regular expression
        raises re.error.
        """
        self.expression = expression
        self.options = options
        if self.expression == '' and self.options != '':
            raise Exception('Option --grepoptions can not be used without option --grep')
        self.dogrep = self.expression != ''
        self.oRE = None
        self.invert = False
        self.caseinsensitive = False
        self.fixedstring = False
        if self.dogrep:
            flags = 0
            for option in self.options:
                if option == 'i':
                    flags |= re.IGNORECASE
                    self.caseinsensitive = True
                elif option == 'v':
                    self.invert = True
                elif option == 'F':
                    self.fixedstring = True
                else:
                    raise Exception('Unknown grep option: %s' % option)
            # A fixed string is never used as a pattern; compiling it would
            # reject strings that contain regex metacharacters such as '('.
            if not self.fixedstring:
                self.oRE = re.compile(self.expression, flags)

    def Grep(self, line):
        """Test line and return a (selected, line) tuple.

        For a non-inverted regular expression match with at least one capture
        group, the returned line is the content of the first group. In all
        other cases the line is returned unchanged. Without an expression
        every line is selected.
        """
        if not self.dogrep:
            return True, line
        if self.fixedstring:
            if self.caseinsensitive:
                found = self.expression.lower() in line.lower()
            else:
                found = self.expression in line
            return (not found if self.invert else found), line
        oMatch = self.oRE.search(line)
        if self.invert:
            return oMatch is None, line
        if oMatch is not None and len(oMatch.groups()) > 0:
            line = oMatch.groups()[0]
        return oMatch is not None, line
def ProcessFile(fIn, oBeginGrep, oGrep, oEndGrep, fullread):
    """Generate the content of fIn that has to be processed.

    fIn: open text file object, or None (nothing is generated).
    oBeginGrep: selector for the first line to process, or None.
    oGrep: selector for the lines to process, or None.
    oEndGrep: selector for the last line to process, or None.
    fullread: when true, yield the complete content as one string and ignore
        the selectors.

    Each selector is an object with a dogrep attribute and a Grep(line)
    method returning (selected, line). Lines are yielded without their
    trailing newline / carriage return characters.
    """
    if fIn is None:
        return
    if fullread:
        yield fIn.read()
        return

    begin = oBeginGrep is None or not oBeginGrep.dogrep
    doend = oEndGrep is not None and oEndGrep.dogrep
    dogrep = oGrep is not None and oGrep.dogrep
    for line in fIn:
        line = line.rstrip('\n\r')
        if not begin:
            begin, line = oBeginGrep.Grep(line)
            if not begin:
                continue
        isendline = False
        if doend:
            isendline, line = oEndGrep.Grep(line)
        selected = True
        if dogrep:
            selected, line = oGrep.Grep(line)
        if selected:
            yield line
        if isendline:
            # The end line is the last candidate, whether or not --grep
            # selected it: nothing after it may be processed.
            return
def AnalyzeFileError(filename):
    """Report on stderr why filename could not be opened.

    Prints the exception currently being handled (from sys.exc_info) and,
    when it can be determined, whether the path is missing, a directory or
    not a regular file. Problems during this diagnosis are ignored.
    """
    PrintError('Error opening file %s' % filename)
    PrintError(sys.exc_info()[1])
    try:
        reason = None
        if not os.path.exists(filename):
            reason = 'The file does not exist'
        elif os.path.isdir(filename):
            reason = 'The file is a directory'
        elif not os.path.isfile(filename):
            reason = 'The file is not a regular file'
        if reason is not None:
            PrintError(reason)
    except:
        # Diagnosis is best effort only.
        pass
@contextmanager
def TextFile(filename, oLogfile):
    """Context manager that opens filename for reading text.

    filename: '' selects standard input; a name ending in .gz (any case) is
        opened as a gzip file; anything else is opened as a plain text file.
    oLogfile: object with Line and LineError methods, used to log the
        outcome.

    Yields the open file object, or None when the file could not be opened
    (the error is reported and logged, not raised). On exit the file is
    closed (except standard input), also when the body of the with statement
    raises; such an exception is logged as a reading error and propagates.
    """
    if filename == '':
        fIn = sys.stdin
    else:
        try:
            if os.path.splitext(filename)[1].lower() == '.gz':
                if sys.version_info[0] >= 3:
                    # Text mode, so lines are str like those of plain files.
                    fIn = gzip.open(filename, 'rt')
                else:
                    fIn = gzip.GzipFile(filename, 'rb')
            else:
                fIn = open(filename, 'r')
        except Exception:
            AnalyzeFileError(filename)
            oLogfile.LineError('Opening file %s %s' % (filename, repr(sys.exc_info()[1])))
            fIn = None

    if fIn is not None:
        oLogfile.Line('Success', 'Opening file %s' % filename)

    try:
        yield fIn
    finally:
        if fIn is not None:
            # Inside finally, exc_info holds the exception raised by the body
            # of the with statement, if there is one.
            if sys.exc_info()[1] is not None:
                oLogfile.LineError('Reading file %s %s' % (filename, repr(sys.exc_info()[1])))
            if fIn is not sys.stdin:
                fIn.close()
def ReplaceFunction(oMatch):
    """Replacement callback for re.sub: return the value of the matched variable.

    The variable name is the first capture group of oMatch and is looked up
    in the global dVariables dictionary. A name that is not in the dictionary
    leaves the matched text unchanged, so references to unknown variables do
    not abort processing.
    """
    global dVariables
    return dVariables.get(oMatch.groups()[0], oMatch.group(0))
def ProcessTextFile(filename, oBeginGrep, oGrep, oEndGrep, oOutput, oLogfile, options):
    """Collect the variable assignments of one file and output the instantiated values.

    Every selected line is searched (case insensitively) with the regular
    expression options.assignment; group 1 is the variable name and group 2
    its value, stored in the global dVariables dictionary. Afterwards each
    stored value has its variable references (options.instantiation)
    replaced through ReplaceFunction, and the values changed by this
    replacement are written to oOutput.

    Processing errors are logged; they are re-raised unless
    options.ignoreprocessingerrors is set.
    """
    global dVariables
    dVariables = {}

    with TextFile(filename, oLogfile) as fIn:
        try:
            for line in ProcessFile(fIn, oBeginGrep, oGrep, oEndGrep, False):
                for oMatch in re.finditer(options.assignment, line, re.I):
                    groups = oMatch.groups()
                    dVariables[groups[0]] = groups[1]
        except:
            oLogfile.LineError('Processing file %s %s' % (filename, repr(sys.exc_info()[1])))
            if not options.ignoreprocessingerrors:
                raise
            # Python 2 keeps the handled exception around until it is cleared.
            if sys.version_info[0] < 3:
                sys.exc_clear()

    for original in dVariables.values():
        instantiated = re.sub(options.instantiation, ReplaceFunction, original)
        if instantiated != original:
            oOutput.Line(instantiated)
def InstantiateCOutput(options):
    """Create the cOutput object for options.output (None when that option is empty)."""
    return cOutput(options.output if options.output != '' else None)
def ProcessTextFiles(filenames, oLogfile, options):
    """Process every file in filenames with the selectors and output taken from options.

    The grep, begingrep and endgrep selectors and the output object are
    created once and shared by all files; the output is closed at the end.
    """
    oGrep = cGrep(options.grep, options.grepoptions)
    oBeginGrep = cGrep(options.begingrep, options.begingrepoptions)
    oEndGrep = cGrep(options.endgrep, options.endgrepoptions)
    oOutput = InstantiateCOutput(options)
    total = len(filenames)
    for position, name in enumerate(filenames):
        oOutput.Filename(name, position, total)
        ProcessTextFile(name, oBeginGrep, oGrep, oEndGrep, oOutput, oLogfile, options)
    oOutput.Close()
def Main():
    """Parse the command line and process the given files.

    With option --man the help and the manual are printed and nothing is
    processed. Otherwise the filename arguments are expanded, warnings about
    them are reported, the files are processed and the number of logged
    errors is printed on stderr when it is not zero.
    """
    moredesc = '''
Arguments:
@file: process each file listed in the text file specified
wildcards are supported
Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''
    oParser = optparse.OptionParser(usage='usage: %prog [options] [[@]file ...]\n' + __description__ + moredesc, version='%prog ' + __version__)
    oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual')
    oParser.add_option('-o', '--output', type=str, default='', help='Output to file (# supported)')
    # Raw string: \s is a regular expression escape, not a string escape.
    oParser.add_option('-a', '--assignment', type=str, default=r'set\s+([^=]+)=([^&]*)', help='Assignment regex')
    oParser.add_option('-i', '--instantiation', type=str, default='%([^%]+)%', help='Instantiation regex')
    oParser.add_option('--grep', type=str, default='', help='Grep expression')
    oParser.add_option('--grepoptions', type=str, default='', help='grep options (ivF)')
    oParser.add_option('--begingrep', type=str, default='', help='Grep expression for begin line')
    oParser.add_option('--begingrepoptions', type=str, default='', help='begingrep options (ivF)')
    oParser.add_option('--endgrep', type=str, default='', help='Grep expression for end line')
    oParser.add_option('--endgrepoptions', type=str, default='', help='endgrep options (ivF)')
    oParser.add_option('--literalfilenames', action='store_true', default=False, help='Do not interpret filenames')
    oParser.add_option('--recursedir', action='store_true', default=False, help='Recurse directories (wildcards and here files (@...) allowed)')
    oParser.add_option('--checkfilenames', action='store_true', default=False, help='Perform check if files exist prior to file processing')
    oParser.add_option('--logfile', type=str, default='', help='Create logfile with given keyword')
    oParser.add_option('--logcomment', type=str, default='', help='A string with comments to be included in the log file')
    oParser.add_option('--ignoreprocessingerrors', action='store_true', default=False, help='Ignore errors during file processing')
    (options, args) = oParser.parse_args()

    if options.man:
        oParser.print_help()
        PrintManual()
        return

    oLogfile = cLogfile(options.logfile, options.logcomment)
    try:
        oExpandFilenameArguments = cExpandFilenameArguments(args, options.literalfilenames, options.recursedir, options.checkfilenames)
        oLogfile.Line('FilesCount', str(len(oExpandFilenameArguments.Filenames())))
        oLogfile.Line('Files', repr(oExpandFilenameArguments.Filenames()))
        if oExpandFilenameArguments.warning:
            PrintError('\nWarning:')
            PrintError(oExpandFilenameArguments.message)
            oLogfile.Line('Warning', repr(oExpandFilenameArguments.message))
        ProcessTextFiles(oExpandFilenameArguments.Filenames(), oLogfile, options)
        if oLogfile.errors > 0:
            PrintError('Number of errors: %d' % oLogfile.errors)
    finally:
        # Close the log even when processing stops with an exception.
        oLogfile.Close()
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    Main()