-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathjsonl-to-csv.py
129 lines (111 loc) · 3.78 KB
/
jsonl-to-csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
"""
Converts a JSONL log file (for example `exceptions.log`) to CSV.
"""
__author__ = "David Morris"
__version__ = "0.1.0"
__license__ = "MIT"
import sys
import csv
import json
import logging
import argparse
import logging.config
import pathlib
import bundlefun
from pathlib import Path
# Layout for every log record: "[file:line - function() ] message".
LOG_FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"

# Configure the root logger exactly once. logging.basicConfig() does nothing
# when the root logger already has a handler, so the format must be supplied
# here; a second call from inside main() would be silently ignored.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=LOG_FORMAT)


def main(args: argparse.Namespace) -> None:
    """Convert a JSONL file to CSV, keeping only the wanted columns.

    Each input line is parsed as JSON; the keys listed for
    ``args.log_type`` / ``args.section`` by
    ``bundlefun.get_wanted_kv_headers`` become the CSV columns. Lines that
    cannot be parsed are skipped (best effort) and counted.

    Args:
        args: Parsed command-line namespace. Reads ``filename``,
            ``log_type``, ``section``, ``csv_file``, ``no_line_number``,
            ``no_line_length`` and ``no_header``. As a side effect
            ``filename_path`` and ``csv_path`` are set on it, and
            ``csv_file`` is filled in with ``<log_type>.csv`` when it was
            ``None``.

    Returns:
        None. The result is written to ``args.csv_path``.
    """
    logger = logging.getLogger("root")
    # Switch to logging.DEBUG to trace every parsed line.
    logger.setLevel(logging.INFO)

    args.filename_path = pathlib.Path(args.filename)

    wanted_headers = bundlefun.get_wanted_kv_headers(logtype=args.log_type)
    # Copy before appending: the extra columns below must not leak into the
    # structure handed back by bundlefun (it may be shared or cached).
    fieldnames = list(wanted_headers[args.log_type][args.section])
    if not args.no_line_number:
        fieldnames.append("line_number")
    if not args.no_line_length:
        fieldnames.append("line_length")

    if args.csv_file is None:
        args.csv_file = f"{args.log_type}.csv"
    args.csv_path = pathlib.Path(args.csv_file)

    skipped = 0
    # newline="" lets the csv module control line endings itself, as the
    # csv documentation requires (avoids blank rows on Windows).
    with open(args.csv_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=fieldnames)
        if not args.no_header:
            writer.writeheader()

        # bundlefun picks the callable used to open the input file
        # (presumably to handle compressed logs - confirm in bundlefun).
        open_fn = bundlefun.open_file_handle(args.filename_path)
        with open_fn(args.filename_path, "rb") as jsonl_file:
            # start=1 so line_number is the real 1-based position in the
            # file; the previous counter was incremented and then had 1
            # added again, reporting the first line as line 2.
            for line_number, line in enumerate(jsonl_file, start=1):
                logger.debug("line: %s ", line_number)
                try:
                    json_line = json.loads(line)
                    parsed_line = {
                        key: json_line[key]
                        for key in fieldnames
                        if key in json_line
                    }
                except Exception as exc:
                    # Per-line boundary: one malformed line must not abort
                    # the whole conversion, but it is counted and reported.
                    skipped += 1
                    logger.debug("line %d skipped: %s", line_number, exc)
                    continue

                if not args.no_line_number:
                    parsed_line["line_number"] = line_number
                if not args.no_line_length:
                    # The file is read in binary mode, so this is the length
                    # in bytes, including any trailing newline.
                    parsed_line["line_length"] = len(line)
                logger.debug("%s", parsed_line)
                writer.writerow(parsed_line)

    if skipped:
        logger.warning("skipped %d line(s) that could not be parsed", skipped)
if __name__ == "__main__":
    # Command-line interface: one positional input file, plus options that
    # select the log format/section and which extra columns and header the
    # CSV gets. argparse derives each destination name from the long flag
    # (e.g. "--log-type" -> args.log_type), stores values by default, and
    # defaults "store_true" switches to False.
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="a jsonl file")

    parser.add_argument(
        "--log-type",
        default="exceptions",
        help="--log-type the type of a the file in the log-formats.json file.",
    )
    parser.add_argument("--csv-file", help="--csv-file <csv output file name>")

    # Boolean switches, registered in the order they appear in --help.
    for short_flag, long_flag, help_text in (
        (
            "-l",
            "--no-line-number",
            "--no-line-number don't add the line number column to the csv.",
        ),
        (
            "-s",
            "--no-line-length",
            "--no-line-length don't t add the line length column to the csv. ",
        ),
        (
            "-n",
            "--no-header",
            "--no-header <dont print the header in the csv",
        ),
    ):
        parser.add_argument(
            short_flag, long_flag, action="store_true", help=help_text
        )

    parser.add_argument(
        "--section",
        default="core",
        help="--section in `log-formats.json` to match against default is `core`.",
    )

    args = parser.parse_args()
    main(args)