-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtabtodb.py
70 lines (58 loc) · 2.01 KB
/
tabtodb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
TSV/CSV to DB | RPINerd, 08/22/23
Given either a tab- or comma-delimited file, build a simple SQLite database
"""
import argparse
import csv
import logging
import sqlite3
from pathlib import Path
def main(args: argparse.Namespace) -> None:
""""""
# TODO handle multiple input files each as a table
# Set up the database
conn = sqlite3.connect(args.output)
c = conn.cursor()
# Create the table
c.execute("DROP TABLE IF EXISTS data")
c.execute(
"""CREATE TABLE data (
accession TEXT PRIMARY KEY,
taxid TEXT,
gi TEXT
)"""
)
# Read in the input file
with Path.open(args.input, "r", encoding="local") as input_file:
if args.tab:
reader = csv.reader(input_file, delimiter="\t")
else:
reader = csv.reader(input_file, delimiter=",")
for row in reader:
# Skip the header
if row[0] == "accession":
continue
accession = row[1]
taxid = row[2]
gi = row[3]
c.execute(
"INSERT INTO data (accession, taxid, gi) VALUES (?, ?, ?)",
(accession, taxid, gi),
)
# Commit the changes and close the connection
conn.commit()
conn.close()
if __name__ == "__main__":
# Argument parsing
parser = argparse.ArgumentParser(description="")
parser.add_argument("-i", "--input", help="Input file", required=True)
parser.add_argument("-o", "--output", help="Output database file", required=True)
parser.add_argument("-v", "--verbose", help="Lots of status messages", action="store_true")
parser.add_argument("-t", "--tab", help="Input file is tab-delimited", action="store_true")
args = parser.parse_args()
# Set up logging
if args.verbose:
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
else:
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
main(args)