-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsvfwf.py
executable file
·188 lines (175 loc) · 5.25 KB
/
csvfwf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/python3
import sys
from sys import stdout, argv, stderr
import csv
# Default delimiter written after every output cell (overridden by -do).
delimiter = ","
# Default delimiter used when parsing the input files (overridden by -di).
inpdelim = ","
# Column alignment. 0 (default) is "smart": cells are right-aligned unless
# they start with '"' or '=', which are left-aligned. -1 always aligns left
# (-l); any other value, e.g. 1 (-r), always aligns right.
ALIGN = 0
# When True (-x), cells are emitted without padding, i.e. the fixed-width
# layout is removed again.
COMPRESS = False
# Number of empty rows which separate multiple tables within one file (-s).
TABLESEP = 2
def RemoveEmptyLastColumn(table):
    """Remove the last cell of every row if all of those cells are blank.

    "Last cell" is each row's own final element (``row[-1]``), so for rows
    of unequal length the cells inspected may sit in different columns.
    A cell is blank when ``str(cell).strip()`` is empty. A row without any
    cells is treated as having a blank last cell instead of raising
    ``IndexError``.

    Returns a new list of shortened rows when the trailing cells were all
    blank, otherwise the original ``table`` object unchanged.
    """
    # Zero-length rows have no row[-1]; count them as blank.
    lastcol = [row[-1] if row else "" for row in table]
    if all(not str(val).strip() for val in lastcol):
        return [row[:-1] for row in table]
    return table
def GetTable(CSVFile):
    """Read a CSV file and split it into a list of tables.

    The file is parsed with ``csv.reader`` using the module-level input
    delimiter ``inpdelim``. A row is "empty" when all of its cells are
    blank after stripping whitespace. A new table starts at the first
    non-empty row that follows at least ``TABLESEP`` *consecutive* empty
    rows; the empty rows themselves stay attached to the end of the
    preceding table. Every finished table is passed through
    ``RemoveEmptyLastColumn``.

    Returns a list of tables, each table being a list of rows (lists of
    strings). Rows that the reader returned without any cell are given a
    single empty-string cell.
    """
    tables = []
    table = []
    nempty = 0  # length of the current run of consecutive empty rows
    with open(CSVFile, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=inpdelim):
            empty_row = all(not str(val).strip() for val in row)
            if empty_row:
                nempty += 1
                if not row:
                    # keep at least one cell so the row survives later steps
                    row.append("")
            else:
                if nempty >= TABLESEP:
                    # enough blank rows in a row: close the current table
                    tables.append(RemoveEmptyLastColumn(table))
                    table = []
                # A non-empty row ends the run. Without this reset, isolated
                # blank rows scattered through one table would accumulate
                # and eventually split it.
                nempty = 0
            table.append(row)
    if table:
        tables.append(RemoveEmptyLastColumn(table))
    return tables
def OnlyEntry(row, icol):
    """Tell whether every cell of ``row`` other than index ``icol`` is blank.

    A cell is blank when ``str(cell).strip()`` is empty. The cell at
    ``icol`` itself is never inspected.
    """
    return not any(
        str(cell).strip()
        for pos, cell in enumerate(row)
        if pos != icol
    )
def NextColumnDense(vals, icol, extlast):
    """Tell whether the column right of ``icol`` is densely populated.

    If no row of ``vals`` has a cell at index ``icol + 1`` (which includes
    an empty ``vals``), ``extlast`` is returned unchanged. Otherwise the
    result is True when more than 20% of all rows hold a non-blank cell in
    that column; rows too short to reach it count as blank.
    """
    nxt = icol + 1
    reaching = [row for row in vals if nxt < len(row)]
    if not reaching:
        return extlast
    filled = sum(1 for row in reaching if str(row[nxt]).strip())
    return filled / len(vals) > 0.2
def CanExtend(row, icol, extlast):
    """Tell whether the cell right of ``icol`` in ``row`` is blank.

    When the row has no cell at index ``icol + 1``, ``extlast`` is returned
    unchanged; otherwise the result is True for a blank neighbour
    (empty after stripping whitespace) and False for a filled one.
    """
    nxt = icol + 1
    if nxt < len(row):
        return not str(row[nxt]).strip()
    return extlast
def FixWidth(vals):
    """Generate a table with fixed-width columns.

    ``vals`` is a list of rows (sequences of cells). Every cell is
    converted with ``str`` and stripped; cells containing the module-level
    output ``delimiter`` are wrapped in double quotes. Rows shorter than
    the widest row are padded with empty cells.

    For each column a width is chosen from the longest cell. A cell may
    instead spill over into the following column(s) when
    ``NextColumnDense`` and ``CanExtend`` both allow it; the amount by
    which a row currently overhangs is tracked per row and subtracted from
    the padding of the columns it covers. Unquoted cells that do not
    follow an overhang get one extra character of width.

    Alignment follows the module-level ``ALIGN`` (-1: left, 0: right
    except for cells starting with '"' or '=', otherwise: right). With
    ``COMPRESS`` set, cells are emitted without any padding.

    Returns a new list of rows of strings. The input is not modified. An
    empty table yields ``[]``; a table consisting only of zero-length rows
    is returned as a copy without further processing.
    """
    if not vals:
        # max() below would raise ValueError for a table without rows
        return []
    # Work on a copy: rows are padded and rewritten while columns are sized.
    vals = [list(row) for row in vals]
    ncols = max(len(row) for row in vals)
    if ncols == 0:
        # only empty rows
        return vals
    table1 = []
    # per row: number of characters by which an earlier cell still overhangs
    offsrow = [0] * len(vals)
    for icol in range(ncols):
        # find the optimal width for this column
        maxlen = 0
        for irow, row in enumerate(vals):
            if icol >= len(row):
                # Pad lazily, one column at a time: CanExtend must still see
                # short rows as short when it looks at the next column.
                row.append("")
            val = str(row[icol]).strip()
            quoted = delimiter in val
            if quoted:
                # add quotes
                val = '"' + val + '"'
            row[icol] = val
            itlen = len(val) + offsrow[irow]
            if offsrow[irow] == 0 and not quoted:
                itlen += 1
            if itlen > maxlen:
                # The column only grows for this cell if the cell cannot
                # expand into the next column instead.
                if (not NextColumnDense(vals, icol, offsrow[irow])
                        or not CanExtend(row, icol, offsrow[irow])):
                    maxlen = itlen
        fcol = []
        for irow, row in enumerate(vals):
            cell = row[icol]
            rowwidth = maxlen
            if offsrow[irow] > 0:
                # part (or all) of this column is already covered by an
                # overhanging cell to the left
                covered = min(offsrow[irow], maxlen)
                rowwidth = maxlen - covered
                offsrow[irow] -= covered
            if ALIGN == -1:
                alignright = False
            elif ALIGN == 0:
                alignright = not cell.startswith(('"', '='))
            else:
                alignright = True
            if COMPRESS:
                fcol.append(cell)
            elif alignright:
                fcol.append(cell.rjust(rowwidth))
            else:
                # str.ljust rather than '{:{}}'.format: a width of 0 would
                # produce the format spec "0", which Python < 3.10 rejects
                # for strings.
                fcol.append(cell.ljust(rowwidth))
            if len(cell) > maxlen:
                # this cell overhangs into the following column(s)
                offsrow[irow] = len(cell) - maxlen
        table1.append(fcol)
    # transpose the list of columns back into a list of rows
    return [list(i) for i in zip(*table1)]
def PrintTable(table):
    """Write ``table`` to standard output, one line per row.

    Each cell of a row is followed by the module-level ``delimiter``; the
    number of cells written per row is the length of the first row. Rows
    whose cells are all empty or whitespace produce an empty line.
    """
    for cells in table:
        has_content = any(not (cell == '' or cell.isspace()) for cell in cells)
        if has_content:
            for icol in range(len(table[0])):
                print(f"{cells[icol]}{delimiter}", end="")
        print()
def _OptionValue(args, option):
    """Return the argument following ``option``; exit with status 2 if missing."""
    try:
        return next(args)
    except StopIteration:
        print("option " + option + " requires an argument", file=sys.stderr)
        sys.exit(2)


# Command-line entry point. Guarded so that importing this module does not
# parse the importer's sys.argv.
if __name__ == "__main__":
    # Input files are collected while parsing and converted afterwards, so
    # every option applies no matter where it appears on the command line.
    CSVFiles = []
    ArgsLoop = iter(sys.argv[1:])
    for Arg in ArgsLoop:
        if not Arg.startswith("-"):
            CSVFiles.append(Arg)
        elif Arg.startswith("-do"):
            # output delimiter
            delimiter = _OptionValue(ArgsLoop, Arg)
        elif Arg.startswith("-di"):
            # input delimiter
            inpdelim = _OptionValue(ArgsLoop, Arg)
        elif Arg.startswith("-l"):
            # align left
            ALIGN = -1
        elif Arg.startswith("-r"):
            # align right
            ALIGN = 1
        elif Arg.startswith("-x"):
            # make it ugly again
            COMPRESS = True
        elif Arg.startswith("-s"):
            # different tables separated by that many rows
            value = _OptionValue(ArgsLoop, Arg)
            try:
                TABLESEP = int(value)
            except ValueError:
                print("option " + Arg + " requires an integer argument, got "
                      + value, file=sys.stderr)
                sys.exit(2)
        elif Arg.startswith("-h"):
            print("Usage: csvfwf [OPTIONS] <input CSV file>")
            print("Transform CSV files to fixed-width-format CSV. The output will go to the standard output.")
            print("Options:")
            print("-do: output delimiter (default: comma)")
            print("-di: input delimiter (default: comma)")
            print("-l: align columns left")
            print("-r: align columns right")
            print("-x: remove fixed-width format")
            print("-s: number of empty rows separating tables (default:",TABLESEP,")")
            print("-h: display this help message")
        else:
            # diagnostics go to stderr so they do not end up in the CSV output
            print("option "+Arg+" not known", file=sys.stderr)
    for CSVFile in CSVFiles:
        tables = GetTable(CSVFile)
        for table in tables:
            table = FixWidth(table)
            PrintTable(table)