-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathndl_search_xml2tsv.py
73 lines (57 loc) · 2.15 KB
/
ndl_search_xml2tsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from datetime import *
import glob
import kassis_config
import kassis_ndl_xmlloader
import csv
import os
def _first_entry(item, key, default):
    """Return the first element stored under *key*, or *default*.

    *default* is returned when the key is absent or its value is falsy
    (``None`` or an empty sequence).
    """
    values = item.get(key)
    return values[0] if values else default


def pickup(item):
    """Build one output row (a list of 7 cells) from a single record.

    Cells, in order: ``dcterms_title``; ``value`` and ``transcription`` of the
    first ``dc_titles`` entry; the first ``isbn`` entry; ``name`` and
    ``transcription`` of the first ``publishers`` entry; the first
    ``creators`` entry.  This is the same order as the header row written by
    ``xml2csv``.

    Any field that is missing, ``None`` or empty contributes empty-string
    cells, so the row always has exactly seven columns.

    Args:
        item: Mapping describing one record.  Assumed to be a plain ``dict``
            whose ``dc_titles`` / ``publishers`` values are lists of dicts and
            whose ``isbn`` / ``creators`` values are lists of scalars
            (NOTE(review): confirm against the XML loader's output).

    Returns:
        list: The seven cell values for the record.
    """
    # An empty dict as the fallback lets the .get(..., '') calls below
    # produce blanks without a separate branch for the "no entry" case.
    title = _first_entry(item, 'dc_titles', {})
    publisher = _first_entry(item, 'publishers', {})
    return [
        item.get('dcterms_title', ''),
        title.get('value', ''),
        title.get('transcription', ''),
        _first_entry(item, 'isbn', ''),
        publisher.get('name', ''),
        publisher.get('transcription', ''),
        _first_entry(item, 'creators', ''),
    ]
def xml2csv(importpath, exportpath):
    """Convert every XML file matching *importpath* into a TSV file.

    Each matched file is parsed with ``kassis_ndl_xmlloader.xml2json``.  When
    that yields records, they are written to ``<exportpath>/<basename>.tsv``:
    a header row followed by one ``pickup`` row per record, tab-delimited
    with every field quoted.  A file that yields no records only triggers a
    warning on stdout and produces no output file.

    Args:
        importpath: Glob pattern handed to ``glob.glob`` to find input files.
        exportpath: Directory into which the ``.tsv`` files are written.
    """
    print(f"importpath={importpath} exportpath={exportpath}")
    for filename in glob.glob(importpath):
        records = kassis_ndl_xmlloader.xml2json(filename)
        if not records:
            print("Warn: json data is zero")
            continue

        name, _ext = os.path.splitext(os.path.basename(filename))
        writepath = os.path.join(exportpath, f"{name}.tsv")
        # encoding: the header (and likely the data) is Japanese text, so do
        # not depend on the platform's locale encoding.
        # newline='': required by the csv module so the '\n' terminator
        # requested below is not translated to '\r\n' on Windows.
        with open(writepath, 'w', encoding='utf-8', newline='') as csvfile:
            writer = csv.writer(
                csvfile,
                quoting=csv.QUOTE_ALL,
                delimiter="\t",
                lineterminator='\n',
            )
            # header
            writer.writerow(['主タイトル','タイトル','タイトル読み','ISBN','出版者','出版者ヨミ','著者'])
            writer.writerows(pickup(record) for record in records)
if __name__ == '__main__':
    # Script entry point: read the import glob and export directory from the
    # 'xml2csv' section of the kassis configuration, run the conversion, and
    # print a timestamp before and after.
    timestamp_format = '%Y/%m/%d %H:%M:%S'
    print(f"@start time={datetime.now().strftime(timestamp_format)}")

    config = kassis_config.get()
    section = config['xml2csv']
    importpath = section['xml_import']
    exportpath = section['csv_export']
    xml2csv(importpath, exportpath)

    print(f"@finish time={datetime.now().strftime(timestamp_format)}")