-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlt_to_shaarli.py
executable file
·236 lines (195 loc) · 8.2 KB
/
lt_to_shaarli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: set expandtab tabstop=4 shiftwidth=4 :
# lt_dump_to_shaarli.py - Takes a JSON dump from Librarything and pumps the
# entries, one at a time, into a Shaarli instance so it can be used as a
# searchable card catalogue (because LT doesn't have a search API). LT's
# JSON dumps have serious internal inconsistencies, so there are a lot of
# key checks to see if some key or other exists.
#
# I strongly suggest running this tool with --dryrun a few times because you
# will probably have to clean up your data - adding a few JSON keys with
# sane defaults (like: ["copies"]: 1 because you own one copy of something).
# This is not a fire-and-forget kind of deal.
# By: The Doctor
# License: GPLv3
# v2.0 - Ported to Python 3.
# v1.0 - Initial release.
# TO-DO:
# Load modules.
import argparse
import json
import jwt
import logging
import os.path
import requests
import sys
import time
from html.parser import HTMLParser
# Constants.
# Build the JWT headers.
jwt_headers = {}
jwt_headers["alg"] = "HS512"
jwt_headers["typ"] = "JWT"
# Global variables.
argparser = None
args = None
loglevel = None
jsonfile = None
parser = None
lt_books = {}
old_book = {}
new_book = {}
payload = {}
jwt_token = None
headers = {}
request = None
# Functions.
# Figure out what to set the logging level to. There isn't a straightforward
# way of doing this because Python uses constants that are actually integers
# under the hood, and I'd really like to be able to do something like
# loglevel = 'logging.' + loglevel
# I can't have a pony, either. Takes a string, returns a Python loglevel.
def process_loglevel(loglevel):
if loglevel == "critical":
return 50
if loglevel == "error":
return 40
if loglevel == "warning":
return 30
if loglevel == "info":
return 20
if loglevel == "debug":
return 10
if loglevel == "notset":
return 0
# Core code...
# Set up a command line argument parser.
argparser = argparse.ArgumentParser(description="A command line utility which takes a JSON dump from Librarything and pumps it into a Shaarli instance using the API.")
argparser.add_argument("--loglevel", action="store", default="info",
help="Valid log levels: critical, error, warning, info, debug, notset. Defaults to info.")
argparser.add_argument("--apikey", action="store", required=True,
help="API key for a Shaarli instance.")
argparser.add_argument("--url", action="store", required=True,
help="Full URL to a Shaarli instance.")
argparser.add_argument("--books", action="store", required=True,
help="Full path to a JSON document containing a Librarything JSON dump.")
argparser.add_argument("--dryrun", action="store_true", default=False,
help="If set, the utility will not try to write anything.")
argparser.add_argument("--delay", action="store", type=float, default=2.5,
help="Number of seconds to wait in between requests. Defaults to 2.5.")
# Parse the argument vector.
args = argparser.parse_args()
# Set up logging.
loglevel = process_loglevel(args.loglevel.lower())
logging.basicConfig(level=loglevel, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
# Make sure the Librarything dumpfile exists.
if not os.path.exists(args.books):
logger.error("Unable to open Librarything dump " + args.books + ".")
sys.exit(1)
# Read in the Unmark JSON dump.
with open(args.books, "r") as jsonfile:
lt_books = json.load(jsonfile)
# Set the content type header.
headers["Content-Type"] = "application/json"
# Allocate an HTML parser to unescape HTML entities.
parser = HTMLParser.HTMLParser()
# Roll through each book in the JSON document.
for book in lt_books:
logger.debug("Now processing book " + str(book) + ".")
# Set up the data structures to make life easier.
old_book = lt_books[book]
new_book = {}
new_book["url"] = ""
new_book["private"] = True
new_book["description"] = ""
new_book["tags"] = []
# Title of the book.
new_book["title"] = parser.unescape(old_book["title"])
new_book["title"] = new_book["title"].strip().title()
# Authors of the book.
if "authors" in list(old_book.keys()):
# Case: authors is a populated list.
if len(old_book["authors"]):
new_book["description"] = new_book["description"] + "Author: " + parser.unescape(old_book["authors"][0]["fl"]) + "\n"
# Case: primaryauthor exists in the JSON dump.
elif "primaryauthor" in list(old_book.keys()):
tmp = old_book["primaryauthor"]
tmp = tmp.split(",")
tmp = tmp.reverse()
tmp = " ".join(tmp)
tmp = tmp.strip()
tmp = parser.unescape(tmp)
new_book["description"] = new_book["description"] + "Author: " + tmp + "\n"
# Case: No authors credited for this book.
else:
new_book["description"] = new_book["description"] + "Author: unknown\n"
# Tags
if "tags" in list(old_book.keys()):
for i in old_book["tags"]:
new_book["tags"].append(i.replace(" ", "_"))
# ISBN
if "isbn" in list(old_book.keys()):
if old_book["isbn"]["2"] is not None:
new_book["description"] = new_book["description"] + "ISBN: " + old_book["isbn"]["2"] + "\n"
elif old_book["isbn"]["0"] is not None:
new_book["description"] = new_book["description"] + "ISBN: " + old_book["isbn"]["0"] + "\n"
elif "originalisbn" in list(old_book.keys()):
new_book["description"] = new_book["description"] + "ISBN: " + old_book["originalisbn"] + "\n"
else:
new_book["description"] = new_book["description"] + "ISBN: unknown\n"
# Publication data
if "publication" in list(old_book.keys()):
new_book["description"] = new_book["description"] + "Publication: " + parser.unescape(old_book["publication"]) + "\n"
else:
new_book["description"] = new_book["description"] + "Publication: unknown\n"
# Year of publication
if "date" in list(old_book.keys()):
new_book["description"] = new_book["description"] + "Year: " + old_book["date"] + "\n"
else:
new_book["description"] = new_book["description"] + "Year: unknown\n"
# Dewey decimal code
if "ddc" in list(old_book.keys()):
new_book["description"] = new_book["description"] + "Dewey Decimal Code: " + old_book["ddc"]["code"][0] + "\n"
else:
new_book["description"] = new_book["description"] + "Dewey Decimal Code: unknown\n"
# Library of Congress code
if "lcc" in list(old_book.keys()):
if old_book["lcc"]:
new_book["description"] = new_book["description"] + "Library of Congress: " + old_book["lcc"]["code"] + "\n"
else:
new_book["description"] = new_book["description"] + "Library of Congress: unknown\n"
# Number of copies owned
if "copies" in list(old_book.keys()):
new_book["description"] = new_book["description"] + "Number of copies owned: " + old_book["copies"] + "\n"
else:
new_book["description"] = new_book["description"] + "Number of copies owned: 1\n"
# Number of pages
if "pages" in list(old_book.keys()):
new_book["description"] = new_book["description"] + "Pages: " + old_book["pages"] + "\n"
else:
new_book["description"] = new_book["description"] + "Pages: unknown\n"
# Build the JWT payload. The IAT time must be in UTC!
payload["iat"] = int(time.mktime(time.localtime()))
logger.debug("Value of payload: " + str(payload))
# Build a JWT token.
jwt_token = jwt.encode(payload, args.apikey, algorithm="HS512",
headers=jwt_headers)
logger.debug("Value of jwt_token: " + jwt_token)
# Set the authorization header.
headers["Authorization"] = "Bearer " + jwt_token
# Send the new book to Shaarli if this isn't a dry run.
if not args.dryrun:
logger.info("Sending book" + str(new_book))
response = requests.post(args.url+"/api/v1/links",
data=json.dumps(new_book), headers=headers)
logger.debug(json.dumps(response.json()))
else:
logger.info("Not sending book because this is a dry run.")
print(json.dumps(new_book))
# Sleep or a couple of seconds so we don't overwhelm the server.
if not args.dryrun:
time.sleep(args.delay)
# Fin.
sys.exit(0)