-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadwise.py
201 lines (163 loc) · 7.19 KB
/
readwise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import requests
from docx import Document
import os
import csv
from openai import OpenAI
import time
# OpenAI client used by generate_tags_from_openai(); the API key is read from
# the OPENAI_API_KEY environment variable (None if the variable is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# New, empty Word document created at import time.
# NOTE(review): no function visible in this file references `document` --
# confirm whether it is still needed.
document = Document()
# Readwise API token, read from the READWISE environment variable
# (None if the variable is unset).
api_token = os.environ.get("READWISE")
# Authorization header passed to every Readwise request made via `requests`.
headers = {"Authorization": f"Token {api_token}"}
# CSV file read by update_tags_from_csv(); the columns it accesses are
# "Book Title", "Highlight" and "Tags".
csv_file = "Highlights_with_Tags.csv"
# Text file with one highlight ID per line, appended to after each successful
# update and consulted to skip highlights on later runs.
success_log_file = "updated_highlights_log.txt"
# Upper bound on attempts when a Readwise request is rate limited (HTTP 429).
MAX_RETRIES = 5
def generate_tags_from_openai(highlight_text, max_tags=3):
    """Ask the OpenAI chat completion model for tags describing a highlight.

    Args:
        highlight_text: Text of the highlight to classify.
        max_tags: Maximum number of tags to request and to return. The model
            is only *asked* to respect this limit, so the parsed reply is
            truncated to enforce it.

    Returns:
        A list of at most ``max_tags`` non-empty tag strings, in the order the
        model produced them. An empty list is returned when the request fails
        or the reply carries no text.
    """
    prompt = f'Generate up to {max_tags} high-level relevant tags for the following highlight: "{highlight_text}". They should be separated by commas and not have hashes. Use British English. They should be restricted to: Economics, Technology, Startups, Science, Physics, Biology, Chemistry, Entrepreneurship, Liberalism, Philosophy, Environment, Religion, Politics, History, Psychology, Sociology, Statistics, United Kingdom, Quotes, Film, Music, Marketing, Politics, Personal Finance, Design, CBT, Lifetips, Europe, United States, Critical Thinking, IdPol, Health, Finance, Agriculture, Productivity, and Literature.'
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=50,
            temperature=0.5,
        )
        # The message content can be None; treat that as an empty reply
        # instead of relying on the AttributeError from None.strip().
        tags_text = response.choices[0].message.content or ""
    except Exception as e:
        # Tagging is best-effort: report the problem and fall back to no tags.
        print(f"Error generating tags: {e}")
        return []

    # Split the comma-separated reply, drop blanks, and enforce the limit.
    tags = [tag.strip() for tag in tags_text.split(",")]
    return [tag for tag in tags if tag][: max(max_tags, 0)]
def normalize_text(text):
    """Return ``text`` prepared for case-insensitive comparison.

    Leading and trailing whitespace is removed and the result is lowercased.
    Interior whitespace and punctuation are left untouched.
    """
    trimmed = text.strip()
    lowered = trimmed.lower()
    return lowered
def find_matching_highlight(book, highlight_text):
    """Find the highlight in ``book`` whose text contains ``highlight_text``.

    Both sides are passed through ``normalize_text`` and only the first 100
    characters of the normalized ``highlight_text`` are used as the search
    needle, so the match is a partial, case-insensitive substring match.

    Args:
        book: Mapping with a ``"highlights"`` list; each entry is a mapping
            with a ``"text"`` string.
        highlight_text: Highlight text to look for.

    Returns:
        The first matching highlight mapping, or ``None`` when nothing matches
        or ``highlight_text`` is empty after normalization.
    """
    needle = normalize_text(highlight_text)[:100]
    # An empty string is a substring of every string, so without this guard a
    # blank CSV cell would "match" the first highlight of the book.
    if not needle:
        return None

    for highlight in book["highlights"]:
        if needle in normalize_text(highlight["text"]):
            return highlight
    # No match found
    return None
def load_updated_highlight_ids():
    """Return the highlight IDs recorded in the success log file.

    Each line of the log is stripped of surrounding whitespace and collected
    into a set of strings. An empty set is returned when the log file does not
    exist.
    """
    if not os.path.exists(success_log_file):
        return set()

    with open(success_log_file, "r") as log_handle:
        recorded_ids = {entry.strip() for entry in log_handle}
    return recorded_ids
def log_successful_update(highlight_id):
    """Append ``highlight_id`` as a new line at the end of the success log file."""
    with open(success_log_file, "a") as log_handle:
        log_handle.writelines([f"{highlight_id}\n"])
def update_highlight_tags(highlight_id, tags, timeout=30):
    """Send the given tags for one highlight to the Readwise API.

    A PATCH request is issued to the highlight's URL. On HTTP 429 the call
    waits and retries, doubling the wait each time (starting at 2 seconds) and
    never waiting less than the server's ``Retry-After`` value when one is
    supplied, for at most ``MAX_RETRIES`` attempts.

    Args:
        highlight_id: Identifier of the highlight to update.
        tags: Iterable of tag names; each is sent as ``{"name": tag}``.
        timeout: Seconds to wait for the server before giving up on a request,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        True when the API answered 200 (the ID is then appended to the success
        log); False on any other status, on a request error, or when the retry
        budget is exhausted.
    """
    # NOTE(review): confirm that this PATCH endpoint accepts a "tags" field;
    # the Readwise API also documents a dedicated .../highlights/<id>/tags/
    # endpoint for creating tags.
    url = f"https://readwise.io/api/v2/highlights/{highlight_id}/"
    payload = {"tags": [{"name": tag} for tag in tags]}
    backoff = 2  # Start with 2 seconds

    for _ in range(MAX_RETRIES):
        try:
            # Only the network call can raise RequestException.
            response = requests.patch(
                url, headers=headers, json=payload, timeout=timeout
            )
        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
            return False

        if response.status_code == 200:
            print(f"Successfully updated highlight {highlight_id} with tags: {tags}")
            log_successful_update(highlight_id)  # Log the successful update
            return True

        if response.status_code != 429:
            # Any other status is treated as a permanent failure.
            print(f"Failed to update highlight {highlight_id}. Status code: {response.status_code}")
            print(response.text)
            return False

        # Rate limited: honour the server's Retry-After hint when it is a
        # number of seconds, otherwise fall back to our own backoff.
        wait = backoff
        try:
            wait = max(wait, int(response.headers.get("Retry-After")))
        except (TypeError, ValueError):
            pass
        print(f"Rate limited. Waiting for {wait} seconds before retrying.")
        time.sleep(wait)
        backoff *= 2  # Double the backoff time

    print(f"Max retries exceeded for highlight {highlight_id}.")
    return False
def fetch_highlights(timeout=30):
    """Fetch all books and their highlights from the Readwise export endpoint.

    Pages are requested one after another, following ``nextPageCursor`` until
    the API stops returning one. A rate-limited page (HTTP 429) is retried
    after waiting, up to ``MAX_RETRIES`` times in total, instead of aborting
    the whole export.

    Args:
        timeout: Seconds to wait for the server before giving up on a request.

    Returns:
        A list with the ``results`` entries of every page fetched. If a
        request fails, the failure is printed and the entries collected so far
        are returned.
    """
    url = "https://readwise.io/api/v2/export/"
    next_page_cursor = None
    all_highlights = []
    rate_limit_hits = 0
    backoff = 2  # Seconds; doubled after every rate-limited response

    while True:
        params = {"pageCursor": next_page_cursor} if next_page_cursor else {}
        try:
            response = requests.get(
                url, headers=headers, params=params, timeout=timeout
            )
        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
            break

        if response.status_code == 429 and rate_limit_hits < MAX_RETRIES:
            # Wait at least as long as the server asks for, then retry the
            # same page (the cursor is left unchanged).
            rate_limit_hits += 1
            wait = backoff
            try:
                wait = max(wait, int(response.headers.get("Retry-After")))
            except (TypeError, ValueError):
                pass
            print(f"Rate limited. Waiting for {wait} seconds before retrying.")
            time.sleep(wait)
            backoff *= 2
            continue

        if response.status_code != 200:
            print(f"Failed to fetch export data. Status code: {response.status_code}")
            print(response.text)
            break

        data_export = response.json()
        all_highlights.extend(data_export.get("results", []))
        next_page_cursor = data_export.get("nextPageCursor")
        if not next_page_cursor:
            break  # No more pages

    return all_highlights
def update_tags_from_csv():
    """Apply the tags listed in the CSV file to the matching Readwise highlights.

    For every CSV row the book is looked up by exact title, the highlight is
    located with ``find_matching_highlight`` and its tags are sent with
    ``update_highlight_tags``. Highlights whose ID is already in the success
    log, or that were updated earlier in this run, are skipped. Rows without
    any usable tag are skipped as well. When no highlight matches, the
    highlights of the same-titled books are printed to help debugging.
    """
    highlights_data = fetch_highlights()

    # Index books by title once so each row is a dictionary lookup rather than
    # a scan over the whole library. Several books may share a title.
    books_by_title = {}
    for book in highlights_data:
        books_by_title.setdefault(book["title"], []).append(book)

    # Load the IDs of already updated highlights from the log file
    updated_highlight_ids = load_updated_highlight_ids()

    # newline="" lets the csv module handle line endings itself, which matters
    # for quoted fields that contain line breaks.
    with open(csv_file, mode="r", encoding="utf-8", newline="") as file:
        for row in csv.DictReader(file):
            title = row["Book Title"]
            highlight_text = row["Highlight"]
            # Tolerate "a,b" as well as "a, b" and drop empty entries so an
            # empty Tags cell does not produce a tag with an empty name.
            tags = [
                tag.strip()
                for tag in (row["Tags"] or "").split(",")
                if tag.strip()
            ]

            candidate_books = books_by_title.get(title, [])
            matching_highlight = None
            for book in candidate_books:
                matching_highlight = find_matching_highlight(book, highlight_text)
                if matching_highlight:
                    break

            if not matching_highlight:
                # Log the text comparison for debugging
                print(f"No matching highlight found for: {highlight_text}")
                print(f"Tried to match CSV highlight: {highlight_text}")
                # Only same-titled books were actually compared, so only
                # their highlights are listed.
                for book in candidate_books:
                    for highlight in book["highlights"]:
                        print(f"Compared with Readwise highlight: {highlight['text'][:100]}")
                continue

            highlight_id = matching_highlight["id"]
            # Skip the highlight if it has already been updated
            if str(highlight_id) in updated_highlight_ids:
                print(f"Skipping already updated highlight {highlight_id}.")
                continue

            if not tags:
                print(f"No tags listed for highlight {highlight_id}; skipping.")
                continue

            # Update the highlight's tags and remember the success so that a
            # duplicate CSV row in this run is not sent again.
            if update_highlight_tags(highlight_id, tags):
                updated_highlight_ids.add(str(highlight_id))
# Start the process only when this file is executed as a script, so that
# importing the module (e.g. to reuse its helpers) does not trigger network
# calls or write to the success log.
if __name__ == "__main__":
    update_tags_from_csv()