-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmercury_parser.py
69 lines (52 loc) · 1.78 KB
/
mercury_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import requests
import maya
from config import MERCURY_API_URL
# Mercury API endpoint followed by the start of its ``url`` query parameter.
MERCURY_API = '{base}?url='.format(base=MERCURY_API_URL)
class ParsedArticle(object):
    """Result of parsing one article page.

    Instances are normally created through :meth:`from_dict`.  Every public
    field starts out as ``None`` and is overwritten by whatever keys the
    source dictionary provides.
    NOTE(review): the field names presumably mirror the keys of the parser
    API's JSON response -- confirm against the API documentation.
    """

    def __init__(self, parser):
        """Create an empty article bound to *parser*.

        :param parser: object exposing ``parse(url)``; used by :meth:`next`
            to fetch the following page.
        """
        super(ParsedArticle, self).__init__()
        self._parser = parser

        self.title = None
        self.content = None
        self.date_published = None
        self.lead_image_url = None
        self.dek = None
        self.url = None
        self.domain = None
        self.excerpt = None
        self.word_count = None
        self.direction = None
        self.total_pages = None
        self.rendered_pages = None
        self.next_page_url = None

    def __repr__(self):
        return '<ParsedArticle url={0!r}>'.format(self.url)

    @classmethod
    def from_dict(cls, d, parser):
        """Build an article from the mapping *d*.

        Every key/value pair in *d* is copied onto the new instance as an
        attribute.  A truthy ``date_published`` is then converted from an
        ISO 8601 string to a ``datetime``; if that conversion fails the
        field is reset to ``None``.

        :param d: mapping of attribute names to values.
        :param parser: parser object stored on the instance for :meth:`next`.
        :returns: the populated :class:`ParsedArticle`.
        """
        article = cls(parser=parser)

        # Copy all values from the source mapping onto the instance.
        for key, value in d.items():
            setattr(article, key, value)

        # Convert the publication date into a proper datetime.
        if article.date_published:
            try:
                article.date_published = maya.MayaDT.from_iso8601(
                    article.date_published
                ).datetime()
            except Exception:
                # Best effort: an unparseable date is dropped rather than
                # failing the whole article.  ``Exception`` (not a bare
                # ``except``) so KeyboardInterrupt/SystemExit still propagate.
                article.date_published = None

        return article

    def next(self):
        """Parse and return the next page, or ``None`` if there is none.

        Delegates to the ``parse`` method of the parser this article was
        created with, passing ``next_page_url``.
        """
        if self.next_page_url:
            return self._parser.parse(self.next_page_url)
        return None
class ParserAPI(object):
    """Thin HTTP client for the parser API located at ``MERCURY_API_URL``.

    A single ``requests.Session`` is created per instance and reused for
    every :meth:`parse` call.
    """

    def __init__(self, api_key):
        """Store the API key and open a reusable HTTP session.

        :param api_key: value sent in the ``x-api-key`` request header.
        """
        super(ParserAPI, self).__init__()
        self.api_key = api_key
        self._session = requests.Session()

    def parse(self, url, timeout=None):
        """Request a parse of *url* and return it as a ``ParsedArticle``.

        :param url: address of the article to parse.  Pass it unencoded; it
            is percent-encoded here before being sent.
        :param timeout: optional timeout forwarded to ``requests``
            (seconds, or a ``(connect, read)`` tuple).  ``None`` keeps the
            previous behaviour of waiting indefinitely.
        :returns: a ``ParsedArticle`` built from the decoded JSON body.

        NOTE: the HTTP status code is not checked; whatever JSON object the
        server returns is handed to ``ParsedArticle.from_dict`` as-is.
        """
        headers = {'x-api-key': self.api_key}
        # Let requests build the query string so that characters such as
        # '&', '#' or '+' inside the target URL are percent-encoded instead
        # of being interpreted as part of the API request itself.
        response = self._session.get(
            MERCURY_API_URL,
            params={'url': url},
            headers=headers,
            timeout=timeout,
        )
        return ParsedArticle.from_dict(response.json(), parser=self)