Skip to content

Commit

Permalink
Merge pull request #62 from kelockhart/myads-date
Browse files: browse the repository at this point in the history
Better logic around myADS last sent date
  • Loading branch information
kelockhart authored Aug 31, 2020
2 parents 4abfc86 + 6705d4f commit 91efafb
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 15 deletions.
2 changes: 1 addition & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
# import endpoints
HARBOUR_MYADS_IMPORT_ENDPOINT = 'https://api.adsabs.harvard.edu/v1/harbour/myads/classic/%s'
MYADS_DAILY_TIME_RANGE = 2 # days
MYADS_WEEKLY_TIME_RANGE = 25 # days
MYADS_WEEKLY_TIME_RANGE = 6 # days

# arXiv categories and sub-categories
ALLOWED_ARXIV_CLASSES = ['astro-ph',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
git+https://github.com/adsabs/[email protected]
git+https://github.com/adsabs/[email protected]
alembic==0.8.9
psycopg2==2.8.3
psycopg2==2.7
Flask-Script==2.0.5
Werkzeug==0.16.1
55 changes: 50 additions & 5 deletions vault_service/tests/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import cgi
from StringIO import StringIO
import datetime
from dateutil import parser

project_home = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))
if project_home not in sys.path:
Expand Down Expand Up @@ -344,8 +345,6 @@ def test_template_query(self):
r = self.client.get(url_for('user.get_myads', user_id='4'),
headers={'Authorization': 'secret'})

start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date()

self.assertStatus(r, 200)
self.assertEquals(r.json[0]['id'], query_id)
self.assertEquals(r.json[0]['name'], 'keyword1, etc.')
Expand Down Expand Up @@ -412,6 +411,8 @@ def test_template_query(self):
else:
start_date = adsmutils.get_date().date()

end_date = adsmutils.get_date().date()

self.assertStatus(r, 200)
self.assertEquals(r.json[0]['id'], query_id)
self.assertEquals(r.json[0]['name'], 'keyword1, etc. - Recent Papers')
Expand All @@ -422,6 +423,24 @@ def test_template_query(self):
self.assertEquals(r.json[0]['template'], 'arxiv')
self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2')
self.assertEquals(r.json[0]['classes'], [u'astro-ph'])
self.assertTrue('entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_date, end_date) in r.json[0]['query'][0]['q'])

# check the stored query via the pipeline export using the start date option
# this should use the original start date, since the passed date is later
start_iso = (adsmutils.get_date() + datetime.timedelta(days=5)).isoformat()
r = self.client.get(url_for('user.get_myads', user_id='4', start_isodate=start_iso),
headers={'Authorization': 'secret'})

self.assertTrue('entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_date, end_date) in r.json[0]['query'][0]['q'])

# this should use the passed date, since it's before the default start date
start_iso = (adsmutils.get_date() - datetime.timedelta(days=15)).isoformat()
r = self.client.get(url_for('user.get_myads', user_id='4', start_isodate=start_iso),
headers={'Authorization': 'secret'})

start_iso_date = parser.parse(start_iso).date()
self.assertTrue(
'entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_iso_date, end_date) in r.json[0]['query'][0]['q'])

# edit the stored query
r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
Expand Down Expand Up @@ -582,6 +601,17 @@ def test_template_query(self):
self.assertEquals(r.json[0]['frequency'], 'weekly')
self.assertEquals(r.json[0]['type'], 'template')

r = self.client.get(url_for('user.get_myads', user_id=4),
headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})

self.assertTrue(r.json[4]['query'][0]['q'] == 'citations(author:"Kurtz, Michael")')

# a passed start date shouldn't matter to citations queries
r2 = self.client.get(url_for('user.get_myads', user_id=4, start_isodate=start_iso_date),
headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})

self.assertTrue(r2.json[4]['query'][0]['q'] == r.json[4]['query'][0]['q'])

# test the author query construction
r = self.client.post(url_for('user.myads_notifications'),
headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
Expand All @@ -596,8 +626,6 @@ def test_template_query(self):
r = self.client.get(url_for('user.myads_notifications', myads_id=query_id),
headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})

start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date()

self.assertStatus(r, 200)
self.assertEquals(r.json[0]['id'], query_id)
self.assertEquals(r.json[0]['name'], 'Favorite Authors - Recent Papers')
Expand All @@ -606,6 +634,23 @@ def test_template_query(self):
self.assertEquals(r.json[0]['frequency'], 'weekly')
self.assertEquals(r.json[0]['type'], 'template')

# check start dates in constructed query - when no start date is passed, the start date should default to (now minus the weekly time range)
r = self.client.get(url_for('user.get_myads', user_id=4),
headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})

start_date = (adsmutils.get_date() - datetime.timedelta(days=self.app.config.get('MYADS_WEEKLY_TIME_RANGE'))).date()
self.assertTrue('author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_date, end_date)
in r.json[5]['query'][0]['q'])

# passing an earlier start date should respect that date
start_iso = (adsmutils.get_date() - datetime.timedelta(days=40)).isoformat()
r = self.client.get(url_for('user.get_myads', user_id=4, start_isodate=start_iso),
headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})

start_iso_date = parser.parse(start_iso).date()
self.assertTrue('author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_iso_date, end_date)
in r.json[5]['query'][0]['q'])

@httpretty.activate
def test_myads_execute_notification(self):

Expand Down Expand Up @@ -647,7 +692,7 @@ def test_myads_execute_notification(self):
r = self.client.get(url_for('user.execute_myads_query', myads_id=query_id),
headers={'Authorization': 'secret', 'X-Adsws-Uid': user_id})

start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date()
start_date = (adsmutils.get_date() - datetime.timedelta(days=self.app.config.get('MYADS_WEEKLY_TIME_RANGE'))).date()

self.assertStatus(r, 200)
self.assertEquals(r.json, [{'q': 'author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"] '
Expand Down
27 changes: 19 additions & 8 deletions vault_service/views/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ def _get_general_query_data(session, query_id):
data = urlparse.parse_qs(query)
return data

def _create_myads_query(template_type, frequency, data, classes=None):
def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None):
"""
Creates a query based on the stored myADS setup (for templated queries only)
:param frequency: daily or weekly
Expand All @@ -629,6 +629,9 @@ def _create_myads_query(template_type, frequency, data, classes=None):
out = []
beg_pubyear = (get_date() - datetime.timedelta(days=180)).year
end_date = get_date().date()
weekly_time_range = current_app.config.get('MYADS_WEEKLY_TIME_RANGE', 6)
if start_isodate:
start_isodate = parser.parse(start_isodate).date()
if template_type in ('arxiv', None):
if frequency == 'daily':
# on Mondays, deal with the weekend properly
Expand All @@ -638,8 +641,11 @@ def _create_myads_query(template_type, frequency, data, classes=None):
else:
start_date = get_date().date()
elif frequency == 'weekly':
time_range = current_app.config.get('MYADS_WEEKLY_TIME_RANGE', 25)
start_date = (get_date() - datetime.timedelta(days=time_range)).date()
start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date()

# if the provided last sent date is prior to normal start date, use the earlier date
if start_isodate and (start_isodate < start_date):
start_date = start_isodate

if template_type == 'arxiv':
if not classes:
Expand Down Expand Up @@ -675,14 +681,18 @@ def _create_myads_query(template_type, frequency, data, classes=None):
out.append({'q': q, 'sort': sort})
elif template_type == 'authors':
keywords = data
start_date = (get_date() - datetime.timedelta(days=25)).date()
start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date()
if start_isodate and (start_isodate < start_date):
start_date = start_isodate
q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
format(keywords, start_date, end_date, beg_pubyear)
sort = 'score desc, bibcode desc'
out.append({'q': q, 'sort': sort})
elif template_type == 'keyword':
keywords = data
start_date = (get_date() - datetime.timedelta(days=25)).date()
start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date()
if start_isodate and (start_isodate < start_date):
start_date = start_isodate
# most recent
q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
format(keywords, start_date, end_date, beg_pubyear)
Expand Down Expand Up @@ -714,7 +724,8 @@ def _create_myads_query(template_type, frequency, data, classes=None):

@advertise(scopes=['ads-consumer:myads'], rate_limit = [1000, 3600*24])
@bp.route('/get-myads/<user_id>', methods=['GET'])
def get_myads(user_id):
@bp.route('/get-myads/<user_id>/<start_isodate>', methods=['GET'])
def get_myads(user_id, start_isodate=None):
'''
Fetches a myADS profile for the pipeline for a given uid
'''
Expand Down Expand Up @@ -757,11 +768,11 @@ def get_myads(user_id):
query = None
else:
data = _get_general_query_data(session, s.query_id)
query = _create_myads_query(s.template, s.frequency, data, classes=s.classes)
query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate)
else:
qid = None
data = s.data.encode('utf-8') if s.data else s.data
query = _create_myads_query(s.template, s.frequency, data, classes=s.classes)
query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate)

o['qid'] = qid
o['query'] = query
Expand Down

0 comments on commit 91efafb

Please sign in to comment.