diff --git a/config.py b/config.py index 8ca7b14..3cb3a7e 100644 --- a/config.py +++ b/config.py @@ -35,7 +35,7 @@ # import endpoints HARBOUR_MYADS_IMPORT_ENDPOINT = 'https://api.adsabs.harvard.edu/v1/harbour/myads/classic/%s' MYADS_DAILY_TIME_RANGE = 2 # days -MYADS_WEEKLY_TIME_RANGE = 25 # days +MYADS_WEEKLY_TIME_RANGE = 6 # days # arXiv categories and sub-categories ALLOWED_ARXIV_CLASSES = ['astro-ph', diff --git a/requirements.txt b/requirements.txt index d1b0275..1fba639 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ git+https://github.com/adsabs/ADSMicroserviceUtils.git@v1.1.5 git+https://github.com/adsabs/ADSParser.git@v1.0.3 alembic==0.8.9 -psycopg2==2.8.3 +psycopg2==2.7 Flask-Script==2.0.5 Werkzeug==0.16.1 diff --git a/vault_service/tests/test_user.py b/vault_service/tests/test_user.py index 56be884..28d7c63 100644 --- a/vault_service/tests/test_user.py +++ b/vault_service/tests/test_user.py @@ -7,6 +7,7 @@ import cgi from StringIO import StringIO import datetime +from dateutil import parser project_home = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) if project_home not in sys.path: @@ -344,8 +345,6 @@ def test_template_query(self): r = self.client.get(url_for('user.get_myads', user_id='4'), headers={'Authorization': 'secret'}) - start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date() - self.assertStatus(r, 200) self.assertEquals(r.json[0]['id'], query_id) self.assertEquals(r.json[0]['name'], 'keyword1, etc.') @@ -412,6 +411,8 @@ def test_template_query(self): else: start_date = adsmutils.get_date().date() + end_date = adsmutils.get_date().date() + self.assertStatus(r, 200) self.assertEquals(r.json[0]['id'], query_id) self.assertEquals(r.json[0]['name'], 'keyword1, etc. - Recent Papers') @@ -422,6 +423,24 @@ def test_template_query(self): self.assertEquals(r.json[0]['template'], 'arxiv') self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2') self.assertEquals(r.json[0]['classes'], [u'astro-ph']) + self.assertTrue('entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_date, end_date) in r.json[0]['query'][0]['q']) + + # check the stored query via the pipeline export using the start date option + # this should use the original start date, since the passed date is later + start_iso = (adsmutils.get_date() + datetime.timedelta(days=5)).isoformat() + r = self.client.get(url_for('user.get_myads', user_id='4', start_isodate=start_iso), + headers={'Authorization': 'secret'}) + + self.assertTrue('entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_date, end_date) in r.json[0]['query'][0]['q']) + + # this should use the passed date, since it's before the default start date + start_iso = (adsmutils.get_date() - datetime.timedelta(days=15)).isoformat() + r = self.client.get(url_for('user.get_myads', user_id='4', start_isodate=start_iso), + headers={'Authorization': 'secret'}) + + start_iso_date = parser.parse(start_iso).date() + self.assertTrue( + 'entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_iso_date, end_date) in r.json[0]['query'][0]['q']) # edit the stored query r = self.client.put(url_for('user.myads_notifications', myads_id=query_id), @@ -582,6 +601,17 @@ def test_template_query(self): self.assertEquals(r.json[0]['frequency'], 'weekly') self.assertEquals(r.json[0]['type'], 'template') + r = self.client.get(url_for('user.get_myads', user_id=4), + headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'}) + + self.assertTrue(r.json[4]['query'][0]['q'] == 'citations(author:"Kurtz, Michael")') + + # a passed start date shouldn't matter to citations queries + r2 = self.client.get(url_for('user.get_myads', user_id=4, start_isodate=start_iso_date), + headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'}) + + self.assertTrue(r2.json[4]['query'][0]['q'] == r.json[4]['query'][0]['q']) + # test the author query construction r = self.client.post(url_for('user.myads_notifications'), headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'}, @@ -596,8 +626,6 @@ def test_template_query(self): r = self.client.get(url_for('user.myads_notifications', myads_id=query_id), headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'}) - start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date() - self.assertStatus(r, 200) self.assertEquals(r.json[0]['id'], query_id) self.assertEquals(r.json[0]['name'], 'Favorite Authors - Recent Papers') @@ -606,6 +634,23 @@ def test_template_query(self): self.assertEquals(r.json[0]['frequency'], 'weekly') self.assertEquals(r.json[0]['type'], 'template') + # check start dates in constructed query - no start date should default to now - the weekly time range + r = self.client.get(url_for('user.get_myads', user_id=4), + headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'}) + + start_date = (adsmutils.get_date() - datetime.timedelta(days=self.app.config.get('MYADS_WEEKLY_TIME_RANGE'))).date() + self.assertTrue('author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_date, end_date) + in r.json[5]['query'][0]['q']) + + # passing an earlier start date should respect that date + start_iso = (adsmutils.get_date() - datetime.timedelta(days=40)).isoformat() + r = self.client.get(url_for('user.get_myads', user_id=4, start_isodate=start_iso), + headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'}) + + start_iso_date = parser.parse(start_iso).date() + self.assertTrue('author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_iso_date, end_date) + in r.json[5]['query'][0]['q']) + @httpretty.activate def test_myads_execute_notification(self): @@ -647,7 +692,7 @@ def test_myads_execute_notification(self): r = self.client.get(url_for('user.execute_myads_query', myads_id=query_id), headers={'Authorization': 'secret', 'X-Adsws-Uid': user_id}) - start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date() + start_date = (adsmutils.get_date() - datetime.timedelta(days=self.app.config.get('MYADS_WEEKLY_TIME_RANGE'))).date() self.assertStatus(r, 200) self.assertEquals(r.json, [{'q': 'author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"] ' diff --git a/vault_service/views/user.py b/vault_service/views/user.py index 6247001..88a3b2b 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -615,7 +615,7 @@ def _get_general_query_data(session, query_id): data = urlparse.parse_qs(query) return data -def _create_myads_query(template_type, frequency, data, classes=None): +def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None): """ Creates a query based on the stored myADS setup (for templated queries only) :param frequency: daily or weekly @@ -629,6 +629,9 @@ def _create_myads_query(template_type, frequency, data, classes=None): out = [] beg_pubyear = (get_date() - datetime.timedelta(days=180)).year end_date = get_date().date() + weekly_time_range = current_app.config.get('MYADS_WEEKLY_TIME_RANGE', 6) + if start_isodate: + start_isodate = parser.parse(start_isodate).date() if template_type in ('arxiv', None): if frequency == 'daily': # on Mondays, deal with the weekend properly @@ -638,8 +641,11 @@ def _create_myads_query(template_type, frequency, data, classes=None): else: start_date = get_date().date() elif frequency == 'weekly': - time_range = current_app.config.get('MYADS_WEEKLY_TIME_RANGE', 25) - start_date = (get_date() - datetime.timedelta(days=time_range)).date() + start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date() + + # if the provided last sent date is prior to normal start date, use the earlier date + if start_isodate and (start_isodate < start_date): + start_date = start_isodate if template_type == 'arxiv': if not classes: @@ -675,14 +681,18 @@ def _create_myads_query(template_type, frequency, data, classes=None): out.append({'q': q, 'sort': sort}) elif template_type == 'authors': keywords = data - start_date = (get_date() - datetime.timedelta(days=25)).date() + start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date() + if start_isodate and (start_isodate < start_date): + start_date = start_isodate q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\ format(keywords, start_date, end_date, beg_pubyear) sort = 'score desc, bibcode desc' out.append({'q': q, 'sort': sort}) elif template_type == 'keyword': keywords = data - start_date = (get_date() - datetime.timedelta(days=25)).date() + start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date() + if start_isodate and (start_isodate < start_date): + start_date = start_isodate # most recent q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\ format(keywords, start_date, end_date, beg_pubyear) @@ -714,7 +724,8 @@ def _create_myads_query(template_type, frequency, data, classes=None): @advertise(scopes=['ads-consumer:myads'], rate_limit = [1000, 3600*24]) @bp.route('/get-myads/', methods=['GET']) -def get_myads(user_id): +@bp.route('/get-myads//', methods=['GET']) +def get_myads(user_id, start_isodate=None): ''' Fetches a myADS profile for the pipeline for a given uid ''' @@ -757,11 +768,11 @@ def get_myads(user_id): query = None else: data = _get_general_query_data(session, s.query_id) - query = _create_myads_query(s.template, s.frequency, data, classes=s.classes) + query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate) else: qid = None data = s.data.encode('utf-8') if s.data else s.data - query = _create_myads_query(s.template, s.frequency, data, classes=s.classes) + query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate) o['qid'] = qid o['query'] = query