diff --git a/vault_service/tests/test_user.py b/vault_service/tests/test_user.py
index 28d7c63..1528c88 100644
--- a/vault_service/tests/test_user.py
+++ b/vault_service/tests/test_user.py
@@ -651,6 +651,56 @@ def test_template_query(self):
 
         self.assertTrue('author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_iso_date, end_date) in r.json[5]['query'][0]['q'])
 
+    @httpretty.activate
+    def test_non_ascii_myads(self):
+        """myADS setups whose data contain non-ASCII characters can be stored and executed."""
+        httpretty.register_uri(
+            httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
+            content_type='application/json',
+            status=200,
+            body="""{
+            "responseHeader":{
+            "status":0, "QTime":0,
+            "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}},
+            "response":{"numFound":10456930,"start":0,"docs":[
+              { "bibcode":"2005JGRC..110.4002G" },
+              { "bibcode":"2005JGRC..110.4003N" },
+              { "bibcode":"2005JGRC..110.4004Y" }]}}""")
+
+        r = self.client.post(url_for('user.query'),
+                             headers={'Authorization': 'secret'},
+                             data=json.dumps({'q': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"'}),
+                             content_type='application/json')
+        with self.app.session_scope() as session:
+            q = session.query(Query).filter_by(qid=r.json['qid']).first()
+
+        self.assertStatus(r, 200)
+
+        self.assertTrue(r.json['qid'], 'qid is missing')
+        qid = r.json['qid']
+
+        # some test data is unicode, some utf-8 because we use utf-8 encoding by default in bumblebee
+        test_data = [{'type': 'template', 'template': 'keyword', 'data': u'author:"Galindo-Guil, Francisco Jos\xe9"'},
+                     {'type': 'template', 'template': 'authors', 'data': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"'},
+                     {'type': 'template', 'template': 'citations', 'data': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"'},
+                     {'type': 'template', 'template': 'arxiv', 'data': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"', 'classes': ['astro-ph']},
+                     {'type': 'query', 'name': 'Query 1', 'qid': qid, 'stateful': True, 'frequency': 'daily'}
+                     ]
+
+        for t in test_data:
+            q = self.client.post(url_for('user.myads_notifications'),
+                                 headers={'Authorization': 'secret', 'X-Adsws-Uid': '101'},
+                                 data=json.dumps(t),
+                                 content_type='application/json')
+
+            self.assertStatus(q, 200)
+
+            s = self.client.get(url_for('user.execute_myads_query', myads_id=q.json['id']),
+                                headers={'Authorization': 'secret', 'X-Adsws-Uid': '101'})
+
+            self.assertStatus(s, 200)
+            self.assertIn(unicode('Galindo-Guil, Francisco Jos\xc3\xa9', 'utf-8'), s.json[0]['q'])
+
     @httpretty.activate
     def test_myads_execute_notification(self):
 
diff --git a/vault_service/views/user.py b/vault_service/views/user.py
index 88a3b2b..a8d6540 100644
--- a/vault_service/views/user.py
+++ b/vault_service/views/user.py
@@ -608,11 +608,13 @@ def _get_general_query_data(session, query_id):
     data = {}
     q = session.query(Query).filter_by(id=query_id).one()
     if q and q.query:
+        # note that json.loads returns unicode by default in Python 2
         query = json.loads(q.query).get('query')
         if query:
-            # Parse query string such as:
+            # Parse url encoded query string such as:
             # u'fq=%7B%21type%3Daqp+v%3D%24fq_database%7D&fq_database=%28database%3Aastronomy%29&q=star&sort=citation_count+desc%2C+bibcode+desc'
-            data = urlparse.parse_qs(query)
+            # must pass byte string to parse_qs - urls only use ascii characters, so ascii encoding is fine
+            data = urlparse.parse_qs(query.encode('ascii'))
     return data
 
 def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None):