Skip to content

Commit

Permalink
Merge pull request #65 from kelockhart/unicode-fix
Browse files Browse the repository at this point in the history
Fix for non-ascii characters in general queries in myADS
  • Loading branch information
kelockhart authored Oct 15, 2020
2 parents eb5f223 + 5c5be42 commit 1f1aa8d
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
50 changes: 50 additions & 0 deletions vault_service/tests/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,56 @@ def test_template_query(self):
self.assertTrue('author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"]'.format(start_iso_date, end_date)
in r.json[5]['query'][0]['q'])

@httpretty.activate
def test_non_ascii_myads(self):
    """Check that non-ASCII author names work in myADS notifications.

    Posts a general query whose author name contains a UTF-8 encoded
    e-acute, creates one myADS notification per entry of ``test_data``
    (four templates plus one notification built on the stored query),
    executes each notification and asserts that the decoded author name
    is contained in the first returned query string.
    """
    # Mock the Solr query endpoint with a canned JSON response.
    httpretty.register_uri(
        httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
        content_type='application/json',
        status=200,
        body="""{
"responseHeader":{
"status":0, "QTime":0,
"params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}},
"response":{"numFound":10456930,"start":0,"docs":[
{ "bibcode":"2005JGRC..110.4002G" },
{ "bibcode":"2005JGRC..110.4003N" },
{ "bibcode":"2005JGRC..110.4004Y" }]}}""")

    r = self.client.post(url_for('user.query'),
                         headers={'Authorization': 'secret'},
                         data=json.dumps({'q': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"'}),
                         content_type='application/json')

    # Check the response before reading the qid out of it, so that a failed
    # request is reported as a status mismatch rather than a lookup error.
    self.assertStatus(r, 200)
    self.assertTrue(r.json['qid'], 'qid is missing')
    qid = r.json['qid']

    # some test data is unicode, some utf-8 because we use utf-8 encoding by default in bumblebee
    test_data = [
        {'type': 'template', 'template': 'keyword', 'data': u'author:"Galindo-Guil, Francisco Jos\xe9"'},
        {'type': 'template', 'template': 'authors', 'data': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"'},
        {'type': 'template', 'template': 'citations', 'data': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"'},
        {'type': 'template', 'template': 'arxiv', 'data': 'author:"Galindo-Guil, Francisco Jos\xc3\xa9"', 'classes': ['astro-ph']},
        {'type': 'query', 'name': 'Query 1', 'qid': qid, 'stateful': True, 'frequency': 'daily'},
    ]

    # The expected value is the same for every notification; decode it once.
    expected_author = unicode('Galindo-Guil, Francisco Jos\xc3\xa9', 'utf-8')

    for payload in test_data:
        # Set up the notification ...
        created = self.client.post(url_for('user.myads_notifications'),
                                   headers={'Authorization': 'secret', 'X-Adsws-Uid': '101'},
                                   data=json.dumps(payload),
                                   content_type='application/json')

        self.assertStatus(created, 200)

        # ... then execute it and inspect the query string it produces.
        executed = self.client.get(url_for('user.execute_myads_query', myads_id=created.json['id']),
                                   headers={'Authorization': 'secret', 'X-Adsws-Uid': '101'})

        self.assertStatus(executed, 200)
        self.assertIn(expected_author, executed.json[0]['q'])

@httpretty.activate
def test_myads_execute_notification(self):

Expand Down
6 changes: 4 additions & 2 deletions vault_service/views/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,11 +608,13 @@ def _get_general_query_data(session, query_id):
data = {}
q = session.query(Query).filter_by(id=query_id).one()
if q and q.query:
# note that json.loads returns unicode by default in Python 2
query = json.loads(q.query).get('query')
if query:
# Parse query string such as:
# Parse url encoded query string such as:
# u'fq=%7B%21type%3Daqp+v%3D%24fq_database%7D&fq_database=%28database%3Aastronomy%29&q=star&sort=citation_count+desc%2C+bibcode+desc'
data = urlparse.parse_qs(query)
# must pass a byte string to parse_qs - url encoded strings only use ascii characters, so ascii encoding is fine
data = urlparse.parse_qs(query.encode('ascii'))
return data

def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None):
Expand Down

0 comments on commit 1f1aa8d

Please sign in to comment.