Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugzilla bugs downloader #104

Merged
merged 6 commits into from
Feb 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 38 additions & 11 deletions crashsimilarity/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ class BugzillaDownloader(Downloader):
def __init__(self, cache=None):
super().__init__(cache)

@staticmethod
def _clean_signatures(signatures):
    """
    Extract the distinct crash signatures from a raw signature field value.

    The value is split on CRLF. From each piece, everything up to and
    including the first '[@' is dropped, then everything from the last
    remaining ']' onwards, and surrounding whitespace is stripped.

    :param signatures: string, raw value (e.g. a bug's cf_crash_signature)
    :return: list of unique cleaned signatures, in order of first appearance
    """
    # De-duplicate while remembering first-seen order, so the returned list
    # (which callers may cache) is identical on every run; converting a set
    # to a list made the order depend on string hashing.
    seen = set()
    ordered = []
    for sig in signatures.split('\r\n'):
        # The two delimiters are searched independently (not as one slice)
        # so that a piece containing only one of them is still trimmed on
        # that side.
        start = sig.find('[@')
        if start != -1:
            sig = sig[start + 2:]
        # Looked up after the prefix cut, so only a ']' that follows '[@'
        # can end the signature.
        end = sig.rfind(']')
        if end != -1:
            sig = sig[:end]
        sig = sig.strip()
        if sig not in seen:
            seen.add(sig)
            ordered.append(sig)
    return ordered

def download_signatures(self, bug_id):
key = ('bugzilla_bug', bug_id, utils.utc_today())
if self._cache and key in self._cache:
Expand All @@ -40,19 +53,33 @@ def download_signatures(self, bug_id):

params = {'id': bug_id}
response = self.get_with_retries(self._URL, params)
signatures = set()
for sig in self._json_or_raise(response)['bugs'][0]['cf_crash_signature'].split('\r\n'):
pos = sig.find('[@')
if pos != -1:
sig = sig[pos + 2:]
pos = sig.rfind(']')
if pos != -1:
sig = sig[:pos]
signatures.add(sig.strip())
signatures = self._json_or_raise(response)['bugs'][0]['cf_crash_signature']
cleaned_signatures = self._clean_signatures(signatures)

if self._cache:
self._cache[key] = list(signatures)
return list(signatures)
self._cache[key] = cleaned_signatures
return cleaned_signatures

def download_bugs(self, from_date, to_date):
    """
    Query Bugzilla for Firefox and Core bugs whose creation falls in the
    given date range and whose crash signature field is not empty.

    :param from_date: string "YYYY-MM-DD"
    :param to_date: string "YYYY-MM-DD"
    :return: list of bugs from the response; each bug's
             'cf_crash_signature' is replaced by its cleaned list of
             signatures
    """
    query = dict(
        include_fields='id,cf_crash_signature',
        chfield='[Bug creation]',
        chfieldfrom=from_date,
        chfieldto=to_date,
        f2='cf_crash_signature',
        o2='isnotempty',
        product=['Firefox', 'Core'],
    )

    logging.info("Fetching bugs from Bugzilla..")
    payload = self._json_or_raise(self.get_with_retries(self._URL, query))
    found = payload['bugs']
    # Swap the raw signature string for its parsed form, in place.
    for entry in found:
        raw = entry['cf_crash_signature']
        entry['cf_crash_signature'] = self._clean_signatures(raw)
    return found


class SocorroDownloader(Downloader):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ def test_download_signatures_for_bug_id(self):
self.assertCountEqual(resp, crash_signatures)
self.assertCountEqual(resp2, crash_signatures2)

def test_download_bugs(self):
    """download_bugs yields a non-empty list of bugs with an int id and a list of signatures."""
    expected_types = (('id', int), ('cf_crash_signature', list))

    bugs_list = BugzillaDownloader().download_bugs('2018-01-01', '2018-02-01')
    self.assertIsInstance(bugs_list, list)
    self.assertGreater(len(bugs_list), 0)
    for bug in bugs_list:
        # Presence of every field is checked before any type check.
        for field, _ in expected_types:
            self.assertIn(field, bug)
        for field, kind in expected_types:
            self.assertIsInstance(bug[field], kind)

# cache tests can fail if started just before midnight. nobody should care
def test_downloader_actual_cache(self):
days_42 = timedelta(days=42)
Expand Down