Skip to content

Commit

Permalink
Data update and bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
adambuttrick committed Jul 19, 2024
1 parent b156846 commit 425089b
Show file tree
Hide file tree
Showing 15 changed files with 46,961 additions and 37,673 deletions.
5,127 changes: 2,106 additions & 3,021 deletions data/aggregate_mapped.csv

Large diffs are not rendered by default.

18,248 changes: 9,637 additions & 8,611 deletions data/aggregate_unmapped.csv

Large diffs are not rendered by default.

45,655 changes: 22,883 additions & 22,772 deletions data/crossref_funder_work_counts.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/crossref_funders.json

Large diffs are not rendered by default.

Binary file modified data/crossref_overlap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4,347 changes: 2,229 additions & 2,118 deletions data/datacite_funder_work_counts.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/datacite_funders.json

Large diffs are not rendered by default.

Binary file modified data/datacite_overlap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3,671 changes: 2,942 additions & 729 deletions data/funders.json

Large diffs are not rendered by default.

7,494 changes: 7,100 additions & 394 deletions data/members.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/ror_funder_registry_mapping.json

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
"DataCite - Aggregrate overlap": DataCite_view
}

funder_registry_version = '1.56'
ror_registry_version = '1.42'
works_count_date = '2024/03/10'
funder_registry_version = '1.58'
ror_registry_version = '1.49'
works_count_date = '2024/07/16'

def main():
sidebar_title = st.sidebar.title("Views")
Expand Down
4 changes: 0 additions & 4 deletions utilities/create_funder_id_mapping_w_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
import os


import os
import requests
import zipfile

def download_and_unzip(record_id, path='.'):
# Downloading the record from Zenodo
response = requests.get(f'https://zenodo.org/api/records/{record_id}')
Expand Down
26 changes: 18 additions & 8 deletions utilities/get_crossref_funder_work_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,23 @@
import requests


def catch_requests_exceptions(func):
    """Wrap *func* so that any requests-level failure yields 'Error'.

    The wrapped callable forwards all positional and keyword arguments;
    if the call raises ``requests.exceptions.RequestException`` the
    sentinel string ``'Error'`` is returned instead of propagating.
    """
    def guarded(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except requests.exceptions.RequestException:
            return 'Error'
        return result
    return guarded
def catch_request_exceptions(max_retries=3, delay=30):
    """Decorator factory that retries a request-making function on failure.

    Retries the wrapped function up to ``max_retries`` times, sleeping
    ``delay`` seconds between attempts, and returns the string ``'Error'``
    after every attempt has raised ``requests.exceptions.RequestException``.

    Bug fix: this factory is applied *bare* (``@catch_request_exceptions``,
    no parentheses) further down in this file, which previously passed the
    target function in as ``max_retries`` and replaced it with the inner
    ``decorator`` — so every call to the decorated function failed.  When
    the first positional argument is callable we now treat it as the
    function to wrap and use the default retry settings, which keeps the
    parenthesized form (``@catch_request_exceptions(5, 10)``) working
    unchanged.
    """
    if callable(max_retries):
        # Bare application: @catch_request_exceptions with no parentheses.
        func = max_retries
        return catch_request_exceptions()(func)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            retries = 0
            while retries < max_retries:
                try:
                    return func(*args, **kwargs)
                except requests.exceptions.RequestException:
                    retries += 1
                    if retries == max_retries:
                        print(f"All {max_retries} attempts failed.")
                        return 'Error'
                    print(f"Request failed. Retrying in {delay} seconds... (Attempt {retries}/{max_retries})")
                    time.sleep(delay)
        return wrapper
    return decorator


def read_input_file(input_file):
Expand All @@ -29,7 +39,7 @@ def transform_funder_id(funder_id):
return re.sub('http://dx.doi.org/10.13039/', '', funder_id)


@catch_requests_exceptions
@catch_request_exceptions
def query_crossref_api(funder_id, headers):
base_url = "https://api.crossref.org/works"
params = {"filter": f"funder:{funder_id}"}
Expand Down
50 changes: 40 additions & 10 deletions utilities/get_datacite_funder_work_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,28 @@
import csv
import requests
import json
import time
from functools import wraps


def catch_request_exception(max_retries=3, delay=30):
    """Build a decorator that retries a request-making function.

    The decorated function is attempted up to ``max_retries`` times; each
    ``requests.exceptions.RequestException`` triggers a ``delay``-second
    pause before the next try.  When the final attempt also fails, the
    sentinel string ``'Error'`` is returned.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except requests.exceptions.RequestException:
                    if attempt == max_retries:
                        print(f"All {max_retries} attempts failed.")
                        return 'Error'
                    print(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt}/{max_retries})")
                    time.sleep(delay)
        return wrapper
    return decorator

def parse_arguments():
    """Parse command-line options for the DataCite work-count script."""
    parser = argparse.ArgumentParser(
        description='Retrieve all work counts for funders in DataCite')
    # The funder input CSV is mandatory.
    parser.add_argument(
        '-i', '--input', help='Input CSV file', required=True)
    # Output defaults to a fixed filename in the current directory.
    parser.add_argument(
        '-o', '--output', help='Output CSV file', default='datacite_funder_work_counts.csv')
    return parser.parse_args()

def read_input_file(input_file):
funder_ids = []
Expand All @@ -21,26 +34,43 @@ def read_input_file(input_file):
funder_ids.append(funder['id'])
return funder_ids


def transform_funder_id(funder_id):
    """Turn a Funder Registry DOI URL into a DataCite wildcard query token.

    Replaces the ``http://dx.doi.org/10.13039/`` prefix with ``*`` so the
    bare funder number can be matched as a wildcard in the DataCite query.
    (The original span contained a duplicated, unreachable ``return`` line —
    leftover diff residue — which is removed here.)
    NOTE(review): the ``.`` characters in the regex match any character;
    harmless for well-formed IDs but a raw, escaped pattern would be stricter.
    """
    return re.sub('http://dx.doi.org/10.13039/', '*', funder_id)


def form_query_url(funder_id):
    """Build (and echo to stdout) the DataCite DOI query URL for a funder ID."""
    query_url = f"https://api.datacite.org/dois?query=fundingReferences.funderIdentifier:{funder_id}"
    print(query_url)
    return query_url


@catch_request_exception()
def query_datacite_api(url):
    """GET *url* from the DataCite API and return the decoded JSON body.

    Raises for non-2xx responses so the retry decorator can catch the
    resulting ``requests`` exception.
    """
    api_response = requests.get(url)
    api_response.raise_for_status()
    return api_response.json()


def extract_work_count(response):
    """Return the total work count from a DataCite API JSON response dict."""
    meta = response['meta']
    return meta['total']


def write_output_csv(output_file, data):
    """Append a single row to *output_file* as CSV.

    The file is opened in append mode with ``newline=''`` as the csv
    module requires; without it, every row gains a spurious blank line
    on Windows.  *data* is a sequence of cell values for one row.
    """
    with open(output_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(data)


def parse_arguments():
    """Parse the command line: a required input CSV and an optional output CSV."""
    arg_parser = argparse.ArgumentParser(
        description='Retrieve all work counts for funders in DataCite')
    arg_parser.add_argument('-i', '--input', help='Input CSV file', required=True)
    arg_parser.add_argument('-o', '--output', help='Output CSV file',
                            default='datacite_funder_work_counts.csv')
    return arg_parser.parse_args()


def main():
args = parse_arguments()
funder_ids = read_input_file(args.input)
Expand All @@ -52,7 +82,7 @@ def main():
response = query_datacite_api(url)
work_count = extract_work_count(response)
write_output_csv(args.output, [funder_id, work_count])


if __name__ == "__main__":
main()

0 comments on commit 425089b

Please sign in to comment.