def get_series_records(series_name, test=False, token=None):
    """Return all record hits whose ``caltech:series`` custom field matches.

    The records API is queried once to read the total hit count, then the
    result set is fetched in pages of 1000 and the per-page ``hits`` lists
    are concatenated.

    Args:
        series_name: Series name to search for. It is interpolated verbatim
            between double quotes in the query string (no escaping is
            applied here).
        test: When true, query the ``authors.caltechlibrary.dev`` host
            instead of ``authors.library.caltech.edu``.
        token: Optional API token. When provided it is sent as a bearer
            token on every request.

    Returns:
        A list with the entries of ``["hits"]["hits"]`` from every page.
        Empty when the reported total is 0.
    """
    if test:
        url = "https://authors.caltechlibrary.dev/api/records"
    else:
        url = "https://authors.library.caltech.edu/api/records"

    # %5C%3A and %3D are the percent-encoded forms of "\:" and "=".
    query = f'?q=custom_fields.caltech%5C%3Aseries%3D"{series_name}"'

    # Only build auth headers when a token was supplied; ``headers=None``
    # makes requests fall back to its defaults.
    headers = None
    if token:
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-type": "application/json",
        }

    url = url + query
    # The first request is used only to learn how many hits exist in total.
    response = requests.get(url, headers=headers)
    total = response.json()["hits"]["total"]
    pages = math.ceil(int(total) / 1000)

    hits = []
    for page in range(1, pages + 1):
        chunkurl = f"{url}&size=1000&page={page}"
        # The headers were previously built but never sent, so the token
        # had no effect; pass them on every request.
        payload = requests.get(chunkurl, headers=headers).json()
        hits += payload["hits"]["hits"]

    return hits