From 4ec0f4a9a6bd90429aa8f31e5127e7352fa362d5 Mon Sep 17 00:00:00 2001
From: Jordan Padams
Date: Mon, 16 May 2022 14:16:28 -0700
Subject: [PATCH] Add sync script and update identifier handling

* Add sync_dois.sh script per #309
* Update identifier handling for NoneType identifiers #331
---
Review notes (ignored when the patch is applied):
* _parse_identifiers now iterates over a copy of the identifier list, so
  removing a NoneType identifier no longer skips the entry that follows it.
* logger.warn (deprecated alias) is replaced with logger.warning.
* sync_dois.sh quotes its expansions and exits with the status of a failing
  command instead of always reporting success.
* The blob hashes on the "index" lines were not regenerated for these changes.

 scripts/sync_dois.sh                          | 55 +++++++++++++++++++
 .../outputs/datacite/datacite_web_parser.py   |  9 ++-
 2 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100755 scripts/sync_dois.sh

diff --git a/scripts/sync_dois.sh b/scripts/sync_dois.sh
new file mode 100755
index 00000000..582815a0
--- /dev/null
+++ b/scripts/sync_dois.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Runs pds-doi-init against the datacite service for a DOI prefix and
+# submitter, after sourcing /home/pds4/pds-doi-service/bin/activate.
+#
+# Usage: sync_dois.sh [-h|--help] [-p|--prefix PREFIX] [-s|--submitter EMAIL]
+
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    -h|--help)
+      echo "sync_dois.sh [-h|--help] [-p|--prefix PREFIX] [-s|--submitter EMAIL]"
+      exit 0
+      ;;
+    -p|--prefix)
+      PREFIX="$2"
+      shift
+      shift
+      ;;
+    -s|--submitter)
+      SUBMITTER="$2"
+      shift
+      shift
+      ;;
+    *)
+      echo "Unknown option $1"
+      exit 1
+      ;;
+  esac
+done
+
+# Fall back to the built-in defaults when a value is empty.
+if [[ -z ${PREFIX} ]]; then
+  PREFIX="10.17189"
+fi
+
+if [[ -z ${SUBMITTER} ]]; then
+  SUBMITTER="pds-operator@jpl.nasa.gov"
+fi
+
+echo "=================================================="
+echo "Starting DOI sync for $(date)"
+echo
+echo "PREFIX=${PREFIX}"
+echo "SUBMITTER=${SUBMITTER}"
+
+# Abort with the failing command's status instead of reporting success.
+source /home/pds4/pds-doi-service/bin/activate || exit $?
+
+# Quote the expansions so each value is passed as exactly one argument.
+pds-doi-init --service datacite --prefix "${PREFIX}" --submitter "${SUBMITTER}" || exit $?
+
+echo "Sync complete"
+echo
+
+exit 0
diff --git a/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py b/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py
index b26cf1b8..e75492ce 100644
--- a/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py
+++ b/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py
@@ -108,7 +108,12 @@ def _parse_identifiers(record):
             identifiers = record["identifiers"]
 
-            for identifier in identifiers:
-                identifier["identifier"] = identifier["identifier"].strip()
+            # Iterate over a copy: removing from the list being iterated skips the next entry.
+            for identifier in list(identifiers):
+                if identifier["identifier"] is None:
+                    logger.warning(f"Odd metadata. NoneType identifier in record: {json.dumps(record, indent=4, sort_keys=True)}")
+                    identifiers.remove(identifier)
+                else:
+                    identifier["identifier"] = identifier["identifier"].strip()
 
             return identifiers
         except KeyError: