Skip to content

Commit

Permalink
Merge pull request #241 from NASA-PDS/239_datacite_sync
Browse files Browse the repository at this point in the history
DataCite Sync Script Update
  • Loading branch information
collinss-jpl authored Sep 14, 2021
2 parents 7e9acb8 + dd7b281 commit f831f39
Show file tree
Hide file tree
Showing 13 changed files with 544 additions and 285 deletions.
7 changes: 6 additions & 1 deletion src/pds_doi_service/core/actions/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""

from pds_doi_service.core.actions.action import DOICoreAction
from pds_doi_service.core.entities.doi import DoiStatus
from pds_doi_service.core.entities.doi import DoiEvent, DoiStatus
from pds_doi_service.core.input.exceptions import (InputFormatException,
DuplicatedTitleDOIException,
UnexpectedDOIActionException,
Expand Down Expand Up @@ -137,6 +137,11 @@ def _complete_dois(self, dois):
# Add 'status' field so the ranking in the workflow can be determined.
doi.status = DoiStatus.Pending if self._no_review else DoiStatus.Review

if self._no_review:
# Add the event field to instruct DataCite to publish DOI to
# findable state (should have no effect for other providers)
doi.event = DoiEvent.Publish

return dois

def _validate_dois(self, dois):
Expand Down
30 changes: 26 additions & 4 deletions src/pds_doi_service/core/actions/reserve.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""

from pds_doi_service.core.actions.action import DOICoreAction
from pds_doi_service.core.entities.doi import DoiStatus
from pds_doi_service.core.entities.doi import DoiEvent, DoiStatus
from pds_doi_service.core.input.exceptions import (CriticalDOIException,
DuplicatedTitleDOIException,
InputFormatException,
Expand All @@ -28,8 +28,8 @@
from pds_doi_service.core.input.node_util import NodeUtil
from pds_doi_service.core.outputs.doi_record import CONTENT_TYPE_JSON
from pds_doi_service.core.outputs.doi_validator import DOIValidator
from pds_doi_service.core.outputs.service import DOIServiceFactory
from pds_doi_service.core.outputs.web_client import WEB_METHOD_POST
from pds_doi_service.core.outputs.service import DOIServiceFactory, SERVICE_TYPE_DATACITE
from pds_doi_service.core.outputs.web_client import WEB_METHOD_POST, WEB_METHOD_PUT
from pds_doi_service.core.util.general_util import get_logger

logger = get_logger(__name__)
Expand Down Expand Up @@ -136,6 +136,12 @@ def _complete_dois(self, dois):
# Add 'status' field so the ranking in the workflow can be determined
doi.status = DoiStatus.Reserved_not_submitted if self._dry_run else DoiStatus.Reserved

if not self._dry_run:
# Add the event field to instruct DataCite to make this entry
# hidden so it can be modified (should have no effect for other
# providers)
doi.event = DoiEvent.Hide

return dois

def _validate_dois(self, dois):
Expand Down Expand Up @@ -237,8 +243,24 @@ def run(self, **kwargs):
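# Note that reserve requests normally utilize the POST method for both
# OSTI and DataCite; the one exception (handled below) is a DataCite
# record that already has a DOI assigned, which requires a PUT request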
if not self._dry_run:
service_type = DOIServiceFactory.get_service_type()

# If a DOI has already been assigned by DataCite,
# we need to use a PUT request on the URL associated with the DOI
if service_type == SERVICE_TYPE_DATACITE and doi.doi:
method = WEB_METHOD_PUT
url = '{url}/{doi}'.format(
url=self._config.get('DATACITE', 'url'), doi=doi.doi
)
# Otherwise, for both DataCite and OSTI, just a POST request
# on the default endpoint is sufficient
else:
method = WEB_METHOD_POST
url = self._config.get(service_type.upper(), 'url')

doi, o_doi_label = self._web_client.submit_content(
method=WEB_METHOD_POST,
method=method,
url=url,
payload=io_doi_label,
content_type=CONTENT_TYPE_JSON
)
Expand Down
23 changes: 23 additions & 0 deletions src/pds_doi_service/core/entities/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class ProductType(str, Enum):
Bundle = 'Bundle'
Text = 'Text'
Dataset = 'Dataset'
Other = 'Other'


@unique
Expand Down Expand Up @@ -74,6 +75,26 @@ class DoiStatus(str, Enum):
Deactivated = 'deactivated'


@unique
class DoiEvent(str, Enum):
    """
    Enumerates the possible DOI events that can be requested in a submission
    to DataCite.

    Each member's value is the lowercase event name as a string. Because the
    class also derives from str, members compare equal to those plain strings.

    Events consist of:
        Publish -
            Moves a DOI from draft or registered state to findable
        Register -
            Moves a DOI from draft to registered
        Hide -
            Moves a DOI from findable back to registered

    """
    Publish = 'publish'
    Register = 'register'
    Hide = 'hide'


@dataclass
class Doi:
"""The dataclass definition for a Doi object."""
Expand All @@ -82,6 +103,7 @@ class Doi:
product_type: ProductType
product_type_specific: str
related_identifier: str
identifiers: list = field(default_factory=list)
authors: list = None
keywords: set = field(default_factory=set)
editors: list = None
Expand All @@ -96,3 +118,4 @@ class Doi:
message: str = None
date_record_added: datetime = None
date_record_updated: datetime = None
event: DoiEvent = None
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
{% endif %}
"type": "dois",
"attributes": {
{% if doi.status.value == "pending" %}
"event": "publish",
{% elif doi.status.value == "findable" %}
"event": "hide",
{% if doi.event %}
"event": "{{ doi.event.value }}",
{% endif %}
{% if doi.doi %}
"doi": "{{ doi.doi }}",
Expand All @@ -24,6 +22,12 @@
"suffix": "{{ doi.id }}",
{% endif %}
"identifiers": [
{% for identifier in doi.identifiers %}
{
"identifier": "{{ identifier.identifier.strip() }}",
"identifierType": "{{ identifier.identifierType }}"
},
{% endfor %}
{
{% if doi.doi %}
"identifier": "{{ doi.doi }}",
Expand All @@ -36,10 +40,25 @@
"creators": [
{% for author in doi.authors %}
{
{% if author.name_type %}
"nameType": "{{author.name_type}}",
{% else %}
"nameType": "Personal",
"name": "{{ author['last_name'] }}, {{ author['first_name'] }}",
"givenName": "{{ author['first_name'] }}",
"familyName": "{{ author['last_name'] }}"
{% endif %}
{% if author.first_name and author.last_name %}
"name": "{{ author.first_name }} {{ author.last_name }}",
{% else %}
"name": "{{ author.name }}",
{% endif %}
"nameIdentifiers": [
{% for name_identifier in author.name_identifiers %}
{
{% for key, value in name_identifier.items() %}
"{{key}}": "{{value}}"{% if not loop.last %},{% endif +%}
{% endfor %}
}
{% endfor %}
]
}{% if not loop.last %},{% endif +%}
{% endfor %}
],
Expand All @@ -60,9 +79,20 @@
{% for editor in doi.editors %}
{
"nameType": "Personal",
"name": "{{ editor['last_name'] }}, {{ editor['first_name'] }}",
"givenName": "{{ editor['first_name'] }}",
"familyName": "{{ editor['last_name'] }}",
{% if editor.first_name and editor.last_name %}
"name": "{{ editor.first_name }} {{ editor.last_name }}",
{% else %}
"name": "{{ editor.name }}",
{% endif %}
"nameIdentifiers": [
{% for name_identifier in editor.name_identifiers %}
{
{% for key, value in name_identifier.items() %}
"{{key}}": "{{value}}"{% if not loop.last %},{% endif +%}
{% endfor %}
}
{% endfor %}
],
"contributorType": "Editor"
},
{% endfor %}
Expand All @@ -79,9 +109,8 @@
"relatedIdentifiers": [
{
"relatedIdentifier": "{{ doi.related_identifier }}",
"relatedIdentifierType": "URN",
"relationType": "HasMetadata",
"resourceTypeGeneral": "Text"
"relatedIdentifierType": {% if doi.related_identifier.lower().startswith("urn") %}"URN"{% else %}"Handle"{% endif %},
"relationType": "IsIdenticalTo"
}
],
{% if doi.description %}
Expand Down
12 changes: 8 additions & 4 deletions src/pds_doi_service/core/outputs/datacite/datacite_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pds_doi_service.core.entities.doi import ProductType, Doi
from pds_doi_service.core.outputs.doi_record import DOIRecord, CONTENT_TYPE_JSON
from pds_doi_service.core.util.config_parser import DOIConfigUtil
from pds_doi_service.core.util.general_util import get_logger
from pds_doi_service.core.util.general_util import get_logger, sanitize_json_string

logger = get_logger(__name__)

Expand Down Expand Up @@ -103,7 +103,7 @@ def create_doi_record(self, dois, content_type=CONTENT_TYPE_JSON):
doi_fields['product_type'] = ProductType.Collection

# Sort keywords so we can output them in the same order each time
doi_fields['keywords'] = sorted(doi.keywords)
doi_fields['keywords'] = sorted(map(sanitize_json_string, doi.keywords))

# Convert datetime objects to isoformat strings
if doi.date_record_added:
Expand All @@ -112,9 +112,13 @@ def create_doi_record(self, dois, content_type=CONTENT_TYPE_JSON):
if doi.date_record_updated:
doi_fields['date_record_updated'] = doi.date_record_updated.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

# Remove any extraneous whitespace from a provided description
# Clean up extra whitespace in the title and description that could
# break the JSON format
if doi.title:
doi_fields['title'] = sanitize_json_string(doi.title)

if doi.description:
doi_fields['description'] = str.strip(doi.description)
doi_fields['description'] = sanitize_json_string(doi.description)

# Publication year is a must-have
doi_fields['publication_year'] = doi.publication_date.strftime('%Y')
Expand Down
10 changes: 4 additions & 6 deletions src/pds_doi_service/core/outputs/datacite/datacite_web_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ def query_doi(self, query, url=None, username=None, password=None,
Notes
-----
Queries are automatically filtered by this method to only include
DOI entries associated with the PDS client ID, which corresponds to the
username used with the query request.
Queries are NOT automatically filtered by this method. Callers should be
prepared to filter results as desired if more results are returned
by their query than expected.
Parameters
----------
Expand Down Expand Up @@ -151,15 +151,13 @@ def query_doi(self, query, url=None, username=None, password=None,
query_string = str(query)

url = url or config.get('DATACITE', 'url')
client_id = (username or config.get('DATACITE', 'user')).lower()

logger.debug('query_string: %s', query_string)
logger.debug('url: %s', url)
logger.debug('client_id: %s', client_id)

datacite_response = requests.request(
WEB_METHOD_GET, url=url, auth=auth, headers=headers,
params={"query": query_string, "client-id": client_id}
params={"query": query_string}
)

try:
Expand Down
Loading

0 comments on commit f831f39

Please sign in to comment.