From 4199fb96d00a4aed26f505d7c8c45fa17f4467cd Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Thu, 18 Nov 2021 13:00:43 -0800 Subject: [PATCH] Added the input_source field to the Doi dataclass, and updated input_util.py to assign the field as it parses DOI objects from input files. The action classes then provide this input source path to the transaction builder so only a single file is copied per transaction. Previously, providing a directory with multiple input files resulted in an entire copy of the directory being made for each transaction. --- src/pds_doi_service/core/actions/release.py | 19 +++++++++++++------ src/pds_doi_service/core/actions/reserve.py | 19 +++++++++++-------- src/pds_doi_service/core/actions/update.py | 2 +- src/pds_doi_service/core/entities/doi.py | 4 +--- src/pds_doi_service/core/input/input_util.py | 20 +++++++++++++++++++- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/pds_doi_service/core/actions/release.py b/src/pds_doi_service/core/actions/release.py index eae8d6a8..4a656cc4 100644 --- a/src/pds_doi_service/core/actions/release.py +++ b/src/pds_doi_service/core/actions/release.py @@ -267,25 +267,32 @@ def run(self, **kwargs): dois = self._complete_dois(dois) dois = self._validate_dois(dois) - for doi in dois: + for input_doi in dois: # Create a JSON format label to send to the service provider - io_doi_label = self._record_service.create_doi_record(doi, content_type=CONTENT_TYPE_JSON) + io_doi_label = self._record_service.create_doi_record(input_doi, content_type=CONTENT_TYPE_JSON) # If the next step is to release, submit to the service provider and # use the response label for the local transaction database entry if not self._review: # Determine the correct HTTP verb and URL for submission of this DOI - method, url = self._web_client.endpoint_for_doi(doi, self._name) + method, url = self._web_client.endpoint_for_doi(input_doi, self._name) - doi, o_doi_label = self._web_client.submit_content( + output_doi, o_doi_label 
= self._web_client.submit_content( url=url, method=method, payload=io_doi_label, content_type=CONTENT_TYPE_JSON ) + # Otherwise, DOI object is ready to be logged + else: + output_doi = input_doi # Otherwise, if the next step is review, the label we've already # created has marked all the Doi's as being the "review" step # so its ready to be submitted to the local transaction history transaction = self.m_transaction_builder.prepare_transaction( - self._node, self._submitter, doi, input_path=self._input, output_content_type=CONTENT_TYPE_JSON + self._node, + self._submitter, + output_doi, + input_path=input_doi.input_source, + output_content_type=CONTENT_TYPE_JSON, ) # Commit the transaction to the local database @@ -293,7 +300,7 @@ def run(self, **kwargs): # Append the latest version of the Doi object to return # as a label - output_dois.append(doi) + output_dois.append(output_doi) # Propagate input format exceptions, force flag should not affect # these being raised and certain callers (such as the API) look # for this exception specifically diff --git a/src/pds_doi_service/core/actions/reserve.py b/src/pds_doi_service/core/actions/reserve.py index 638e41ff..306532db 100644 --- a/src/pds_doi_service/core/actions/reserve.py +++ b/src/pds_doi_service/core/actions/reserve.py @@ -245,29 +245,32 @@ def run(self, **kwargs): dois = self._complete_dois(dois) dois = self._validate_dois(dois) - for doi in dois: + for input_doi in dois: # Create the JSON request label to send - io_doi_label = self._record_service.create_doi_record(doi, content_type=CONTENT_TYPE_JSON) + io_doi_label = self._record_service.create_doi_record(input_doi, content_type=CONTENT_TYPE_JSON) # Submit the Reserve request # Determine the correct HTTP verb and URL for submission of this DOI - method, url = self._web_client.endpoint_for_doi(doi, self._name) + method, url = self._web_client.endpoint_for_doi(input_doi, self._name) - doi, o_doi_label = self._web_client.submit_content( + output_doi, o_doi_label 
= self._web_client.submit_content( method=method, url=url, payload=io_doi_label, content_type=CONTENT_TYPE_JSON ) # Log the inputs and outputs of this transaction transaction = self.m_transaction_builder.prepare_transaction( - self._node, self._submitter, doi, input_path=self._input, output_content_type=CONTENT_TYPE_JSON + self._node, + self._submitter, + output_doi, + input_path=input_doi.input_source, + output_content_type=CONTENT_TYPE_JSON, ) # Commit the transaction to the local database transaction.log() - # Append the latest version of the Doi object to return - # as a label - output_dois.append(doi) + # Append the latest version of the Doi object to return as a label + output_dois.append(output_doi) # Propagate input format exceptions, force flag should not affect # these being raised and certain callers (such as the API) look diff --git a/src/pds_doi_service/core/actions/update.py b/src/pds_doi_service/core/actions/update.py index f13cd1e8..0a9540e6 100644 --- a/src/pds_doi_service/core/actions/update.py +++ b/src/pds_doi_service/core/actions/update.py @@ -325,7 +325,7 @@ def run(self, **kwargs): for doi in dois: transaction = self.m_transaction_builder.prepare_transaction( - self._node, self._submitter, doi, input_path=self._input, output_content_type=CONTENT_TYPE_JSON + self._node, self._submitter, doi, input_path=doi.input_source, output_content_type=CONTENT_TYPE_JSON ) transaction.log() diff --git a/src/pds_doi_service/core/entities/doi.py b/src/pds_doi_service/core/entities/doi.py index bc2dab84..65124bed 100644 --- a/src/pds_doi_service/core/entities/doi.py +++ b/src/pds_doi_service/core/entities/doi.py @@ -43,9 +43,6 @@ class DoiStatus(str, Enum): An error has occurred with the DOI submission. Unknown - Default starting state for DOI transactions. - Reserve_not_submitted - - DOI reserve request in local database, but not published/released. - Used for testing of the reserve action. 
Reserved - DOI reserve request submitted, but not yet published/released. Draft - @@ -127,3 +124,4 @@ class Doi: date_record_added: Optional[datetime] = None date_record_updated: Optional[datetime] = None event: Optional[DoiEvent] = None + input_source: Optional[str] = None diff --git a/src/pds_doi_service/core/input/input_util.py b/src/pds_doi_service/core/input/input_util.py index 55283ac5..e2889f49 100644 --- a/src/pds_doi_service/core/input/input_util.py +++ b/src/pds_doi_service/core/input/input_util.py @@ -504,6 +504,11 @@ def _read_from_path(self, path): Returns ------- + dois : list[doi] + The list of Doi objects parsed from the provided path. + + Raises + ------- InputFormatException If an error is encountered while reading a local file. @@ -526,6 +531,12 @@ def _read_from_path(self, path): logger.error(msg) raise InputFormatException(msg) + + # Make a note of where we can find the original input file that + # resulted in these DOI's so we can save it to the transaction + # history later on + for doi in dois: + doi.input_source = path else: logger.info("File %s has unsupported extension, ignoring", path) else: @@ -580,7 +591,14 @@ def _read_from_remote(self, input_url): temp_file.write(response.content) temp_file.seek(0) - return self._read_from_path(temp_file.name) + dois = self._read_from_path(temp_file.name) + + # Update input source to point to original URL, as the temp file paths + # assigned by _read_from_path no longer exist + for doi in dois: + doi.input_source = input_url + + return dois def parse_dois_from_input_file(self, input_file): """