From 015b103340c3b48dfbf33470a2dd76ed2aa71683 Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Tue, 16 Nov 2021 11:10:52 -0800 Subject: [PATCH 1/5] Updated DOIDataBase.create_connection() to ensure group read/write bits are always set on the db file on disk This ensures that users other than the installer/maintainer of the service can interact with the service on a fresh install of the database --- src/pds_doi_service/core/db/doi_database.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/pds_doi_service/core/db/doi_database.py b/src/pds_doi_service/core/db/doi_database.py index c883151d..c11901cc 100644 --- a/src/pds_doi_service/core/db/doi_database.py +++ b/src/pds_doi_service/core/db/doi_database.py @@ -12,7 +12,9 @@ Contains classes and functions for interfacing with the local transaction database (SQLite3). """ +import os import sqlite3 +import stat from collections import OrderedDict from datetime import datetime from datetime import timedelta @@ -94,6 +96,14 @@ def create_connection(self): except Error as my_error: logger.error("Failed to connect to database, reason: %s", my_error) + # Make sure Database has proper group permissions set + st = os.stat(self.m_database_name) + has_group_rw = (st.st_mode & (stat.S_IRGRP | stat.S_IWGRP)) == (stat.S_IRGRP | stat.S_IWGRP) + + if not has_group_rw: + logger.debug("Setting group read/write bits on database %s", self.m_database_name) + os.chmod(self.m_database_name, st.st_mode | stat.S_IRGRP | stat.S_IWGRP) + def get_connection(self, table_name=None): """ Returns a connection to the SQLite database. 
If a connection does From 69e489cf088975fbd4dc3cc2b0ac704d7e8aa872 Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Tue, 16 Nov 2021 11:12:33 -0800 Subject: [PATCH 2/5] Updated TransactionOnDisk.write() to ensure that any files or directories created during the process have appropriate group read/write permissions set --- .../core/outputs/transaction_on_disk.py | 94 ++++++++++++++----- 1 file changed, 69 insertions(+), 25 deletions(-) diff --git a/src/pds_doi_service/core/outputs/transaction_on_disk.py b/src/pds_doi_service/core/outputs/transaction_on_disk.py index 70371f70..9d2d12b2 100644 --- a/src/pds_doi_service/core/outputs/transaction_on_disk.py +++ b/src/pds_doi_service/core/outputs/transaction_on_disk.py @@ -19,57 +19,95 @@ import requests from pds_doi_service.core.util.config_parser import DOIConfigUtil from pds_doi_service.core.util.general_util import get_logger -from pds_doi_service.core.util.node_util import NodeUtil -logger = get_logger("pds_doi_service.core.outputs.transaction_logger") +logger = get_logger(__name__) class TransactionOnDisk: """ This class provides services to write a transaction from an action - (reserve, draft or release) to disk. + (reserve, update or release) to disk. """ m_doi_config_util = DOIConfigUtil() - m_node_util = NodeUtil() - m_doi_database = None def __init__(self): self._config = self.m_doi_config_util.get_config() def write(self, node_id, update_time, input_ref=None, output_content=None, output_content_type=None): """ - Write a the input and output products from a transaction to disk. - The location of the written files is returned. + Write the input and output products from a transaction to disk. + The location of the written files is returned. All directories and files + created will have both user and group read/write permissions set accordingly. + + Parameters + ---------- + node_id : str + PDS Node ID to associate with the transaction to disk. Determines + which subdirectory the input/output is written to. 
+ update_time : datetime.datetime + datetime object corresponding to the time of the original transaction. + Forms part of the path where the transaction is written to on disk. + input_ref : str, optional + Path to the input file or directory to associate with the transaction. + Determines the input file(s) copied to the transaction history. + output_content : str, optional + The output label content to associate with the transaction. + Determines the contents of the output file copied to the transaction history. + output_content_type : str, optional + The content type of output_content. Should be one of "xml" or "json". + + Returns + ------- + final_output_dir : str + Path to the directory in the transaction history created by this + method. The path has the following form: + + <transaction_dir>/<node_id>/<update_time> + + Where <transaction_dir> is set in the INI config, + <node_id> is the value provided for node_id, and <update_time> is the provided + update_time as an isoformat string. + """ transaction_dir = self._config.get("OTHER", "transaction_dir") logger.debug(f"transaction_dir {transaction_dir}") # Create the local transaction history directory, if necessary. final_output_dir = os.path.join(transaction_dir, node_id, update_time.isoformat()) - os.makedirs(final_output_dir, exist_ok=True) + + # Set up the appropriate umask in case os.makedirs needs to create any + # intermediate parent directories (its mask arg only affects the created leaf directory) + prev_umask = os.umask(0o0002) + + # Create the new transaction history directory with group-rw enabled + os.makedirs(final_output_dir, exist_ok=True, mode=0o0775) if input_ref: - input_content_type = os.path.splitext(input_ref)[-1] + if os.path.isdir(input_ref): + # Copy the input files, but do not preserve their permissions so + # the umask we set above takes precedence + copy_tree(input_ref, os.path.join(final_output_dir, "input"), preserve_mode=False) + else: + input_content_type = os.path.splitext(input_ref)[-1] - # Write input file with provided content. 
- # Note that the file name is always 'input' plus the extension based - # on the content_type (input.xml or input.csv or input.xlsx) - full_input_name = os.path.join(final_output_dir, "input" + input_content_type) + # Write input file with provided content. + # Note that the file name is always 'input' plus the extension based + # on the content_type (input.xml or input.csv or input.xlsx) + full_input_name = os.path.join(final_output_dir, "input" + input_content_type) - # If the provided content is actually a file name, we copy it, - # otherwise write it to external file using full_input_name as name. - if os.path.isfile(input_ref): - shutil.copy2(input_ref, full_input_name) - elif os.path.isdir(input_ref): - copy_tree(input_ref, full_input_name) - else: # remote resource - r = requests.get(input_ref, allow_redirects=True) + if os.path.isfile(input_ref): + shutil.copy2(input_ref, full_input_name) + else: # remote resource + r = requests.get(input_ref, allow_redirects=True) - with open(full_input_name, "wb") as outfile: - outfile.write(r.content) + with open(full_input_name, "wb") as outfile: + outfile.write(r.content) - r.close() + r.close() + + # Set up permissions for copied input + os.chmod(full_input_name, 0o0664) # Write output file with provided content # The extension of the file is determined by the provided content type @@ -79,6 +117,12 @@ def write(self, node_id, update_time, input_ref=None, output_content=None, outpu with open(full_output_name, "w") as outfile: outfile.write(output_content) - logger.info(f"transaction files saved in {final_output_dir}") + # Set up permissions for copied output + os.chmod(full_output_name, 0o0664) + + logger.info(f"Transaction files saved to {final_output_dir}") + + # Restore the previous umask + os.umask(prev_umask) return final_output_dir From 0f82d86331b25db6a8078a055dbc5c232e0487d7 Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Thu, 18 Nov 2021 12:59:20 -0800 Subject: [PATCH 3/5] Completed docstring for 
TransactionBuilder.prepare_transaction() --- .../core/outputs/transaction_builder.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/pds_doi_service/core/outputs/transaction_builder.py b/src/pds_doi_service/core/outputs/transaction_builder.py index 981ddecd..1fb32109 100644 --- a/src/pds_doi_service/core/outputs/transaction_builder.py +++ b/src/pds_doi_service/core/outputs/transaction_builder.py @@ -43,13 +43,34 @@ def __init__(self, db_name=None): def prepare_transaction(self, node_id, submitter_email, doi, input_path=None, output_content_type=CONTENT_TYPE_XML): """ - Build a Transaction from the inputs and outputs to a 'reserve', 'draft' + Build a Transaction from the inputs and outputs to a reserve, update or release action. The Transaction object is returned. The field output_content is used for writing the content to disk. This is typically the response text from a request to the DOI service provider. + Parameters + ---------- + node_id : str + The node identifier associated with the transaction. + submitter_email : str + The email address associated with the submitter of the transaction + doi : Doi + The DOI object created from the transaction. + input_path : str, optional + Path to the source input file of the provided Doi object. If provided, + the file will be copied to the local transaction history. + output_content_type : str, optional + The format to use for saving the output label to associate with the + transaction. Should be one of xml or json. Defaults to xml. + + Returns + ------- + Transaction + The prepared Transaction object. Callers of this function may call + log() on the returned Transaction to commit it to the local database. 
+ """ if output_content_type not in VALID_CONTENT_TYPES: raise ValueError(f"Invalid content type requested, must be one of {','.join(VALID_CONTENT_TYPES)}") From b8a2527275f5558c3150fb18538f1cc3f29d55fd Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Thu, 18 Nov 2021 13:00:13 -0800 Subject: [PATCH 4/5] Added unit test suite for the Transaction classes --- .../core/outputs/test/__init__.py | 2 + .../core/outputs/test/data/pds4_bundle.xml | 1 + .../core/outputs/test/transaction_test.py | 244 ++++++++++++++++++ 3 files changed, 247 insertions(+) create mode 120000 src/pds_doi_service/core/outputs/test/data/pds4_bundle.xml create mode 100644 src/pds_doi_service/core/outputs/test/transaction_test.py diff --git a/src/pds_doi_service/core/outputs/test/__init__.py b/src/pds_doi_service/core/outputs/test/__init__.py index 2637e51b..5ccaf906 100644 --- a/src/pds_doi_service/core/outputs/test/__init__.py +++ b/src/pds_doi_service/core/outputs/test/__init__.py @@ -7,6 +7,7 @@ from . import datacite_test from . import doi_validator_test from . import osti_test +from . 
import transaction_test def suite(): @@ -14,4 +15,5 @@ def suite(): suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(datacite_test)) suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(doi_validator_test)) suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(osti_test)) + suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(transaction_test)) return suite diff --git a/src/pds_doi_service/core/outputs/test/data/pds4_bundle.xml b/src/pds_doi_service/core/outputs/test/data/pds4_bundle.xml new file mode 120000 index 00000000..20bdb648 --- /dev/null +++ b/src/pds_doi_service/core/outputs/test/data/pds4_bundle.xml @@ -0,0 +1 @@ +../../../actions/test/data/pds4_bundle.xml \ No newline at end of file diff --git a/src/pds_doi_service/core/outputs/test/transaction_test.py b/src/pds_doi_service/core/outputs/test/transaction_test.py new file mode 100644 index 00000000..f1233079 --- /dev/null +++ b/src/pds_doi_service/core/outputs/test/transaction_test.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python +import os +import shutil +import unittest + +from datetime import datetime +from pkg_resources import resource_filename + +from pds_doi_service.core.db.doi_database import DOIDataBase +from pds_doi_service.core.entities.doi import Doi, ProductType, DoiStatus +from pds_doi_service.core.outputs.datacite.datacite_record import DOIDataCiteRecord +from pds_doi_service.core.outputs.datacite.datacite_web_parser import DOIDataCiteWebParser +from pds_doi_service.core.outputs.doi_record import CONTENT_TYPE_JSON +from pds_doi_service.core.outputs.transaction import Transaction +from pds_doi_service.core.outputs.transaction_builder import TransactionBuilder +from pds_doi_service.core.outputs.transaction_on_disk import TransactionOnDisk + + +class TransactionTestCase(unittest.TestCase): + db_name = "doi_temp.db" + + @classmethod + def setUpClass(cls) -> None: + cls.test_dir = resource_filename(__name__, "") + + if os.path.isfile(cls.db_name): + 
os.remove(cls.db_name) + + @classmethod + def tearDownClass(cls) -> None: + if os.path.isfile(cls.db_name): + os.remove(cls.db_name) + + def test_transaction_logging(self): + """Test the Transaction.log() method""" + # Create a fresh transaction database + doi_database = DOIDataBase(self.db_name) + + # Create a dummy Doi object an associated output label to log + test_doi = Doi( + title="Fake DOI", + publication_date=datetime.now(), + product_type=ProductType.Dataset, + product_type_specific="PDS4 Dataset", + pds_identifier="urn:nasa:pds:fake_doi_entry::1.0", + doi="10.17189/abc123", + status=DoiStatus.Draft, + date_record_added=datetime.now(), + date_record_updated=datetime.now() + ) + + output_content = DOIDataCiteRecord().create_doi_record(test_doi) + output_content_type = CONTENT_TYPE_JSON + node_id = "eng" + submitter_email = "pds-operator@jpl.nasa.gov" + + # Create a Transaction object and log it + transaction = Transaction( + output_content, output_content_type, node_id, submitter_email, test_doi, doi_database + ) + + transaction_key = None + + try: + transaction.log() + + # Logging should result in an entry written to the database, check for it + # now + columns, rows = doi_database.select_latest_rows( + query_criterias={"doi": ["10.17189/abc123"]} + ) + + self.assertEqual(len(rows), 1) + + db_fields = dict(zip(columns, rows[0])) + + self.assertEqual(db_fields["title"], "Fake DOI") + self.assertEqual(db_fields["identifier"], "urn:nasa:pds:fake_doi_entry::1.0") + + # An entry in the local transaction history should have been written as well + transaction_key = db_fields["transaction_key"] + self.assertIsNotNone(transaction_key) + self.assertTrue(os.path.isdir(transaction_key)) + finally: + # Clean up the fake transaction, if it was created + if transaction_key and os.path.exists(transaction_key): + shutil.rmtree(transaction_key) + + +class TransactionBuilderTestCase(unittest.TestCase): + db_name = "doi_temp.db" + + @classmethod + def setUpClass(cls) -> None: 
+ cls.test_dir = resource_filename(__name__, "") + + if os.path.isfile(cls.db_name): + os.remove(cls.db_name) + + @classmethod + def tearDownClass(cls) -> None: + if os.path.isfile(cls.db_name): + os.remove(cls.db_name) + + def test_prepare_transaction(self): + """Test the TransactionBuilder.prepare_transaction() method""" + # Create a fresh transaction database and populate it with an existing entry + doi_database = DOIDataBase(self.db_name) + + identifier = "urn:nasa:pds:existing_doi::1.0" + transaction_key = "img/2020-06-15T18:42:45.653317" + doi = "10.17189/abc123" + date_added = datetime.now() + date_updated = datetime.now() + status = DoiStatus.Draft + title = "Existing DOI" + product_type = ProductType.Collection + product_type_specific = "PDS4 Collection" + submitter = "img-submitter@jpl.nasa.gov" + discipline_node = "img" + + # Insert a row in the 'doi' table + doi_database.write_doi_info_to_database( + doi, + transaction_key, + identifier, + date_added, + date_updated, + status, + title, + product_type, + product_type_specific, + submitter, + discipline_node, + ) + + # Create the transaction builder and have it point to the same database + transaction_builder = TransactionBuilder(db_name=self.db_name) + + # Create a Doi object to be handled by the transaction builder + test_doi = Doi( + title=title, + publication_date=datetime.now(), + product_type=ProductType.Dataset, + product_type_specific="PDS4 Dataset", + pds_identifier="", + doi=doi, + status=DoiStatus.Draft, + date_record_updated=datetime.now() + ) + + # Create the transaction from the Doi + transaction = transaction_builder.prepare_transaction( + node_id=discipline_node, submitter_email=submitter, doi=test_doi, + output_content_type=CONTENT_TYPE_JSON + ) + + self.assertIsInstance(transaction, Transaction) + + # Get the output label created by the transaction builder and make sure + # it lines up with our original DOI + output_content = transaction.output_content + + output_dois, _ = 
DOIDataCiteWebParser().parse_dois_from_label(output_content) + + self.assertEqual(len(output_dois), 1) + + output_doi = output_dois[0] + + # These two fields should be carried over from the existing database entry, + # since they were not provided with the Doi object we prepared the transaction + # for + self.assertEqual(output_doi.pds_identifier, identifier) + self.assertEqual(output_doi.date_record_added.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + date_added.strftime("%Y-%m-%dT%H:%M:%S.%fZ")) + + +class TransactionOnDiskTestCase(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + cls.test_dir = resource_filename(__name__, "") + cls.data_dir = os.path.join(cls.test_dir, "data") + + def test_transaction_write_to_disk(self): + """Test the TransactionOnDisk.write() method""" + + transaction_on_disk = TransactionOnDisk() + + node_id = "eng" + transaction_time = datetime.now() + + # Test a transaction commit to disk + input_label = os.path.join(self.data_dir, "pds4_bundle.xml") + output_label = os.path.join(self.data_dir, "datacite_record_draft.json") + + with open(output_label, 'r') as infile: + output_content = infile.read() + + transaction_key = None + + try: + transaction_key = transaction_on_disk.write( + node_id, transaction_time, input_ref=input_label, + output_content=output_content, output_content_type=CONTENT_TYPE_JSON + ) + + # Make sure the transaction directory was created as expected + self.assertTrue(os.path.exists(transaction_key)) + self.assertTrue(os.path.isdir(transaction_key)) + + # Make sure the directory was created with the correct permissions + expected_perms = 0o0755 + self.assertEqual(os.stat(transaction_key).st_mode & expected_perms, expected_perms) + + # Make sure the input and output files were copied as expected + expected_input_file = os.path.join(transaction_key, "input.xml") + expected_output_file = os.path.join(transaction_key, "output.json") + expected_perms = 0o0664 + + for test_file, expected_file in zip((input_label, 
output_label), (expected_input_file, expected_output_file)): + # Check existence + self.assertTrue(os.path.exists(expected_file)) + self.assertTrue(os.path.isfile(expected_file)) + + # Check contents + with open(test_file, 'r') as infile: + test_file_contents = infile.read() + + with open(expected_file, 'r') as infile: + expected_file_contents = infile.read() + + self.assertEqual(test_file_contents, expected_file_contents) + + # Check permissions + self.assertEqual(os.stat(expected_file).st_mode & expected_perms, expected_perms) + finally: + # Cleanup the transaction dir if it was created + if transaction_key and os.path.exists(transaction_key): + shutil.rmtree(transaction_key) + + +if __name__ == "__main__": + unittest.main() From 4199fb96d00a4aed26f505d7c8c45fa17f4467cd Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Thu, 18 Nov 2021 13:00:43 -0800 Subject: [PATCH 5/5] Added the input_source field to the Doi dataclass, and input_util.py to assign the field as it parsed DOI objects from input files The action classes then provide this input source path to the transaction builder so only a single file is copied per transaction. Previously, providing a directory with multiple input files resulted in an entire copy of the directory being made for each transaction. 
--- src/pds_doi_service/core/actions/release.py | 19 +++++++++++++------ src/pds_doi_service/core/actions/reserve.py | 19 +++++++++++-------- src/pds_doi_service/core/actions/update.py | 2 +- src/pds_doi_service/core/entities/doi.py | 4 +--- src/pds_doi_service/core/input/input_util.py | 20 +++++++++++++++++++- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/pds_doi_service/core/actions/release.py b/src/pds_doi_service/core/actions/release.py index eae8d6a8..4a656cc4 100644 --- a/src/pds_doi_service/core/actions/release.py +++ b/src/pds_doi_service/core/actions/release.py @@ -267,25 +267,32 @@ def run(self, **kwargs): dois = self._complete_dois(dois) dois = self._validate_dois(dois) - for doi in dois: + for input_doi in dois: # Create a JSON format label to send to the service provider - io_doi_label = self._record_service.create_doi_record(doi, content_type=CONTENT_TYPE_JSON) + io_doi_label = self._record_service.create_doi_record(input_doi, content_type=CONTENT_TYPE_JSON) # If the next step is to release, submit to the service provider and # use the response label for the local transaction database entry if not self._review: # Determine the correct HTTP verb and URL for submission of this DOI - method, url = self._web_client.endpoint_for_doi(doi, self._name) + method, url = self._web_client.endpoint_for_doi(input_doi, self._name) - doi, o_doi_label = self._web_client.submit_content( + output_doi, o_doi_label = self._web_client.submit_content( url=url, method=method, payload=io_doi_label, content_type=CONTENT_TYPE_JSON ) + # Otherwise, DOI object is ready to be logged + else: + output_doi = input_doi # Otherwise, if the next step is review, the label we've already # created has marked all the Doi's as being the "review" step # so its ready to be submitted to the local transaction history transaction = self.m_transaction_builder.prepare_transaction( - self._node, self._submitter, doi, input_path=self._input, output_content_type=CONTENT_TYPE_JSON 
+ self._node, + self._submitter, + output_doi, + input_path=input_doi.input_source, + output_content_type=CONTENT_TYPE_JSON, ) # Commit the transaction to the local database @@ -293,7 +300,7 @@ def run(self, **kwargs): # Append the latest version of the Doi object to return # as a label - output_dois.append(doi) + output_dois.append(output_doi) # Propagate input format exceptions, force flag should not affect # these being raised and certain callers (such as the API) look # for this exception specifically diff --git a/src/pds_doi_service/core/actions/reserve.py b/src/pds_doi_service/core/actions/reserve.py index 638e41ff..306532db 100644 --- a/src/pds_doi_service/core/actions/reserve.py +++ b/src/pds_doi_service/core/actions/reserve.py @@ -245,29 +245,32 @@ def run(self, **kwargs): dois = self._complete_dois(dois) dois = self._validate_dois(dois) - for doi in dois: + for input_doi in dois: # Create the JSON request label to send - io_doi_label = self._record_service.create_doi_record(doi, content_type=CONTENT_TYPE_JSON) + io_doi_label = self._record_service.create_doi_record(input_doi, content_type=CONTENT_TYPE_JSON) # Submit the Reserve request # Determine the correct HTTP verb and URL for submission of this DOI - method, url = self._web_client.endpoint_for_doi(doi, self._name) + method, url = self._web_client.endpoint_for_doi(input_doi, self._name) - doi, o_doi_label = self._web_client.submit_content( + output_doi, o_doi_label = self._web_client.submit_content( method=method, url=url, payload=io_doi_label, content_type=CONTENT_TYPE_JSON ) # Log the inputs and outputs of this transaction transaction = self.m_transaction_builder.prepare_transaction( - self._node, self._submitter, doi, input_path=self._input, output_content_type=CONTENT_TYPE_JSON + self._node, + self._submitter, + output_doi, + input_path=input_doi.input_source, + output_content_type=CONTENT_TYPE_JSON, ) # Commit the transaction to the local database transaction.log() - # Append the latest version 
of the Doi object to return - # as a label - output_dois.append(doi) + # Append the latest version of the Doi object to return as a label + output_dois.append(output_doi) # Propagate input format exceptions, force flag should not affect # these being raised and certain callers (such as the API) look diff --git a/src/pds_doi_service/core/actions/update.py b/src/pds_doi_service/core/actions/update.py index f13cd1e8..0a9540e6 100644 --- a/src/pds_doi_service/core/actions/update.py +++ b/src/pds_doi_service/core/actions/update.py @@ -325,7 +325,7 @@ def run(self, **kwargs): for doi in dois: transaction = self.m_transaction_builder.prepare_transaction( - self._node, self._submitter, doi, input_path=self._input, output_content_type=CONTENT_TYPE_JSON + self._node, self._submitter, doi, input_path=doi.input_source, output_content_type=CONTENT_TYPE_JSON ) transaction.log() diff --git a/src/pds_doi_service/core/entities/doi.py b/src/pds_doi_service/core/entities/doi.py index bc2dab84..65124bed 100644 --- a/src/pds_doi_service/core/entities/doi.py +++ b/src/pds_doi_service/core/entities/doi.py @@ -43,9 +43,6 @@ class DoiStatus(str, Enum): An error has occurred with the DOI submission. Unknown - Default starting state for DOI transactions. - Reserve_not_submitted - - DOI reserve request in local database, but not published/released. - Used for testing of the reserve action. Reserved - DOI reserve request submitted, but not yet published/released. 
Draft - @@ -127,3 +124,4 @@ class Doi: date_record_added: Optional[datetime] = None date_record_updated: Optional[datetime] = None event: Optional[DoiEvent] = None + input_source: Optional[str] = None diff --git a/src/pds_doi_service/core/input/input_util.py b/src/pds_doi_service/core/input/input_util.py index 55283ac5..e2889f49 100644 --- a/src/pds_doi_service/core/input/input_util.py +++ b/src/pds_doi_service/core/input/input_util.py @@ -504,6 +504,11 @@ def _read_from_path(self, path): Returns ------- + dois : list[doi] + The list of Doi objects parsed from the provided path. + + Raises + ------- InputFormatException If an error is encountered while reading a local file. @@ -526,6 +531,12 @@ def _read_from_path(self, path): logger.error(msg) raise InputFormatException(msg) + + # Make a note of where we can find the original input file that + # resulted in these DOI's so we can save it to the transaction + # history later on + for doi in dois: + doi.input_source = path else: logger.info("File %s has unsupported extension, ignoring", path) else: @@ -580,7 +591,14 @@ def _read_from_remote(self, input_url): temp_file.write(response.content) temp_file.seek(0) - return self._read_from_path(temp_file.name) + dois = self._read_from_path(temp_file.name) + + # Update input source to point to original URL, as the temp file paths + # assigned by _read_from_path no longer exist + for doi in dois: + doi.input_source = input_url + + return dois def parse_dois_from_input_file(self, input_file): """