diff --git a/pds_doi_core/actions/release.py b/pds_doi_core/actions/release.py index 673cb29a..da6e848e 100644 --- a/pds_doi_core/actions/release.py +++ b/pds_doi_core/actions/release.py @@ -9,7 +9,7 @@ from pds_doi_core.actions.action import DOICoreAction, logger from pds_doi_core.input.exceptions import UnknownNodeException, InputFormatException, DuplicatedTitleDOIException, \ - UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException, IllegalDOIActionException, WarningDOIException, \ + UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException, SiteURNotExistException, IllegalDOIActionException, WarningDOIException, \ CriticalDOIException from pds_doi_core.input.osti_input_validator import OSTIInputValidator from pds_doi_core.outputs.osti import DOIOutputOsti @@ -70,6 +70,30 @@ def _parse_input(self, input): return o_doi_label + def _collect_exception_classes_and_messages(self, single_exception, io_exception_classes, io_exception_messages): + # Given a single exception, collect the exception class name and message. + # The variables io_exception_classes and io_exception_messages are both input and output. + + actual_class_name = type(single_exception).__name__ + logger.debug(f"actual_class_name,type(actual_class_name) {actual_class_name},{type(actual_class_name)}") + + io_exception_classes.append (actual_class_name) # SiteURNotExistException + io_exception_messages.append(str(single_exception)) # site_url http://mysite.example.com/link/to/my-dataset-id-25901.html not exist + + return io_exception_classes, io_exception_messages + + def _raise_warn_exceptions(self, exception_classes, exception_messages): + # Raise a WarningDOIException with all the class names and messages. + + message_to_raise = '' + for ii in range(len(exception_classes)): + if ii == 0: + message_to_raise = message_to_raise + exception_classes[ii] + ':' + exception_messages[ii] + else: + # Add a comma after every message. + message_to_raise = message_to_raise + ', ' + exception_classes[ii] + ':' + exception_messages[ii] + raise WarningDOIException(message_to_raise) + def _validate_doi(self, doi_label): """ Before submitting the user input, it has to be validated against the database so a Doi object need to be built. @@ -77,6 +101,8 @@ def _validate_doi(self, doi_label): :param doi_label: :return: """ + exception_classes = [] + exception_messages = [] try: dois = DOIOstiWebParser().response_get_parse_osti_xml(doi_label) @@ -87,15 +113,28 @@ def _validate_doi(self, doi_label): self._doi_validator.validate_against_xsd(single_doi_label) # Validate the label to ensure that no rules are violated. - self._doi_validator.validate_osti_submission(doi) - + # Put validate_osti_submission() within a try/except clause to catch all the exceptions instead of + # exiting after the first exception. This allow the user to see all the things that are wrong with + # all the DOIs instead of just the first one. + try: + self._doi_validator.validate_osti_submission(doi) + except (DuplicatedTitleDOIException, UnexpectedDOIActionException, + TitleDoesNotMatchProductTypeException, SiteURNotExistException, WarningDOIException) as e: + (exception_classes, exception_messages) = self._collect_exception_classes_and_messages(e, + exception_classes, exception_messages) except Exception as e: raise # Re-raise all exceptions. + # If there is at least one exception caught, raise a WarningDOIException with all the messages. + if len(exception_classes) > 0: + self._raise_warn_exceptions(exception_classes,exception_messages) + + return 1 + def run(self, **kwargs): """ Function performs a release of a DOI that has been previously reserved. The input is a - XML text file contains previously return output from a 'reserved' action. The only + XML text file contains previously return output from a 'reserve' or 'draft' action. The only required field is 'id'. Any other fields included will be considered a replace action. """ self.parse_arguments(kwargs) @@ -117,9 +156,9 @@ def run(self, **kwargs): # warnings except (DuplicatedTitleDOIException, UnexpectedDOIActionException, - TitleDoesNotMatchProductTypeException) as e: + TitleDoesNotMatchProductTypeException, SiteURNotExistException, WarningDOIException) as e: if not self._force: - # If the user did not use force_flag, re-raise the exception. + # If the user did not use --force parameter, re-raise the exception. raise WarningDOIException(str(e)) # errors except (IllegalDOIActionException, UnknownNodeException, InputFormatException) as e: diff --git a/pds_doi_core/actions/test/draft_test.py b/pds_doi_core/actions/test/draft_test.py index f77a312c..ff0d68e0 100644 --- a/pds_doi_core/actions/test/draft_test.py +++ b/pds_doi_core/actions/test/draft_test.py @@ -18,7 +18,7 @@ def create_temporary_output_file(doi_label,filename): class MyTestCase(unittest.TestCase): db_name = 'doi_temp.db' - # Because validation has been added to each action, the force_flag=True is required as the command line is not parsed for unit test. + # Because validation has been added to each action, the force=True is required as the command line is not parsed for unit test. @classmethod def setUp(self): @@ -43,21 +43,21 @@ def test_local_dir_one_file(self): logger.info("test local dir with one file") osti_doi = self._action.run(input='input/draft_dir_one_file', node='img', - submitter='my_user@my_node.gov',force_flag=True) + submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) def test_local_dir_two_files(self): logger.info("test local dir with two files") osti_doi = self._action.run(input='input/draft_dir_two_files', node='img', - submitter='my_user@my_node.gov',force_flag=True) + submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) def test_local_bundle(self): logger.info("test local bundle") osti_doi = self._action.run(input='input/bundle_in_with_contributors.xml', node='img', - submitter='my_user@my_node.gov',force_flag=True) + submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) def test_remote_bundle(self): @@ -65,41 +65,41 @@ def test_remote_bundle(self): osti_doi = self._action.run( input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/bundle.xml', node='img', - submitter='my_user@my_node.gov',force_flag=True) + submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) - create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_bundle_doi.xml')) + #create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_bundle_doi.xml')) def test_remote_collection(self): logger.info("test remote collection") osti_doi = self._action.run( input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/data/collection_data.xml', - node='img', submitter='my_user@my_node.gov',force_flag=True) + node='img', submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) - create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_datacoll_doi.xml')) + #create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_datacoll_doi.xml')) def test_remote_browse_collection(self): logger.info("test remote browse collection") osti_doi = self._action.run( input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/browse/collection_browse.xml', - node='img', submitter='my_user@my_node.gov',force_flag=True) + node='img', submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) - create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_browsecoll_doi.xml')) + #create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_browsecoll_doi.xml')) def test_remote_calibration_collection(self): logger.info("test remote calibration collection") osti_doi = self._action.run( input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/calibration/collection_calibration.xml', - node='img', submitter='my_user@my_node.gov',force_flag=True) + node='img', submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) - create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_calibcoll_doi.xml')) + #create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_calibcoll_doi.xml')) def test_remote_document_collection(self): logger.info("test remote document collection") osti_doi = self._action.run( input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/document/collection_document.xml', - node='img', submitter='my_user@my_node.gov',force_flag=True) + node='img', submitter='my_user@my_node.gov',force=True) logger.info(osti_doi) - create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_docucoll_doi.xml')) + #create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_docucoll_doi.xml')) if __name__ == '__main__': diff --git a/pds_doi_core/actions/test/release_test.py b/pds_doi_core/actions/test/release_test.py index be61a4a6..6b5ff869 100644 --- a/pds_doi_core/actions/test/release_test.py +++ b/pds_doi_core/actions/test/release_test.py @@ -15,10 +15,14 @@ class MyTestCase(unittest.TestCase): # to demonstrate that they have new status of 'Pending' or 'Registered'. If for some reason the server has been wiped clean, this unit test will still run # but won't show any status changed to 'Registered'. # - # Because validation has been added to each action, the force_flag=True is required as the command line is not parsed for unit test. + # Because validation has been added to each action, the force=True is required as the command line is not parsed for unit test. db_name = 'doi_temp.db' logger.info("Creating test artifact database file {self.db_name}") + # Remove db_name if exist to have a fresh start otherwise exception will be raised about using existing lidvid. + if os.path.isfile(db_name): + os.remove(db_name) + logger.info(f"Removed test artifact database file {db_name}") # Release some DOIs. # Move instantation of DOICoreActionRelease() object inside tests since the temporary file is removed after each test @@ -39,7 +43,7 @@ def test_reserve(self): # Instantiate DOICoreActionRelease() here so a new database file is created and removed for each test. # The setUp() function is called per test. logger.info("test release of document from 'reserve' action. This test would only work if the authentication for OSTI has been set up and DOIs exist.") - result_list = self._action.run(input='input/DOI_Release_20200727_from_reserve.xml',node='img',submitter='Qui.T.Chau@jpl.nasa.gov',force_flag=True) + result_list = self._action.run(input='input/DOI_Release_20200727_from_reserve.xml',node='img',submitter='Qui.T.Chau@jpl.nasa.gov',force=True) logger.info(result_list) # The tearDown() function is called per test. @@ -49,7 +53,7 @@ def test_draft(self): # The setUp() function is called per test. logger.info("test release of document from 'release' output. This test would only work if the authentication for OSTI has been set up and DOIs exist.") result_list = [] - result_list = self._action.run(input='input/DOI_Release_20200727_from_draft.xml',node='img',submitter='Qui.T.Chau@jpl.nasa.gov',force_flag=True) + result_list = self._action.run(input='input/DOI_Release_20200727_from_draft.xml',node='img',submitter='Qui.T.Chau@jpl.nasa.gov',force=True) logger.info(result_list) # The tearDown() function is called per test. diff --git a/pds_doi_core/actions/test/reserve_test.py b/pds_doi_core/actions/test/reserve_test.py index ec40f9f7..6f184cdf 100644 --- a/pds_doi_core/actions/test/reserve_test.py +++ b/pds_doi_core/actions/test/reserve_test.py @@ -15,7 +15,7 @@ class MyTestCase(unittest.TestCase): # Due to addition of validation of existing 'title', 'lidvid' and 'doi' fields from local database, # the DOICoreActionReserve must be instantiated per test and the tear down per test as well to remove temporary database. - # Because validation has been added to each action, the force_flag=True is required as the command line is not parsed for unit test. + # Because validation has been added to each action, the force=True is required as the command line is not parsed for unit test. db_name = 'doi_temp.db' def setUp(self): @@ -37,7 +37,7 @@ def test_reserve_xlsx(self): self._action.run( input='input/DOI_Reserved_GEO_200318_with_corrected_identifier.xlsx', node='img', submitter='my_user@my_node.gov', - dry_run=True,force_flag=True) + dry_run=True,force=True) # The tearDown() function is called per test. @@ -49,7 +49,7 @@ def test_reserve_xlsx_and_submit(self): self._action.run( input='input/DOI_Reserved_GEO_200318_with_corrected_identifier.xlsx', node='img', submitter='my_user@my_node.gov', - dry_run=True,force_flag=True) + dry_run=True,force=True) # The tearDown() function is called per test. @@ -60,7 +60,7 @@ def test_reserve_csv(self): osti_doi = self._action.run( input='input/DOI_Reserved_GEO_200318.csv', node='img', submitter='my_user@my_node.gov', - dry_run=True,force_flag=True) + dry_run=True,force=True) logger.info(osti_doi) def test_reserve_csv_and_submit(self): @@ -68,7 +68,7 @@ def test_reserve_csv_and_submit(self): osti_doi = self._action.run( input='input/DOI_Reserved_GEO_200318.csv', node='img', submitter='my_user@my_node.gov', - dry_run=False,force_flag=True) + dry_run=False,force=True) logger.info(osti_doi) # The tearDown() function is called per test. diff --git a/pds_doi_core/input/exceptions.py b/pds_doi_core/input/exceptions.py index 8fd03c84..305b3ad2 100644 --- a/pds_doi_core/input/exceptions.py +++ b/pds_doi_core/input/exceptions.py @@ -31,3 +31,6 @@ class CriticalDOIException(Exception): class WarningDOIException(Exception): pass +class SiteURNotExistException(Exception): + pass + diff --git a/pds_doi_core/input/input_util.py b/pds_doi_core/input/input_util.py index c144e4c2..80fdb2d2 100644 --- a/pds_doi_core/input/input_util.py +++ b/pds_doi_core/input/input_util.py @@ -59,8 +59,21 @@ def _parse_rows_to_doi_meta(self, xl_sheet): doi_records = [] for index, row in xl_sheet.iterrows(): + logger.debug(f"row {row}") + # It is possible that the length of row['publication_date'] is more than needed, we only need to get the first 10 characters + # '2020-08-01' from '2020-08-01 00:00:00' + if len(row['publication_date']) >= 10: + # It is possible that the format provided is not expected, put try/except clause to catch that. + try: + publication_date_value = datetime.strptime(row['publication_date'][0:10], '%Y-%m-%d') + except Exception: + logger.error("Expecting publication_date [" + row['publication_date'] + "] with format YYYY-mm-dd") + raise InputFormatException("Expecting publication_date [" + row['publication_date'] + "] with format YYYY-mm-dd") + else: + raise InputFormatException("Expecting publication_date to be at least 10 characters: [" + row['publication_date'] + "]") + doi = Doi(title=row['title'], - publication_date=datetime.fromisoformat(row['publication_date']), + publication_date=publication_date_value, product_type='Collection', product_type_specific=row['product_type_specific'], related_identifier=row['related_resource'], diff --git a/pds_doi_core/util/doi_validator.py b/pds_doi_core/util/doi_validator.py index 60e4f7d0..b1d0fb36 100644 --- a/pds_doi_core/util/doi_validator.py +++ b/pds_doi_core/util/doi_validator.py @@ -9,10 +9,12 @@ from lxml import etree +import requests + from pds_doi_core.db.doi_database import DOIDataBase from pds_doi_core.entities.doi import Doi from pds_doi_core.input.exceptions import DuplicatedTitleDOIException, \ - IllegalDOIActionException, UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException + IllegalDOIActionException, UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException, SiteURNotExistException from pds_doi_core.util.config_parser import DOIConfigUtil from pds_doi_core.util.general_util import get_logger @@ -46,6 +48,32 @@ def get_database_name(self): def __lidvid(columns, row): return f"{row[columns.index('lid')]}::{row[columns.index('vid')]}" + def _check_field_site_url(self, doi: Doi): + """ If the site_url field exist in doi object, check to see if it is online. If the site is not online, an exception will be thrown. + """ + + logger.debug(f"doi {doi}") + logger.info(f"doi.site_url {doi.site_url}") + + if doi.site_url: + try: + response = requests.get(doi.site_url,timeout=5) + status_code = response.status_code + logger.debug(f"from_request:status_code,site_url {status_code,doi.site_url}") + # Handle cases when a connection can be made to the server but the status is greater than or equal to 400. + if status_code >= 400: + # Need to check its an 404, 503, 500, 403 etc. + raise requests.HTTPError(f"status_code,site_url {status_code,doi.site_url}") + else: + logger.debug(f"site_url {doi.site_url} indeed exist") + except (requests.exceptions.ConnectionError, Exception) as e: + error_message = f"site_url {doi.site_url} not reachable" + # Although not being able to connect is an error, the message printed is a warning. + logger.warning(error_message) + raise SiteURNotExistException(error_message) + + return 1 + def _check_field_title_duplicate(self, doi: Doi): """ Check if the same title exist already in local database for a different lidvid """ @@ -188,6 +216,8 @@ def validate(self, doi: Doi): # TO DO check id and doi fields are consistent. + # Validate the site_url first to give the user a chance to make the correction. + self._check_field_site_url(doi) self._check_field_title_duplicate(doi) self._check_field_title_content(doi) self._check_field_workflow(doi)