Skip to content

Commit

Permalink
Validate site_url availability
Browse files Browse the repository at this point in the history
Changes to support issue #70: Add catching of warning exceptions from validate_osti_submission() when site_url is not online, comment out function to write output file from draft_test.py since it is not needed, fix bug with draft_test.py to use force=True instead of force_flag=True, fix bug with release_test.py by removing existing temporary database at initial run, fix bug with reserve_test.py to use force=True instead of force_flag=True, add SiteURNotExistException to exceptions.py, fix bug with fromisoformat() since Python version on linux is 3.68 and does not support it, add error handling of publication_date, add verification of site_url being online or not in doi_validator.py.

Changes responding to first set of comments for pull request of issue #70: Add handling when a connection can be made to the server but the page does not exist, add _check_field_site_url() check for all actions and not just 'release'.

change warning message
  • Loading branch information
Qui T Chau authored and thomas loubrieu committed Sep 29, 2020
1 parent 8014621 commit 9c6bcf4
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 30 deletions.
51 changes: 45 additions & 6 deletions pds_doi_core/actions/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pds_doi_core.actions.action import DOICoreAction, logger
from pds_doi_core.input.exceptions import UnknownNodeException, InputFormatException, DuplicatedTitleDOIException, \
UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException, IllegalDOIActionException, WarningDOIException, \
UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException, SiteURNotExistException, IllegalDOIActionException, WarningDOIException, \
CriticalDOIException
from pds_doi_core.input.osti_input_validator import OSTIInputValidator
from pds_doi_core.outputs.osti import DOIOutputOsti
Expand Down Expand Up @@ -70,13 +70,39 @@ def _parse_input(self, input):

return o_doi_label

def _collect_exception_classes_and_messages(self, single_exception, io_exception_classes, io_exception_messages):
# Given a single exception, collect the exception class name and message.
# The variables io_exception_classes and io_exception_messages are both input and output.

actual_class_name = type(single_exception).__name__
logger.debug(f"actual_class_name,type(actual_class_name) {actual_class_name},{type(actual_class_name)}")

io_exception_classes.append (actual_class_name) # SiteURNotExistException
io_exception_messages.append(str(single_exception)) # site_url http://mysite.example.com/link/to/my-dataset-id-25901.html not exist

return io_exception_classes, io_exception_messages

def _raise_warn_exceptions(self, exception_classes, exception_messages):
# Raise a WarningDOIException with all the class names and messages.

message_to_raise = ''
for ii in range(len(exception_classes)):
if ii == 0:
message_to_raise = message_to_raise + exception_classes[ii] + ':' + exception_messages[ii]
else:
# Add a comma after every message.
message_to_raise = message_to_raise + ', ' + exception_classes[ii] + ':' + exception_messages[ii]
raise WarningDOIException(message_to_raise)

def _validate_doi(self, doi_label):
"""
Before submitting the user input, it has to be validated against the database so a Doi object need to be built.
Since the format of o_doi_label is same as a response from OSTI, the same parser can be used.
:param doi_label:
:return:
"""
exception_classes = []
exception_messages = []
try:
dois = DOIOstiWebParser().response_get_parse_osti_xml(doi_label)

Expand All @@ -87,15 +113,28 @@ def _validate_doi(self, doi_label):
self._doi_validator.validate_against_xsd(single_doi_label)

# Validate the label to ensure that no rules are violated.
self._doi_validator.validate_osti_submission(doi)

# Put validate_osti_submission() within a try/except clause to catch all the exceptions instead of
# exiting after the first exception. This allow the user to see all the things that are wrong with
# all the DOIs instead of just the first one.
try:
self._doi_validator.validate_osti_submission(doi)
except (DuplicatedTitleDOIException, UnexpectedDOIActionException,
TitleDoesNotMatchProductTypeException, SiteURNotExistException, WarningDOIException) as e:
(exception_classes, exception_messages) = self._collect_exception_classes_and_messages(e,
exception_classes, exception_messages)
except Exception as e:
raise # Re-raise all exceptions.

# If there is at least one exception caught, raise a WarningDOIException with all the messages.
if len(exception_classes) > 0:
self._raise_warn_exceptions(exception_classes,exception_messages)

return 1

def run(self, **kwargs):
"""
Function performs a release of a DOI that has been previously reserved. The input is a
XML text file contains previously return output from a 'reserved' action. The only
XML text file contains previously return output from a 'reserve' or 'draft' action. The only
required field is 'id'. Any other fields included will be considered a replace action.
"""
self.parse_arguments(kwargs)
Expand All @@ -117,9 +156,9 @@ def run(self, **kwargs):

# warnings
except (DuplicatedTitleDOIException, UnexpectedDOIActionException,
TitleDoesNotMatchProductTypeException) as e:
TitleDoesNotMatchProductTypeException, SiteURNotExistException, WarningDOIException) as e:
if not self._force:
# If the user did not use force_flag, re-raise the exception.
# If the user did not use --force parameter, re-raise the exception.
raise WarningDOIException(str(e))
# errors
except (IllegalDOIActionException, UnknownNodeException, InputFormatException) as e:
Expand Down
28 changes: 14 additions & 14 deletions pds_doi_core/actions/test/draft_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def create_temporary_output_file(doi_label,filename):

class MyTestCase(unittest.TestCase):
db_name = 'doi_temp.db'
# Because validation has been added to each action, the force_flag=True is required as the command line is not parsed for unit test.
# Because validation has been added to each action, the force=True is required as the command line is not parsed for unit test.

@classmethod
def setUp(self):
Expand All @@ -43,63 +43,63 @@ def test_local_dir_one_file(self):
logger.info("test local dir with one file")
osti_doi = self._action.run(input='input/draft_dir_one_file',
node='img',
submitter='my_user@my_node.gov',force_flag=True)
submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)

def test_local_dir_two_files(self):
logger.info("test local dir with two files")
osti_doi = self._action.run(input='input/draft_dir_two_files',
node='img',
submitter='my_user@my_node.gov',force_flag=True)
submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)

def test_local_bundle(self):
logger.info("test local bundle")
osti_doi = self._action.run(input='input/bundle_in_with_contributors.xml',
node='img',
submitter='my_user@my_node.gov',force_flag=True)
submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)

def test_remote_bundle(self):
logger.info("test remote bundle")
osti_doi = self._action.run(
input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/bundle.xml',
node='img',
submitter='my_user@my_node.gov',force_flag=True)
submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)
create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_bundle_doi.xml'))
#create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_bundle_doi.xml'))

def test_remote_collection(self):
logger.info("test remote collection")
osti_doi = self._action.run(
input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/data/collection_data.xml',
node='img', submitter='my_user@my_node.gov',force_flag=True)
node='img', submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)
create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_datacoll_doi.xml'))
#create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_datacoll_doi.xml'))

def test_remote_browse_collection(self):
logger.info("test remote browse collection")
osti_doi = self._action.run(
input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/browse/collection_browse.xml',
node='img', submitter='my_user@my_node.gov',force_flag=True)
node='img', submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)
create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_browsecoll_doi.xml'))
#create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_browsecoll_doi.xml'))

def test_remote_calibration_collection(self):
logger.info("test remote calibration collection")
osti_doi = self._action.run(
input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/calibration/collection_calibration.xml',
node='img', submitter='my_user@my_node.gov',force_flag=True)
node='img', submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)
create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_calibcoll_doi.xml'))
#create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_calibcoll_doi.xml'))

def test_remote_document_collection(self):
logger.info("test remote document collection")
osti_doi = self._action.run(
input='https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/document/collection_document.xml',
node='img', submitter='my_user@my_node.gov',force_flag=True)
node='img', submitter='my_user@my_node.gov',force=True)
logger.info(osti_doi)
create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_docucoll_doi.xml'))
#create_temporary_output_file(osti_doi, os.path.join(self._temporary_output_dir,'valid_docucoll_doi.xml'))


if __name__ == '__main__':
Expand Down
10 changes: 7 additions & 3 deletions pds_doi_core/actions/test/release_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@ class MyTestCase(unittest.TestCase):
# to demonstrate that they have new status of 'Pending' or 'Registered'. If for some reason the server has been wiped clean, this unit test will still run
# but won't show any status changed to 'Registered'.
#
# Because validation has been added to each action, the force_flag=True is required as the command line is not parsed for unit test.
# Because validation has been added to each action, the force=True is required as the command line is not parsed for unit test.

db_name = 'doi_temp.db'
logger.info("Creating test artifact database file {self.db_name}")
# Remove db_name if exist to have a fresh start otherwise exception will be raised about using existing lidvid.
if os.path.isfile(db_name):
os.remove(db_name)
logger.info(f"Removed test artifact database file {db_name}")

# Release some DOIs.
# Move instantation of DOICoreActionRelease() object inside tests since the temporary file is removed after each test
Expand All @@ -39,7 +43,7 @@ def test_reserve(self):
# Instantiate DOICoreActionRelease() here so a new database file is created and removed for each test.
# The setUp() function is called per test.
logger.info("test release of document from 'reserve' action. This test would only work if the authentication for OSTI has been set up and DOIs exist.")
result_list = self._action.run(input='input/DOI_Release_20200727_from_reserve.xml',node='img',submitter='[email protected]',force_flag=True)
result_list = self._action.run(input='input/DOI_Release_20200727_from_reserve.xml',node='img',submitter='[email protected]',force=True)

logger.info(result_list)
# The tearDown() function is called per test.
Expand All @@ -49,7 +53,7 @@ def test_draft(self):
# The setUp() function is called per test.
logger.info("test release of document from 'release' output. This test would only work if the authentication for OSTI has been set up and DOIs exist.")
result_list = []
result_list = self._action.run(input='input/DOI_Release_20200727_from_draft.xml',node='img',submitter='[email protected]',force_flag=True)
result_list = self._action.run(input='input/DOI_Release_20200727_from_draft.xml',node='img',submitter='[email protected]',force=True)

logger.info(result_list)
# The tearDown() function is called per test.
Expand Down
10 changes: 5 additions & 5 deletions pds_doi_core/actions/test/reserve_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class MyTestCase(unittest.TestCase):
# Due to addition of validation of existing 'title', 'lidvid' and 'doi' fields from local database,
# the DOICoreActionReserve must be instantiated per test and the tear down per test as well to remove temporary database.

# Because validation has been added to each action, the force_flag=True is required as the command line is not parsed for unit test.
# Because validation has been added to each action, the force=True is required as the command line is not parsed for unit test.
db_name = 'doi_temp.db'

def setUp(self):
Expand All @@ -37,7 +37,7 @@ def test_reserve_xlsx(self):
self._action.run(
input='input/DOI_Reserved_GEO_200318_with_corrected_identifier.xlsx',
node='img', submitter='my_user@my_node.gov',
dry_run=True,force_flag=True)
dry_run=True,force=True)

# The tearDown() function is called per test.

Expand All @@ -49,7 +49,7 @@ def test_reserve_xlsx_and_submit(self):
self._action.run(
input='input/DOI_Reserved_GEO_200318_with_corrected_identifier.xlsx',
node='img', submitter='my_user@my_node.gov',
dry_run=True,force_flag=True)
dry_run=True,force=True)

# The tearDown() function is called per test.

Expand All @@ -60,15 +60,15 @@ def test_reserve_csv(self):
osti_doi = self._action.run(
input='input/DOI_Reserved_GEO_200318.csv',
node='img', submitter='my_user@my_node.gov',
dry_run=True,force_flag=True)
dry_run=True,force=True)
logger.info(osti_doi)

def test_reserve_csv_and_submit(self):
logger.info("test reserve csv file format and submit")
osti_doi = self._action.run(
input='input/DOI_Reserved_GEO_200318.csv',
node='img', submitter='my_user@my_node.gov',
dry_run=False,force_flag=True)
dry_run=False,force=True)
logger.info(osti_doi)

# The tearDown() function is called per test.
Expand Down
3 changes: 3 additions & 0 deletions pds_doi_core/input/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ class CriticalDOIException(Exception):
class WarningDOIException(Exception):
pass

class SiteURNotExistException(Exception):
pass

15 changes: 14 additions & 1 deletion pds_doi_core/input/input_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,21 @@ def _parse_rows_to_doi_meta(self, xl_sheet):
doi_records = []

for index, row in xl_sheet.iterrows():
logger.debug(f"row {row}")
# It is possible that the length of row['publication_date'] is more than needed, we only need to get the first 10 characters
# '2020-08-01' from '2020-08-01 00:00:00'
if len(row['publication_date']) >= 10:
# It is possible that the format provided is not expected, put try/except clause to catch that.
try:
publication_date_value = datetime.strptime(row['publication_date'][0:10], '%Y-%m-%d')
except Exception:
logger.error("Expecting publication_date [" + row['publication_date'] + "] with format YYYY-mm-dd")
raise InputFormatException("Expecting publication_date [" + row['publication_date'] + "] with format YYYY-mm-dd")
else:
raise InputFormatException("Expecting publication_date to be at least 10 characters: [" + row['publication_date'] + "]")

doi = Doi(title=row['title'],
publication_date=datetime.fromisoformat(row['publication_date']),
publication_date=publication_date_value,
product_type='Collection',
product_type_specific=row['product_type_specific'],
related_identifier=row['related_resource'],
Expand Down
32 changes: 31 additions & 1 deletion pds_doi_core/util/doi_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@

from lxml import etree

import requests

from pds_doi_core.db.doi_database import DOIDataBase
from pds_doi_core.entities.doi import Doi
from pds_doi_core.input.exceptions import DuplicatedTitleDOIException, \
IllegalDOIActionException, UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException
IllegalDOIActionException, UnexpectedDOIActionException, TitleDoesNotMatchProductTypeException, SiteURNotExistException
from pds_doi_core.util.config_parser import DOIConfigUtil
from pds_doi_core.util.general_util import get_logger

Expand Down Expand Up @@ -46,6 +48,32 @@ def get_database_name(self):
def __lidvid(columns, row):
return f"{row[columns.index('lid')]}::{row[columns.index('vid')]}"

def _check_field_site_url(self, doi: Doi):
""" If the site_url field exist in doi object, check to see if it is online. If the site is not online, an exception will be thrown.
"""

logger.debug(f"doi {doi}")
logger.info(f"doi.site_url {doi.site_url}")

if doi.site_url:
try:
response = requests.get(doi.site_url,timeout=5)
status_code = response.status_code
logger.debug(f"from_request:status_code,site_url {status_code,doi.site_url}")
# Handle cases when a connection can be made to the server but the status is greater than or equal to 400.
if status_code >= 400:
# Need to check its an 404, 503, 500, 403 etc.
raise requests.HTTPError(f"status_code,site_url {status_code,doi.site_url}")
else:
logger.debug(f"site_url {doi.site_url} indeed exist")
except (requests.exceptions.ConnectionError, Exception) as e:
error_message = f"site_url {doi.site_url} not reachable"
# Although not being able to connect is an error, the message printed is a warning.
logger.warning(error_message)
raise SiteURNotExistException(error_message)

return 1

def _check_field_title_duplicate(self, doi: Doi):
""" Check if the same title exist already in local database for a different lidvid
"""
Expand Down Expand Up @@ -188,6 +216,8 @@ def validate(self, doi: Doi):

# TO DO check id and doi fields are consistent.

# Validate the site_url first to give the user a chance to make the correction.
self._check_field_site_url(doi)
self._check_field_title_duplicate(doi)
self._check_field_title_content(doi)
self._check_field_workflow(doi)
Expand Down

0 comments on commit 9c6bcf4

Please sign in to comment.