diff --git a/.gitignore b/.gitignore index c2617664..70ca0595 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,7 @@ var/ *.xpr bin/ .*.swp +typescript # OS-specific artifacts .DS_Store diff --git a/pyproject.toml b/pyproject.toml index f8b799fd..dfd2843b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools >= 46.4.0", "wheel"] +requires = ["setuptools >= 46.4.0", "wheel", "versioneer-518"] # uncomment to enable pep517 after versioneer problem is fixed. # https://github.com/python-versioneer/python-versioneer/issues/193 @@ -7,3 +7,7 @@ requires = ["setuptools >= 46.4.0", "wheel"] [tool.black] line-length = 120 + +[tool.pytest.ini_options] +# This is required for non-re-entrant tests in ``pds-doi-service``: +numprocesses = 1 diff --git a/setup.cfg b/setup.cfg index 714a4848..86cf24f6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -91,6 +91,16 @@ dev = behave==1.2.6 allure-behave==2.8.13 behave-testrail-reporter==0.4.0 + versioneer + types-python-dateutil + types-pkg_resources + lxml-stubs + pandas-stubs + types-requests + types-six + types-jsonschema + types-flask + types-waitress [options.entry_points] @@ -127,7 +137,16 @@ docstring_convention = google # E501 prevents flake8 from complaining line lengths > 79. We will use # flake8-bugbear's B950 to handle line length lint errors. This trips # when a line is > max-line-length + 10%. -extend-ignore = E203, E501 +# +# W503 is ignored for ``pds-doi-service`` not just because it makes use of +# left-aligned binary operators in multi-line expressions, but because the +# opposite advice is now the recommended practice; see +# • https://rhodesmill.org/brandon/slides/2012-11-pyconca/#laying-down-the-law +# • https://github.com/PyCQA/pycodestyle/pull/502 +# • https://www.flake8rules.com/rules/W503.html +# • ET Tufte, _Seeing with Fresh Eyes: Meaning, Space, Data, Truth_, Graphics +# Press 2020, p.14. 
+extend-ignore = E203, E501, W503 # Selects following test categories: # D: Docstring errors and warnings @@ -147,7 +166,7 @@ select = D,E,F,N,W,B,B902,B903,B950 [mypy] -[mypy-pds.*._version] +[mypy-pds_doi_service._version] # We don't care about issues in versioneer's files ignore_errors = True diff --git a/src/pds_doi_service/api/__init__.py b/src/pds_doi_service/api/__init__.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/api/__main__.py b/src/pds_doi_service/api/__main__.py old mode 100755 new mode 100644 index f1fb125d..161d90f5 --- a/src/pds_doi_service/api/__main__.py +++ b/src/pds_doi_service/api/__main__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -8,9 +8,9 @@ import logging from urllib.parse import urlparse -import connexion +import connexion # type: ignore from flask import jsonify -from flask_cors import CORS +from flask_cors import CORS # type: ignore from pds_doi_service.api import encoder from pds_doi_service.core.util.config_parser import DOIConfigUtil from pds_doi_service.core.util.general_util import get_logger diff --git a/src/pds_doi_service/api/controllers/__init__.py b/src/pds_doi_service/api/controllers/__init__.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/api/controllers/dois_controller.py b/src/pds_doi_service/api/controllers/dois_controller.py old mode 100755 new mode 100644 index 34556a2e..db9a275d --- a/src/pds_doi_service/api/controllers/dois_controller.py +++ b/src/pds_doi_service/api/controllers/dois_controller.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. 
ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -18,7 +18,7 @@ from os.path import join from tempfile import NamedTemporaryFile -import connexion +import connexion # type: ignore from flask import current_app from pds_doi_service.api.models import DoiRecord from pds_doi_service.api.models import DoiSummary diff --git a/src/pds_doi_service/api/encoder.py b/src/pds_doi_service/api/encoder.py old mode 100755 new mode 100644 index b12dd5c8..8cde15b5 --- a/src/pds_doi_service/api/encoder.py +++ b/src/pds_doi_service/api/encoder.py @@ -1,5 +1,5 @@ import six -from connexion.apps.flask_app import FlaskJSONEncoder +from connexion.apps.flask_app import FlaskJSONEncoder # type: ignore from pds_doi_service.api.models import Model diff --git a/src/pds_doi_service/api/models/__init__.py b/src/pds_doi_service/api/models/__init__.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/api/models/base_model_.py b/src/pds_doi_service/api/models/base_model_.py old mode 100755 new mode 100644 index 90a12f8d..02729028 --- a/src/pds_doi_service/api/models/base_model_.py +++ b/src/pds_doi_service/api/models/base_model_.py @@ -8,12 +8,14 @@ class Model(object): - # swaggerTypes: The key is attribute name and the - # value is attribute type. + # swaggerTypes: The key is attribute name and the value is attribute type. + # 🤔 TODO: figure out the actual types that should go here + swagger_types: dict[typing.Any, typing.Any] swagger_types = {} - # attributeMap: The key is attribute name and the - # value is json key in definition. + # attributeMap: The key is attribute name and the value is json key in definition. 
+ # 🤔 TODO: figure out the actual types that should go here + attribute_map: dict[typing.Any, typing.Any] attribute_map = {} @classmethod diff --git a/src/pds_doi_service/api/models/doi_record.py b/src/pds_doi_service/api/models/doi_record.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/api/models/doi_summary.py b/src/pds_doi_service/api/models/doi_summary.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/api/models/label_payload.py b/src/pds_doi_service/api/models/label_payload.py old mode 100755 new mode 100644 index aae8a880..11e07256 --- a/src/pds_doi_service/api/models/label_payload.py +++ b/src/pds_doi_service/api/models/label_payload.py @@ -1,10 +1,7 @@ # coding: utf-8 from __future__ import absolute_import -from datetime import date from datetime import datetime -from typing import Dict -from typing import List from pds_doi_service.api import util from pds_doi_service.api.models import Model diff --git a/src/pds_doi_service/api/models/labels_payload.py b/src/pds_doi_service/api/models/labels_payload.py old mode 100755 new mode 100644 index e4a20ec4..f4d1f576 --- a/src/pds_doi_service/api/models/labels_payload.py +++ b/src/pds_doi_service/api/models/labels_payload.py @@ -1,9 +1,6 @@ # coding: utf-8 from __future__ import absolute_import -from datetime import date -from datetime import datetime -from typing import Dict from typing import List from pds_doi_service.api import util diff --git a/src/pds_doi_service/api/test/__init__.py b/src/pds_doi_service/api/test/__init__.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/api/test/_base.py b/src/pds_doi_service/api/test/_base.py index 52ae7203..278b3fa4 100644 --- a/src/pds_doi_service/api/test/_base.py +++ b/src/pds_doi_service/api/test/_base.py @@ -4,7 +4,7 @@ """ import logging -from flask_testing import TestCase +from flask_testing import TestCase # type: ignore from pds_doi_service.api.__main__ import init_app diff --git 
a/src/pds_doi_service/api/test/test_dois_controller.py b/src/pds_doi_service/api/test/test_dois_controller.py old mode 100755 new mode 100644 index f5c52631..bb0bdb7c --- a/src/pds_doi_service/api/test/test_dois_controller.py +++ b/src/pds_doi_service/api/test/test_dois_controller.py @@ -699,7 +699,7 @@ def test_get_doi_from_id(self): query_string = [("identifier", "urn:nasa:pds:insight_cameras::1.1")] response = self.client.open( - "/PDS_APIs/pds_doi_api/0.2/doi".format(lidvid="urn:nasa:pds:insight_cameras::1.1"), + "/PDS_APIs/pds_doi_api/0.2/doi", method="GET", query_string=query_string, headers={"Referer": "http://localhost"}, diff --git a/src/pds_doi_service/api/util.py b/src/pds_doi_service/api/util.py old mode 100755 new mode 100644 diff --git a/src/pds_doi_service/core/actions/__init__.py b/src/pds_doi_service/core/actions/__init__.py index 6b3a3ed9..c3a044a3 100644 --- a/src/pds_doi_service/core/actions/__init__.py +++ b/src/pds_doi_service/core/actions/__init__.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21 by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21 by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -12,10 +12,10 @@ This package contains the implementations for the user-facing action classes used to interact with the PDS DOI service. 
""" -from pds_doi_service.core.actions.action import create_parser -from pds_doi_service.core.actions.action import DOICoreAction -from pds_doi_service.core.actions.check import DOICoreActionCheck -from pds_doi_service.core.actions.draft import DOICoreActionDraft -from pds_doi_service.core.actions.list import DOICoreActionList -from pds_doi_service.core.actions.release import DOICoreActionRelease -from pds_doi_service.core.actions.reserve import DOICoreActionReserve +from pds_doi_service.core.actions.action import create_parser # noqa: F401 +from pds_doi_service.core.actions.action import DOICoreAction # noqa: F401 +from pds_doi_service.core.actions.check import DOICoreActionCheck # noqa: F401 +from pds_doi_service.core.actions.draft import DOICoreActionDraft # noqa: F401 +from pds_doi_service.core.actions.list import DOICoreActionList # noqa: F401 +from pds_doi_service.core.actions.release import DOICoreActionRelease # noqa: F401 +from pds_doi_service.core.actions.reserve import DOICoreActionReserve # noqa: F401 diff --git a/src/pds_doi_service/core/actions/action.py b/src/pds_doi_service/core/actions/action.py index 1390d458..2ca8811b 100644 --- a/src/pds_doi_service/core/actions/action.py +++ b/src/pds_doi_service/core/actions/action.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21 by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21 by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -32,6 +32,7 @@ class DOICoreAction: _name = "unknown" _description = "no description" _order = 9999999 # used to sort actions in documentation + _run_arguments: tuple[str, ...] 
_run_arguments = () def __init__(self, db_name=None): diff --git a/src/pds_doi_service/core/actions/check.py b/src/pds_doi_service/core/actions/check.py index 59351323..2f12d748 100644 --- a/src/pds_doi_service/core/actions/check.py +++ b/src/pds_doi_service/core/actions/check.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -20,7 +20,7 @@ from email.mime.multipart import MIMEMultipart from os.path import exists -import pystache +import pystache # type: ignore from pds_doi_service.core.actions import DOICoreAction from pds_doi_service.core.actions.list import DOICoreActionList from pds_doi_service.core.entities.doi import DoiStatus @@ -414,7 +414,7 @@ def run(self, **kwargs): o_doi_list = self._list_obj.run(status=DoiStatus.Pending) pending_state_list = json.loads(o_doi_list) - logger.info(f"Found %d %s record(s) to check", len(pending_state_list), DoiStatus.Pending) + logger.info("Found %d %s record(s) to check" % (len(pending_state_list), DoiStatus.Pending)) if len(pending_state_list) > 0: for pending_record in pending_state_list: diff --git a/src/pds_doi_service/core/actions/draft.py b/src/pds_doi_service/core/actions/draft.py index 39c59a60..a7390749 100644 --- a/src/pds_doi_service/core/actions/draft.py +++ b/src/pds_doi_service/core/actions/draft.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. 
diff --git a/src/pds_doi_service/core/actions/list.py b/src/pds_doi_service/core/actions/list.py index 20765031..3557f294 100644 --- a/src/pds_doi_service/core/actions/list.py +++ b/src/pds_doi_service/core/actions/list.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/actions/release.py b/src/pds_doi_service/core/actions/release.py index 7af0c30b..0745049b 100644 --- a/src/pds_doi_service/core/actions/release.py +++ b/src/pds_doi_service/core/actions/release.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/actions/reserve.py b/src/pds_doi_service/core/actions/reserve.py index b1543683..64a9192f 100644 --- a/src/pds_doi_service/core/actions/reserve.py +++ b/src/pds_doi_service/core/actions/reserve.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology.
diff --git a/src/pds_doi_service/core/cmd/pds_doi_cmd.py b/src/pds_doi_service/core/cmd/pds_doi_cmd.py old mode 100755 new mode 100644 index 6fc75b11..0838c057 --- a/src/pds_doi_service/core/cmd/pds_doi_cmd.py +++ b/src/pds_doi_service/core/cmd/pds_doi_cmd.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/db/doi_database.py b/src/pds_doi_service/core/db/doi_database.py index 963f9c63..2523180a 100644 --- a/src/pds_doi_service/core/db/doi_database.py +++ b/src/pds_doi_service/core/db/doi_database.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -12,7 +12,6 @@ Contains classes and functions for interfacing with the local transaction database (SQLite3).
""" -import datetime import sqlite3 from collections import OrderedDict from datetime import datetime diff --git a/src/pds_doi_service/core/entities/doi.py b/src/pds_doi_service/core/entities/doi.py index 769adf95..5d8e8ded 100644 --- a/src/pds_doi_service/core/entities/doi.py +++ b/src/pds_doi_service/core/entities/doi.py @@ -16,6 +16,8 @@ from datetime import datetime from enum import Enum from enum import unique +from typing import Any +from typing import Optional @unique @@ -102,24 +104,25 @@ class DoiEvent(str, Enum): class Doi: """The dataclass definition for a Doi object.""" + # 🤔 TODO: replace the ``Any`` with what should be in aggregate types: title: str publication_date: datetime product_type: ProductType product_type_specific: str related_identifier: str - identifiers: list = field(default_factory=list) - authors: list = None - keywords: set = field(default_factory=set) - editors: list = None - description: str = None - id: str = None - doi: str = None - site_url: str = None - publisher: str = None - contributor: str = None - status: DoiStatus = None - previous_status: DoiStatus = None - message: str = None - date_record_added: datetime = None - date_record_updated: datetime = None - event: DoiEvent = None + identifiers: list[Any] = field(default_factory=list) + authors: Optional[list[Any]] = None + keywords: set[Any] = field(default_factory=set) + editors: Optional[list[Any]] = None + description: Optional[str] = None + id: Optional[str] = None + doi: Optional[str] = None + site_url: Optional[str] = None + publisher: Optional[str] = None + contributor: Optional[str] = None + status: Optional[DoiStatus] = None + previous_status: Optional[DoiStatus] = None + message: Optional[str] = None + date_record_added: Optional[datetime] = None + date_record_updated: Optional[datetime] = None + event: Optional[DoiEvent] = None diff --git a/src/pds_doi_service/core/input/input_util.py b/src/pds_doi_service/core/input/input_util.py index 278076da..2b89c4b7 100644 --- 
a/src/pds_doi_service/core/input/input_util.py +++ b/src/pds_doi_service/core/input/input_util.py @@ -32,7 +32,7 @@ from pds_doi_service.core.outputs.service import SERVICE_TYPE_DATACITE from pds_doi_service.core.util.config_parser import DOIConfigUtil from pds_doi_service.core.util.general_util import get_logger -from xmlschema import XMLSchemaValidationError +from xmlschema import XMLSchemaValidationError # type: ignore # Get the common logger logger = get_logger(__name__) diff --git a/src/pds_doi_service/core/input/test/read_remote_bundle.py b/src/pds_doi_service/core/input/test/read_remote_bundle.py index d8c045c8..25b9d9f3 100644 --- a/src/pds_doi_service/core/input/test/read_remote_bundle.py +++ b/src/pds_doi_service/core/input/test/read_remote_bundle.py @@ -1,5 +1,8 @@ import logging import time +from urllib.request import urlopen + +import requests logging.basicConfig(level=logging.DEBUG) @@ -8,7 +11,6 @@ target_url = "https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_JSON_1D00.JSON" # fast method -from urllib.request import urlopen timer_start = time.time() logger.info(f"TIMER_START:urlopen {target_url}") @@ -22,7 +24,6 @@ # slow method logger.info("==============================") -import requests timer_start = time.time() session = requests.session() diff --git a/src/pds_doi_service/core/outputs/datacite/__init__.py b/src/pds_doi_service/core/outputs/datacite/__init__.py index d8233805..581875e5 100644 --- a/src/pds_doi_service/core/outputs/datacite/__init__.py +++ b/src/pds_doi_service/core/outputs/datacite/__init__.py @@ -6,7 +6,7 @@ This package contains the DataCite-specific implementations for the abstract classes of the outputs package. 
""" -from .datacite_record import DOIDataCiteRecord -from .datacite_validator import DOIDataCiteValidator -from .datacite_web_client import DOIDataCiteWebClient -from .datacite_web_parser import DOIDataCiteWebParser +from .datacite_record import DOIDataCiteRecord # noqa: F401 +from .datacite_validator import DOIDataCiteValidator # noqa: F401 +from .datacite_web_client import DOIDataCiteWebClient # noqa: F401 +from .datacite_web_parser import DOIDataCiteWebParser # noqa: F401 diff --git a/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py b/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py index 5c949f3c..83ad2936 100644 --- a/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py +++ b/src/pds_doi_service/core/outputs/datacite/datacite_web_parser.py @@ -165,7 +165,7 @@ def _parse_date_record_added(record): def _parse_date_record_updated(record): try: return isoparse(record["updated"]) - except (KeyError, ValueError) as err: + except (KeyError, ValueError): logger.warning('Could not parse optional field "date_record_updated"') @staticmethod diff --git a/src/pds_doi_service/core/outputs/doi_record.py b/src/pds_doi_service/core/outputs/doi_record.py index 804e6625..36e1246d 100644 --- a/src/pds_doi_service/core/outputs/doi_record.py +++ b/src/pds_doi_service/core/outputs/doi_record.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/outputs/doi_validator.py b/src/pds_doi_service/core/outputs/doi_validator.py index 259d69e0..9b3d82d1 100644 --- a/src/pds_doi_service/core/outputs/doi_validator.py +++ b/src/pds_doi_service/core/outputs/doi_validator.py @@ -13,6 +13,7 @@ DOI workflow. 
""" import re +from typing import Optional import requests from pds_doi_service.core.db.doi_database import DOIDataBase @@ -28,6 +29,7 @@ from pds_doi_service.core.util.config_parser import DOIConfigUtil from pds_doi_service.core.util.general_util import get_logger + # Get the common logger and set the level for this file. logger = get_logger(__name__) @@ -293,6 +295,7 @@ def _check_lidvid_field(self, doi: Doi): """ + vid: Optional[str] if "::" in doi.related_identifier: lid, vid = doi.related_identifier.split("::") else: @@ -350,7 +353,7 @@ def _check_field_workflow(self, doi: Doi): Check that there is not a record in the sqllite database with same identifier but a higher status than the current action (see workflow_order) """ - if doi.status.lower() not in self.m_workflow_order: + if doi.status is not None and doi.status.lower() not in self.m_workflow_order: msg = ( f"Unexpected DOI status of '{doi.status.lower()}' from label. " f"Valid values are " @@ -372,7 +375,12 @@ def _check_field_workflow(self, doi: Doi): # A status tuple of ('Pending',3) is higher than ('Draft',2) will # cause an error. - if self.m_workflow_order[prev_status.lower()] > self.m_workflow_order[doi.status.lower()]: + # + # 🤔 TODO: ``mypy`` has several complaints about this line: + # • doi.status is an optional (``None``) so calling ``lower`` on it could fail; there should be a check + # • The indexing on ``DoiStatus`` here is by ``str``, but is declared to be ``DoiStatus`` + # But the tests pass so I'm throwing caution to the wind. 
+ if self.m_workflow_order[prev_status.lower()] > self.m_workflow_order[doi.status.lower()]: # type: ignore msg = ( f"There is a record for identifier {doi.related_identifier} " f"(DOI: {doi_str}) with status: '{prev_status.lower()}'.\n" diff --git a/src/pds_doi_service/core/outputs/osti/__init__.py b/src/pds_doi_service/core/outputs/osti/__init__.py index 45ee83ba..98fc015a 100644 --- a/src/pds_doi_service/core/outputs/osti/__init__.py +++ b/src/pds_doi_service/core/outputs/osti/__init__.py @@ -6,9 +6,9 @@ This package contains the OSTI-specific implementations for the abstract classes of the outputs package. """ -from .osti_record import DOIOstiRecord -from .osti_validator import DOIOstiValidator -from .osti_web_client import DOIOstiWebClient -from .osti_web_parser import DOIOstiJsonWebParser -from .osti_web_parser import DOIOstiWebParser -from .osti_web_parser import DOIOstiXmlWebParser +from .osti_record import DOIOstiRecord # noqa: F401 +from .osti_validator import DOIOstiValidator # noqa: F401 +from .osti_web_client import DOIOstiWebClient # noqa: F401 +from .osti_web_parser import DOIOstiJsonWebParser # noqa: F401 +from .osti_web_parser import DOIOstiWebParser # noqa: F401 +from .osti_web_parser import DOIOstiXmlWebParser # noqa: F401 diff --git a/src/pds_doi_service/core/outputs/osti/osti_record.py b/src/pds_doi_service/core/outputs/osti/osti_record.py index b703f9df..97e44efd 100644 --- a/src/pds_doi_service/core/outputs/osti/osti_record.py +++ b/src/pds_doi_service/core/outputs/osti/osti_record.py @@ -16,7 +16,7 @@ from datetime import datetime from os.path import exists -import pystache +import pystache # type: ignore from pds_doi_service.core.entities.doi import Doi from pds_doi_service.core.entities.doi import ProductType from pds_doi_service.core.outputs.doi_record import CONTENT_TYPE_JSON diff --git a/src/pds_doi_service/core/outputs/osti/osti_validator.py b/src/pds_doi_service/core/outputs/osti/osti_validator.py index dc99d19f..4c5955cb 100644 --- 
a/src/pds_doi_service/core/outputs/osti/osti_validator.py +++ b/src/pds_doi_service/core/outputs/osti/osti_validator.py @@ -15,15 +15,17 @@ from distutils.util import strtobool from os.path import exists -import xmlschema +import xmlschema # type: ignore from lxml import etree -from lxml import isoschematron +from lxml import isoschematron # type: ignore from pds_doi_service.core.entities.doi import DoiStatus from pds_doi_service.core.input.exceptions import InputFormatException from pds_doi_service.core.outputs.service_validator import DOIServiceValidator from pds_doi_service.core.util.general_util import get_logger from pkg_resources import resource_filename +# Note that ``lxml`` does have the ``isoschematron`` member, but the typing stub does not, so we ignore: + logger = get_logger(__name__) diff --git a/src/pds_doi_service/core/outputs/service.py b/src/pds_doi_service/core/outputs/service.py index 2b1795e2..e7d96c7b 100644 --- a/src/pds_doi_service/core/outputs/service.py +++ b/src/pds_doi_service/core/outputs/service.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/outputs/service_validator.py b/src/pds_doi_service/core/outputs/service_validator.py index ad4d599e..bfe6caa6 100644 --- a/src/pds_doi_service/core/outputs/service_validator.py +++ b/src/pds_doi_service/core/outputs/service_validator.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. 
Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/outputs/transaction_builder.py b/src/pds_doi_service/core/outputs/transaction_builder.py index dda537be..b4260f76 100644 --- a/src/pds_doi_service/core/outputs/transaction_builder.py +++ b/src/pds_doi_service/core/outputs/transaction_builder.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/outputs/web_client.py b/src/pds_doi_service/core/outputs/web_client.py index db40097e..a0a3a275 100644 --- a/src/pds_doi_service/core/outputs/web_client.py +++ b/src/pds_doi_service/core/outputs/web_client.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology.
@@ -14,10 +14,13 @@ """ import json import pprint +from typing import Any +from typing import Optional import requests from pds_doi_service.core.input.exceptions import WebRequestException from pds_doi_service.core.outputs.doi_record import CONTENT_TYPE_XML +from pds_doi_service.core.outputs.web_parser import DOIWebParser from pds_doi_service.core.util.config_parser import DOIConfigUtil from requests.auth import HTTPBasicAuth @@ -33,9 +36,11 @@ class DOIWebClient: """Abstract base class for clients of an HTTP DOI service endpoint""" _config_util = DOIConfigUtil() + _service_name: Optional[str] _service_name = None + _web_parser: Optional[DOIWebParser] _web_parser = None - _content_type_map = {} + _content_type_map: dict[Any, Any] = {} # 🤔 TODO: replace Any with what we expect def _submit_content(self, payload, url, username, password, method=WEB_METHOD_POST, content_type=CONTENT_TYPE_XML): """ diff --git a/src/pds_doi_service/core/outputs/web_parser.py b/src/pds_doi_service/core/outputs/web_parser.py index 29340d7e..9dfa7251 100644 --- a/src/pds_doi_service/core/outputs/web_parser.py +++ b/src/pds_doi_service/core/outputs/web_parser.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. @@ -12,6 +12,9 @@ Contains the abstract base class for parsing DOI objects from label returned or provided to DOI service endpoints (OSTI, Datacite, etc...). """ +from typing import Any +from typing import List + from pds_doi_service.core.outputs.doi_record import CONTENT_TYPE_XML @@ -21,10 +24,10 @@ class DOIWebParser: to a DOI service endpoint. """ - _optional_fields = [] + _optional_fields: List[Any] = [] # 🤔 TODO: should be ``List[str]``? 
"""The optional Doi field names parsed from labels.""" - _mandatory_fields = [] + _mandatory_fields: List[Any] = [] # 🤔 TODO: should be ``List[str]``? """The mandatory Doi field names parsed from labels.""" @staticmethod diff --git a/src/pds_doi_service/core/util/config_parser.py b/src/pds_doi_service/core/util/config_parser.py index fb0bbc75..69a642ea 100644 --- a/src/pds_doi_service/core/util/config_parser.py +++ b/src/pds_doi_service/core/util/config_parser.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2021–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/util/doi_xml_differ.py b/src/pds_doi_service/core/util/doi_xml_differ.py index b5ab8f89..f5ce3300 100644 --- a/src/pds_doi_service/core/util/doi_xml_differ.py +++ b/src/pds_doi_service/core/util/doi_xml_differ.py @@ -378,7 +378,6 @@ def _pre_condition_documents(historical_doc, new_doc): # Rebuilt the historical tree in the order of the 'identifier_value' field. historical_root = etree.Element("records") for key in sorted(historical_dict_list.keys()): - publication_date = historical_dict_list[key].xpath("publication_date")[0].text historical_root.append(historical_dict_list[key]) # Rebuilt the new tree in the order of the 'identifier_value' field. @@ -386,7 +385,6 @@ def _pre_condition_documents(historical_doc, new_doc): for key in sorted(new_dict_list.keys()): # It is possible that the historical doesn't have the record. Check before adding so both will have the same number of records. if key in identifier_list_from_historical: - publication_date = new_dict_list[key].xpath("publication_date")[0].text new_root.append(new_dict_list[key]) # Re-parse both documents now with the 'record' elements in the same order. 
@@ -415,19 +413,6 @@ def _get_indices_where_tag_occur(tag_name, my_parent_tag, indices_where_field_oc logger.debug(f"tag_name '{tag_name}' is not in indices_where_field_occur_dict:") return 0 - def _setup_where_field_occur_dict(): - # For fields that can have multiple occurences, a dictionary is necessary to - # remember where each field occur in the historical tree so it can be used to find the field in the new tree. - indices_where_field_occur_dict = { - "author_first_name": 0, - "author_last_name": 0, - "contributor_first_name": 0, - "contributor_last_name": 0, - "contributor_full_name": 0, - "contributor_contributor_type": 0, - } - return indices_where_field_occur_dict - def _differ_single_record( new_doc, historical_element, @@ -446,7 +431,6 @@ def _differ_single_record( # Loop through until cannot find any more elements. Travel all way to the leaves and then compare the fields. child_index = 0 - top_parent_name = historical_element.tag for child_level_1 in historical_element.iter(): # The element with the tag 'record' is not useful, so it is skipped. diff --git a/src/pds_doi_service/core/util/emailer.py b/src/pds_doi_service/core/util/emailer.py index 6580a6b5..6c003ccd 100644 --- a/src/pds_doi_service/core/util/emailer.py +++ b/src/pds_doi_service/core/util/emailer.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/util/general_util.py b/src/pds_doi_service/core/util/general_util.py index 29a2ebeb..5fc52e7d 100644 --- a/src/pds_doi_service/core/util/general_util.py +++ b/src/pds_doi_service/core/util/general_util.py @@ -1,5 +1,5 @@ # -# Copyright 2020-21, by the California Institute of Technology. 
ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/util/initialize_production_deployment.py b/src/pds_doi_service/core/util/initialize_production_deployment.py index 6aa04c28..5064e852 100644 --- a/src/pds_doi_service/core/util/initialize_production_deployment.py +++ b/src/pds_doi_service/core/util/initialize_production_deployment.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2020-21, by the California Institute of Technology. ALL RIGHTS +# Copyright 2020–21, by the California Institute of Technology. ALL RIGHTS # RESERVED. United States Government Sponsorship acknowledged. Any commercial # use must be negotiated with the Office of Technology Transfer at the # California Institute of Technology. diff --git a/src/pds_doi_service/core/util/keyword_tokenizer.py b/src/pds_doi_service/core/util/keyword_tokenizer.py index aeb2429c..97eee7bb 100644 --- a/src/pds_doi_service/core/util/keyword_tokenizer.py +++ b/src/pds_doi_service/core/util/keyword_tokenizer.py @@ -1,13 +1,13 @@ import re -import nltk +import nltk # type: ignore nltk.download("stopwords", quiet=True) -from nltk.corpus import stopwords +from nltk.corpus import stopwords # type: ignore # noqa: E402 nltk.download("wordnet", quiet=True) -from nltk.stem.wordnet import WordNetLemmatizer -from pds_doi_service.core.util.general_util import get_logger +from nltk.stem.wordnet import WordNetLemmatizer # type: ignore # noqa: E402 # @nutjob4life: 😩 +from pds_doi_service.core.util.general_util import get_logger # noqa: E402 logger = get_logger(__name__) @@ -34,7 +34,7 @@ def process_text(self, text): text = re.sub("</?.*?>", " <> ", text) # remove special characters - text = re.sub("(\|\\W)+", " ", text) + text = re.sub(r"(\|\W)+", " ", text) # Convert 
to list from string text = text.split()