Skip to content

Commit

Permalink
Merge pull request #6 from Small-Bodies-Node/atlas
Browse files Browse the repository at this point in the history
Add support for ATLAS and re-organize code.
  • Loading branch information
mkelley authored Aug 21, 2024
2 parents 3f5bc51 + 7f310a3 commit 3fba9b7
Show file tree
Hide file tree
Showing 31 changed files with 696 additions and 410 deletions.
2 changes: 1 addition & 1 deletion _activate
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env false

if [ ! -f $PWD/.env ]; then
echo -e "No .env file found. Generate with '_sbnsis env' then edit."
echo -e "No .env file found. Generate with 'sbnsis env' then edit."
return 1
fi
source .env
Expand Down
196 changes: 196 additions & 0 deletions _add_atlas
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/usr/bin/env python3
"""
Identify new ATLAS data and add to the SBN SIS database.
"""

import os
import sys
import shlex
import logging
import sqlite3
import argparse
from glob import glob
import logging.handlers
from packaging.version import Version

from astropy.time import Time
import astropy.units as u
import pds4_tools

from sbn_survey_image_service.data.add import add_label
from sbn_survey_image_service.services.database_provider import data_provider_session


class LabelError(Exception):
    """Raised when a PDS4 label is invalid or cannot be located."""


def get_logger():
    """Return the logger shared by every function in this script."""
    logger_name = "SBNSIS/Add ATLAS"
    return logging.getLogger(logger_name)


def setup_logger(args):
    """Configure the script logger and return it.

    Two handlers are attached: one writing to stderr and one writing to the
    file named by ``args.log``.  If the logger already has handlers, it is
    returned unchanged.

    Parameters
    ----------
    args
        Parsed command-line arguments; ``args.log`` (log file name) and
        ``args.verbose`` (enable debugging messages) are used.

    Returns
    -------
    logging.Logger
    """
    logger = get_logger()

    if logger.handlers:
        # already set up
        return logger

    # dirname is "" for a bare file name (e.g., --log add.log); makedirs("")
    # would raise FileNotFoundError, so only create a directory when one is
    # actually part of the path.
    log_directory = os.path.dirname(args.log)
    if log_directory:
        os.makedirs(log_directory, exist_ok=True)

    logger.setLevel(logging.DEBUG)

    formatter = logging.Formatter("%(levelname)s:%(name)s:%(asctime)s: %(message)s")

    # stderr: errors only, unless --verbose
    handler = logging.StreamHandler(sys.stderr)
    handler.setLevel(logging.DEBUG if args.verbose else logging.ERROR)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # log file: informational messages and up, or everything with --verbose
    handler = logging.FileHandler(args.log)
    handler.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # record the command line used for this run
    logger.info("%s", " ".join([shlex.quote(s) for s in sys.argv]))

    return logger


def collection_version(collection) -> Version:
    """Get the collection version.

    Parameters
    ----------
    collection
        An object with a ``label`` attribute that supports ``find()``; in this
        script, the result of ``pds4_tools.read`` on a collection label.

    Returns
    -------
    Version
        The label's ``Identification_Area/version_id``.

    Raises
    ------
    LabelError
        If the product class is missing or is not "Product_Collection", or if
        the version ID is missing.
    """
    product_class = collection.label.find("Identification_Area/product_class")
    vid = collection.label.find("Identification_Area/version_id")

    # Check for a missing element before reading .text, so that a malformed
    # label raises LabelError rather than AttributeError.
    is_collection = (
        product_class is not None and product_class.text == "Product_Collection"
    )
    if not is_collection or vid is None:
        raise LabelError("This does not appear to be a valid PDS4 label.")
    return Version(vid.text)


def get_lidvid(filename):
    """Return the LIDVID of the PDS4 label in `filename`.

    The logical identifier and version ID are read from the label's
    Identification_Area and joined as "LID::VID".
    """
    label = pds4_tools.read(filename, quiet=True, lazy_load=True).label
    lid_element = label.find("Identification_Area/logical_identifier")
    vid_element = label.find("Identification_Area/version_id")
    return "::".join([lid_element.text, vid_element.text])


def get_image_labels(collection, data_directory) -> list[str]:
    """Get the label file names of the image products to ingest.

    The label file names for all LIDVIDs whose LID ends with ".fits" in the
    collection inventory will be returned.

    Candidate labels are collected from "*.fits.xml" files within
    `data_directory`.

    Parameters
    ----------
    collection
        The collection product; its first structure (``collection[0]``) is
        read for the "LIDVID_LID" field.
    data_directory
        Directory to search for label files.

    Returns
    -------
    list of str
        Label file names, in the order their LIDVIDs appear in the collection.

    Raises
    ------
    LabelError
        If a ".fits" LIDVID in the collection has no label in `data_directory`.
    """

    logger = get_logger()

    # map LIDVID --> label file name for every candidate label
    files = {}
    count = 0
    for fn in glob(f"{data_directory}/*xml"):
        if not fn.endswith(".fits.xml"):
            continue
        files[get_lidvid(fn)] = fn
        count += 1
        if (count % 100) == 0:
            # periodic progress report
            logger.debug("%d files read", count)
    logger.debug("%d files read", count)

    image_files = []
    for lidvid in collection[0].data["LIDVID_LID"]:
        lid = lidvid.split("::")[0]
        if not lid.endswith(".fits"):
            continue
        if lidvid not in files:
            raise LabelError(f"{lidvid} not found in {data_directory}")
        image_files.append(files[lidvid])
    return image_files


# Command-line interface.  --since-date and --since are mutually exclusive
# ways to specify the start of the time span to check.
parser = argparse.ArgumentParser()
parser.add_argument(
    "database", type=os.path.normpath, help="ATLAS-PDS processing database"
)
mutex = parser.add_mutually_exclusive_group()
mutex.add_argument(
    "--since-date", type=Time, help="harvest metadata validated since this date"
)
mutex.add_argument(
    "--since",
    type=int,
    # The help text promises a 24 hour default.  Without it, args.since is
    # None when neither option is given, and the date arithmetic that uses it
    # raises TypeError.
    default=24,
    help="harvest metadata validated in the past SINCE hours (default: 24)",
)
parser.add_argument(
    "--log", default="./logging/add-atlas.log", help="log messages to this file"
)
parser.add_argument(
    "--verbose", "-v", action="store_true", help="log debugging messages"
)
args = parser.parse_args()

logger = setup_logger(args)

# setup database (opened read-only)
try:
    db = sqlite3.connect(f"file:{args.database}?mode=ro", uri=True)
    db.row_factory = sqlite3.Row
except Exception:
    logger.error("Could not connect to database %s", args.database)
    raise

logger.info("Connected to database %s", args.database)

# --since-date takes precedence, otherwise look back --since hours from now
if args.since_date:
    date = args.since_date
else:
    date = Time.now() - args.since * u.hr
logger.info("Checking for collections validated since %s", date.iso)

# check for new collections
cursor = db.execute(
    "SELECT * FROM nn WHERE current_status = 'validated' AND recorded_at > ?",
    (date.unix,),
)
results = cursor.fetchall()

if not results:
    logger.info("No new data collections found.")
else:
    with data_provider_session() as session:
        for row in results:
            # read all versions of the collection label, keep the latest
            collections = [
                pds4_tools.read(fn, quiet=True)
                for fn in glob(f"/n/{row['location']}/collection_{row['nn']}*.xml")
            ]
            if not collections:
                # otherwise max() below fails with an uninformative ValueError
                raise LabelError(
                    f"No collection labels found in /n/{row['location']} "
                    f"for {row['nn']}"
                )
            versions = [collection_version(label) for label in collections]
            latest = collections[versions.index(max(versions))]
            lid = latest.label.find("Identification_Area/logical_identifier").text
            vid = latest.label.find("Identification_Area/version_id").text
            logger.info("Found collection %s::%s", lid, vid)

            data_directory = f"/n/{row['location']}/data"
            logger.debug(
                "Inspecting directory %s for image products",
                data_directory,
            )
            files = get_image_labels(latest, data_directory)
            logger.info("%d image products to add", len(files))

            count = 0
            errored = 0
            for label in files:
                try:
                    count += add_label(label, session)
                except Exception:
                    # keep going, but record which label failed and why
                    errored += 1
                    logger.exception("Could not add %s", label)
            logger.info(
                "%d files added, %d files already in the database, %d files errored.",
                count,
                len(files) - count - errored,
                errored,
            )
logger.info("Finished.")
2 changes: 1 addition & 1 deletion _develop_apis
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#! /bin/bash
_sbnsis start --dev
sbnsis start --dev
2 changes: 1 addition & 1 deletion _initial_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ if [ ! -f $PWD/.env ]; then
To create a .env file:
source .venv/bin/activate
_sbnsis env
sbnsis env
Then edit .env
${reset_color}"""
Expand Down
3 changes: 0 additions & 3 deletions _sbnsis

This file was deleted.

38 changes: 16 additions & 22 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,24 @@
name = "sbn-survey-image-service"
description = "Serves images and cutouts via REST API."
readme = "readme.md"
authors = [
{ name = "Michael S. P. Kelley", email = "[email protected]" }
]
authors = [{ name = "Michael S. P. Kelley", email = "[email protected]" }]
license = { text = "BSD 3-Clause License" }
dynamic = ["version"]
requires-python = ">= 3.10"

dependencies = [
"Flask>=3.0",
"Flask-Cors>=4.0",
"gunicorn>=21",
"connexion>=3.0",
"swagger-ui-bundle>1.0",
"astropy>=6.0",
"connexion[flask,swagger-ui,uvicorn]~=3.0",
"gunicorn~=21.2",
"pds4_tools==1.3",
"SQLAlchemy>=2.0",
"python-dotenv>1.0",
"pytest-remotedata>=0.4",
"python-dotenv~=1.0",
"SQLAlchemy>=2.0",
]
dynamic = ["version"]

[project.optional-dependencies]
recommended = ["psycopg2-binary>=2.8"]
dev = ["autopep8", "mypy", "pycodestyle", "pytest>=7.0", "pytest-cov>=3.0"]

[project.urls]
homepage = "https://github.com/Small-Bodies-Node/sbn-survey-image-service"
Expand All @@ -28,20 +29,13 @@ requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
build-backend = 'setuptools.build_meta'

[tool.setuptools_scm]
write_to = "sbn_survey_image_service/_version.py"

[tool.setuptools]
zip-safe = false

[tool.setuptools.packages.find]

[project.optional-dependencies]
recommended = [
"psycopg2-binary>=2.8",
]
dev = [
"autopep8",
"mypy",
"pycodestyle",
"pytest>=7.0",
"pytest-cov>=3.0",
]
[project.scripts]
sbnsis = "sbn_survey_image_service.scripts.sbnsis:__main__"
sbnsis-add = "sbn_survey_image_service.data.add:__main__"
18 changes: 9 additions & 9 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ This repo has code for:
- Running the API
- Testing

Most day-to-day tasks can be accomplished with the `_sbnsis` command.
Most day-to-day tasks can be accomplished with the `sbnsis` command.

The following steps are needed to set up the code base:

Expand All @@ -55,7 +55,7 @@ The following steps are needed to set up the code base:
- Create and activate a python virtual environment.
- To use a specific Python interpreter, set the PYTHON environment variable: `PYTHON=/path/to/python3 bash _initial_setup.sh`
- Install dependencies, including `fitscut`, to the virtual env.
- Create a new environment variable file and edit to suit your needs: `_sbnsis env`.
- Create a new environment variable file and edit to suit your needs: `sbnsis env`.
- Optionally test your set up:
- Be aware that the testing suite will use the database parameters specified in the `.env` file.
- The database user must have write permissions for testing.
Expand All @@ -69,25 +69,25 @@ The following steps are needed to set up the code base:

### Adding archival data

The `sbn_survey_image_service.data.add` sub-module is used to add image metadata to the database. It scans PDS3 or PDS4 labels, and saves to the database data product metadata and URLs to the label and image data. The sub-module may be run as a command-line script `python3 -m sbn_survey_image_service.data.add`. The script will automatically create the database in case it does not exist. For example, to search a NEAT survey directory for PDS4 image labels and data, and to form URLs with which the data may be retrieved:
The `sbn_survey_image_service.data.add` sub-module is used to add image metadata to the database. It scans PDS3 or PDS4 labels, and saves to the database data product metadata and URLs to the label and image data. The sub-module provides a command-line script `sbnsis-add`. The script will automatically create the database in case it does not exist. For example, to search a NEAT survey directory for PDS4 image labels and data, and to form URLs with which the data may be retrieved:

```
python3 -m sbn_survey_image_service.data.add -r \
sbnsis-add -r \
/path/to/gbo.ast.neat.survey/data_geodss/g19960417/obsdata
```

The previous example is for a survey accessible via the local file system. As an alternative, data may be served to the image service via HTTP(S). In this case, the `add` script must still be run on locally accessible labels, but an appropriate URL may be formed using the `--base-url` and `--strip-leading` parameters:
The previous example is for a survey accessible via the local file system. As an alternative, data may be served to the image service via HTTP(S). In this case, the `sbnsis-add` script must still be run on locally accessible labels, but an appropriate URL may be formed using the `--base-url` and `--strip-leading` parameters:

```
python3 -m sbn_survey_image_service.data.add -r \
sbnsis-add -r \
/path/to/gbo.ast.neat.survey/data_geodss/g19960417/obsdata \
--base-url=https://sbnarchive.psi.edu/pds4/surveys \
--strip-leading=/path/to/
```

For a summary of command-line parameters, use the `--help` option.

Due to survey-to-survey label differences, it is unlikely that the script will work with a previously untested data source. Edit the appropriate functions in `sbn_survey_image_service/data/add.py`, either `pds3_image` or `pds4_image`. For example, the NEAT survey PDS4 data set v1.0 does not have pixel scale in the label, so we have hard coded it into the `pds4_image` function.
Due to survey-to-survey label differences, it is unlikely that the script will work with a previously untested data source. Edit the appropriate functions in `sbn_survey_image_service/data/add.py`, e.g., `pds4_image()`. For example, the NEAT survey PDS4 data set v1.0 does not have pixel scale in the label, so we have hard coded it into the `pds4_image` function.

It is assumed that survey images are FITS-compatible with a World Coordinate System defined for a standard sky reference frame (ICRS). The cutout service uses the FITS header, not the PDS labels, to define the sub-frame. This is a limitation from using `fitscut`.

Expand All @@ -97,11 +97,11 @@ Whether running in development or deployment modes, the Swagger documentation is

### Development

If you have `nodemon` globally installed, then you can develop your api code and have it automatically update on changes by running `_sbnsis start --dev_`. Otherwise, just run `python -m sbn_survey_image_service.api.app`.
If you have `nodemon` globally installed, then you can develop your api code and have it automatically update on changes by running `sbnsis start --dev`. Otherwise, just run `python -m sbn_survey_image_service.app`.

### Deployment

The `_sbnsis` takes the arguments `start|stop|status|restart` to launch the app as a background process with the gunicorn WSGI server for production serving. The number of workers is controlled with the env variable `LIVE_GUNICORN_INSTANCES`. If you have trouble getting gunicorn to work, running in non-daemon mode may help with debugging: `_sbnsis start --no-daemon`.
The `sbnsis` command takes the arguments `start|stop|status|restart` to launch the app as a background process with the gunicorn WSGI server for production serving. The number of workers is controlled with the env variable `LIVE_GUNICORN_INSTANCES`. If you have trouble getting gunicorn to work, running in non-daemon mode may help with debugging: `sbnsis start --no-daemon`.

It is recommended that you make the gunicorn-powered server accessible to the outside world by proxy-passing requests through an https-enabled web server like apache.

Expand Down
4 changes: 2 additions & 2 deletions sbn_survey_image_service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from importlib.metadata import version as _version, PackageNotFoundError

# make cache directory set umask
from . import exceptions
from . import env
from .config import exceptions
from .config import env
from . import services
from . import models

Expand Down
16 changes: 16 additions & 0 deletions sbn_survey_image_service/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# file generated by setuptools_scm
# don't change, don't track in version control
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple, Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE

__version__ = version = '0.2.0.dev12+gf2ddedd.d20240821'
__version_tuple__ = version_tuple = (0, 2, 0, 'dev12', 'gf2ddedd.d20240821')
Loading

0 comments on commit 3fba9b7

Please sign in to comment.