Skip to content

Commit

Permalink
Merge pull request #90 from ShawHahnLab/release-0.0.3
Browse files Browse the repository at this point in the history
Version 0.0.3
  • Loading branch information
ressy authored Aug 20, 2020
2 parents df6ca03 + f3d6bd9 commit 694243c
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ script:
# Ensure the installation process created a viable script for the entry
# point.
- umbra -h
# Check the version argument.
- umbra --version
# Try a dry run installation.
- sudo -E $(which umbra) --action install --dry-run
# Try a live installation.
Expand Down
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
# Changelog

## 0.0.3 - 2020-08-19

### Added

* `--version` argument to command-line interface ([#87])

### Changed

* Use Unix-style line endings in report CSV ([#86])
* Suppress duplicate log messages for skipped runs ([#85])

### Fixed

* Suppress excessive logging for Box file uploads ([#89])
* Specify minimum versions for dependencies during install ([#84])

[#89]: https://github.com/ShawHahnLab/umbra/pull/89
[#87]: https://github.com/ShawHahnLab/umbra/pull/87
[#86]: https://github.com/ShawHahnLab/umbra/pull/86
[#85]: https://github.com/ShawHahnLab/umbra/pull/85
[#84]: https://github.com/ShawHahnLab/umbra/pull/84

## 0.0.2 - 2020-08-04

### Added
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
long_description_content_type="text/markdown",
url="https://github.com/ShawHahnLab/umbra",
install_requires=[
"biopython",
"boxsdk",
"biopython>=1.72",
"boxsdk>=2.7.1",
"pyopenssl", # required for boxsdk but not always pulled in
"cutadapt",
"pyyaml"
"cutadapt>=1.18",
"pyyaml>=3.13"
],
packages=setuptools.find_packages(exclude=["test_*"]),
include_package_data=True,
Expand Down
17 changes: 17 additions & 0 deletions test_umbra/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import unittest
import logging
from tempfile import TemporaryDirectory
from distutils.dir_util import copy_tree
from pathlib import Path
Expand All @@ -27,6 +28,22 @@ def md5(text):
pass
return hashlib.md5(text).hexdigest()


class DumbLogHandler(logging.Handler):
    """A log handler that just stacks log records into a list.

    Intended for tests: attach it to a logger, run the code under test, then
    inspect ``records`` (or call ``has_message_text``) to check what was
    logged.  Assign ``records = []`` to reset between checks.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Every record passed to emit(), in the order received.
        self.records = []

    def emit(self, record):
        """Store the record instead of writing it anywhere."""
        self.records.append(record)

    def has_message_text(self, txt):
        """Does some text appear in any of the records?

        Both the raw message (the unformatted template) and the fully
        formatted message are searched, so text supplied via %-style
        arguments is found as well.  Non-string message objects (e.g. a
        logged exception) are converted with str() rather than raising.
        """
        for rec in self.records:
            if txt in str(rec.msg):
                return True
            try:
                formatted = rec.getMessage()
            except (TypeError, ValueError, KeyError):
                # Arguments do not fit the template; the raw message was
                # already checked above, so just move on.
                continue
            if txt in formatted:
                return True
        return False


class TestBase(unittest.TestCase):
"""Some setup/teardown shared with the real test classes."""

Expand Down
20 changes: 19 additions & 1 deletion test_umbra/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
from distutils.dir_util import copy_tree, remove_tree, mkpath
from distutils.file_util import copy_file
from tempfile import TemporaryDirectory
import umbra.processor
from umbra.processor import IlluminaProcessor
from umbra.project import ProjectData
from .test_common import TestBase, CONFIG, md5
from .test_common import TestBase, CONFIG, md5, DumbLogHandler

class TestIlluminaProcessor(TestBase):
"""Main tests for IlluminaProcessor."""
Expand Down Expand Up @@ -391,6 +392,10 @@ def set_up_vars(self):
self.expected["report_md5"] = md5("")

def test_refresh(self):
"""Test that an age setting prevents loading too new or old runs.
Also, once a skipped run is logged it should not be logged again.
"""
with TemporaryDirectory() as stash:
run_stash = str(Path(stash)/self.expected["run_id"])
copy_tree(str(self.path_run), run_stash)
Expand All @@ -399,8 +404,16 @@ def test_refresh(self):
self.assertEqual(self.proc.seqinfo["runs"], set())
proj_exp = {"active": set(), "inactive": set(), "completed": set()}
self.assertEqual(self.proc.seqinfo["projects"], proj_exp)
logger = umbra.processor.LOGGER
# Refresh loads a number of Runs
handler = DumbLogHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)
self.proc.refresh()
self.assertTrue(
handler.has_message_text("skipping run; timestamp"),
"Run skipped but not logged as expected")
handler.records = []
self.assertEqual(self.proc.seqinfo["runs"], set())
self.assertEqual(self.proc.seqinfo["projects"], proj_exp)
# Copy run directory back
Expand All @@ -409,8 +422,13 @@ def test_refresh(self):
self.assertEqual(self.proc.seqinfo["projects"], proj_exp)
self.proc.start()
self.proc.refresh(wait=True)
self.assertFalse(
handler.has_message_text("skipping run; timestamp"),
"Run already skipped but incorrectly logged again")
# Except we still haven't loaded any yet (too new)
self.assertEqual(self.proc.seqinfo["projects"], proj_exp)
logger.removeHandler(handler)
logger.setLevel(logging.NOTSET)


class TestIlluminaProcessorMinRunAgeZero(TestIlluminaProcessor):
Expand Down
7 changes: 6 additions & 1 deletion umbra/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .processor import IlluminaProcessor
from . import config
from . import install
from . import __version__ as VERSION

DOCS = {}
DOCS["description"] = "Process Illumina runs."
Expand Down Expand Up @@ -37,6 +38,8 @@
PARSER.add_argument("-a", "--action", default="report",
help="program action (default: %(default)s)",
choices=["process", "report", "install"])
PARSER.add_argument("-V", "--version", action="store_true",
help="Print installed version of umbra package")
PARSER.add_argument("-v", "--verbose", action="count", default=0,
help="Increment log verbosity")
PARSER.add_argument("-q", "--quiet", action="count", default=0,
Expand Down Expand Up @@ -84,7 +87,9 @@ def main(args_raw=None):
LOGGER.setLevel(newlevel)
_setup_log(args.verbose, args.quiet)
action_args = conf.get(args.action, {})
if args.action == "process":
if args.version:
print(VERSION or "Not installed")
elif args.action == "process":
proc = IlluminaProcessor(conf["paths"]["root"], conf)
proc.watch_and_process(**action_args)
elif args.action == "report":
Expand Down
5 changes: 5 additions & 0 deletions umbra/box_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@

LOGGER = logging.getLogger(__name__)

# Box logs the entire (!) uploaded file at level INFO.
# Let's ignore INFO and below.
__BOXLOGGER = logging.getLogger("boxsdk.network.default_network")
__BOXLOGGER.setLevel(logging.WARNING)

class BoxUploader:
"""A simple Box API interface to upload files to one directory.
Expand Down
5 changes: 5 additions & 0 deletions umbra/data/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ nthreads_per_project: 1
# Configure as a number of seconds from the present time. This will be
# separately applied to the Alignment directories within each run directory as
# well.
# NOTE: This is implemented using the timestamp on each run directory, so
# anything that updates the timestamp can delay processing, potentially
# indefinitely. (This was our experience with a MiniSeq that keeps touching
# its most recently created run directory until it moves on to the next run or
# is rebooted.)
min_age: null
# How about run directories older than a certain age?
max_age: null
Expand Down
15 changes: 11 additions & 4 deletions umbra/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ def report(self, out_file=sys.stdout, max_width=60):
length will be truncated and displayed with "..." Set to 0 for no
maximum."""
entries = self.create_report()
writer = csv.DictWriter(out_file, IlluminaProcessor.REPORT_FIELDS)
writer = csv.DictWriter(
out_file, lineterminator="\n", fieldnames=IlluminaProcessor.REPORT_FIELDS)
writer.writeheader()
for entry in entries:
entry2 = entry
Expand Down Expand Up @@ -322,7 +323,9 @@ def _init_seqinfo():
"inactive": set(),
"active": set(),
"completed": set()
}
},
# just a tracker to avoid re-logging skipped runs
"runs_skipped": set()
}
return seqinfo

Expand Down Expand Up @@ -363,10 +366,14 @@ def _run_setup(self, run_dir):
# Now, check each threshold if it was specified. Careful to check for
# None here because a literal zero should be taken as its own meaning.
if min_age is not None and (time_now - time_change < min_age):
LOGGER.info("skipping run; timestamp too new:.../%s", run_dir.name)
if run not in self.seqinfo["runs_skipped"]:
LOGGER.info("skipping run; timestamp too new:.../%s", run_dir.name)
self.seqinfo["runs_skipped"].add(run)
return run
if max_age is not None and (time_now - time_change > max_age):
LOGGER.info("skipping run; timestamp too old:.../%s", run_dir.name)
if run not in self.seqinfo["runs_skipped"]:
LOGGER.info("skipping run; timestamp too old:.../%s", run_dir.name)
self.seqinfo["runs_skipped"].add(run)
return run
# pylint: disable=broad-except
try:
Expand Down

0 comments on commit 694243c

Please sign in to comment.