Merge branch 'release-0.2.2'
ressy committed Dec 14, 2018
2 parents 9bb8c02 + d896926 commit 1d08387
Showing 47 changed files with 944 additions and 203 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -11,3 +11,4 @@ script:
- R CMD build .
- R CMD check *tar.gz
- ./inst/bin/demo.sh
- ./inst/bin/demo_empty.sh
82 changes: 71 additions & 11 deletions .utils/lint.R
@@ -3,15 +3,75 @@
# Lint the package that contains this file's directory, minus some lint
# categories that just annoy me.


# Configure Linters -------------------------------------------------------


# This could be done via a lintr config file if I took the time to figure out
# the syntax.

linters <- list(
  # Linters to add to default list.
  # T_and_F_symbol is quite new as of 2018-12-10.
  yes = c(
    "T_and_F_symbol" # "use TRUE and FALSE, not T and F"
  ),
  # Linters to remove from default list.
  # Some of these are from older versions.
  no = c(
    "multiple_dots", # "Don't use dots in names"
    "camel_case", # "Don't capitalize stuff"
    "object_name", # "Don't use dots in names, don't capitalize"
    "object_usage" # "I don't see that variable"
  )
)


# Detect Package Path -----------------------------------------------------


# If run as a script
args <- commandArgs()
f <- gsub("^--file=", "", args[grep("^--file=", args)])
f <- normalizePath(f)
path <- dirname(dirname(f))

linters_no <- c("multiple_dots", # "Don't use dots in names"
"camel_case", # "Don't capitalize stuff"
"object_usage") # "I don't see that variable"
linters_no <- paste0(linters_no, "_linter")
linters <- lintr::default_linters[-match(linters_no,
names(lintr::default_linters))]
lintr::lint_package(path = path, linters = linters)
path_script <- gsub("^--file=", "", args[grep("^--file=", args)])
path_script <- normalizePath(path_script)
path_pkg <- dirname(dirname(path_script))

# If run in Rstudio for example
if (! length(path_pkg)) {
  # https://stackoverflow.com/a/16046056
  if (length(sys.frames())) {
    path_pkg <- dirname(dirname(sys.frame(1)$ofile))
  } else {
    # Last fallback, for example run in separate code chunks
    path_pkg <- getwd()
  }
}


# Run lintr ---------------------------------------------------------------

library(lintr)
linters$combined <- lintr::default_linters

# Remove linters
linters$no <- paste0(linters$no, "_linter")
idx <- match(linters$no, names(lintr::default_linters))
idx <- idx[! is.na(idx)]
linters$combined <- lintr::default_linters[-idx]

# Add linters
linters$yes <- paste0(linters$yes, "_linter")
linters$yes <- linters$yes[linters$yes %in% ls("package:lintr")]
.names <- names(linters$yes)
linters$yes <- get(linters$yes, "package:lintr")
names(linters$yes) <- .names
linters$combined <- c(linters$combined, linters$yes)

# Run
results <- lintr::lint_package(path = path_pkg, linters = linters$combined)
results
if (length(path_script) == 1) {
  if (length(results) > 0) {
    quit(status = 1)
  }
}
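The comment near the top of .utils/lint.R notes that this setup could instead live in a lintr config file. A rough sketch of that alternative, as a `.lintr` file at the package root, assuming the `with_defaults()` helper and the `T_and_F_symbol_linter` are available in the installed lintr (not part of this commit, untested):

linters: with_defaults(
    T_and_F_symbol_linter = T_and_F_symbol_linter,
    multiple_dots_linter = NULL,
    camel_case_linter = NULL,
    object_name_linter = NULL,
    object_usage_linter = NULL
  )

With a config file like that in place, lintr::lint_package() would apply the same additions and removals without the list bookkeeping the script does by hand.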
17 changes: 13 additions & 4 deletions .utils/prep_release.sh
@@ -1,11 +1,17 @@
#!/usr/bin/env bash

# NOTE: assumes working directory is the project directory

set -e

VERSION=$1

chiimp_check='x<-devtools::check();quit(save="no",status=length(c(x$errors,x$warnings)))'

# Run lint script
echo "Running lint check"
./.utils/lint.R

# Update version in download link in README
VER_MSG="The most recent released version is"
TAG_URL="https\\://github.com/ShawHahnLab/chiimp/releases/tag"
@@ -26,7 +32,10 @@ zip -r chiimp-v${VERISON}.zip chiimp/*
tar czvf chiimp-v${VERSION}.tgz chiimp/*
popd

# TODO show reminder of checks before tagging a release:
# * full test on all three platforms
# * make sure NEWS.md contains all updates under a heading matching this version
# * make sure GUIDE.Rmd is up-to-date and the rendered GUIDE.pdf is correct
echo
echo "REMINDER BEFORE TAGGING RELEASE $VERSION:"
echo
echo " * Run full test on Mac OS, Windows, and Linux"
echo " * Update NEWS.md with all updates under a heading matching this version"
echo " * Make sure GUIDE.Rmd is up-to-date and rendered GUIDE.pdf is correct"
echo
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: chiimp
Title: Computational, High-throughput Individual Identification through Microsatellite Profiling
Version: 0.2.1
Version: 0.2.2
Authors@R: person("Jesse", "Connell", email = "[email protected]", role = c("aut", "cre"))
Description: An R package to analyze microsatellites in high-throughput sequencing datasets.
Depends: R (>= 3.2.3)
17 changes: 10 additions & 7 deletions GUIDE.Rmd
@@ -4,7 +4,7 @@

title: "CHIIMP User Guide"
author: "Jesse Connell"
date: "2018/07/23"
date: "2018/11/07"
output:
pdf_document:
toc: true
@@ -42,18 +42,21 @@ are installed, follow the specific instructions below for your operating system.
On Windows, double-click the `install_windows.cmd` script. This will install
the package and R dependencies, and create a desktop shortcut.

### Mac OS

On Mac OS, double-click the `install_mac.command` shell script to automatically
install the package along with R dependencies and create a desktop alias. If
the install script won't open because of a security warning, you can
right-click (control+click) and select "Open" in the menu that appears.
Apple has specific instructions [here](https://support.apple.com/kb/PH25088?locale=en_US)
about these security settings.

### Linux

On Linux, run the `install_linux.sh` shell script to automatically install the
package along with R dependencies. An icon for the program is created at
`$HOME/Desktop/CHIIMP`.

### Mac OS

On Mac OS, run the `install_mac.command` shell script to automatically install
the package along with R dependencies. An icon for the program is created at
`$HOME/Desktop/CHIIMP`.

## Input Data Organization

The information CHIIMP uses during analysis is:
1 change: 1 addition & 0 deletions NAMESPACE
@@ -51,3 +51,4 @@ export(summarize_sample)
export(summarize_sample_guided)
export(tabulate_allele_names)
export(tally_cts_per_locus)
export(test_data)
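The newly exported `test_data` object (added to the NAMESPACE just above) is the same bundled dataset the new roxygen example in R/analyze_seqs.R below draws on. A quick interactive look, assuming the package is installed; the structure shown here is inferred from that example rather than from separate documentation:

library(chiimp)
# Locus attributes table bundled with the package's test data
test_data$locus_attrs
# Character vector of reads for locus A of the first test sample
str(test_data$seqs1$A)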
16 changes: 16 additions & 0 deletions NEWS.md
@@ -1,3 +1,19 @@
# chiimp 0.2.2

* Fixed heatmap plotting via updated `plot_heatmap` for cases with blank
results and only one unique non-blank value ([#22]).
* Added check in `analyze_dataset` for locus name mismatches between dataset
table and locus attributes table ([#21]).
* Added check in `prepare_dataset` for missing data directory ([#20]).
* Added check in `prepare_dataset` for zero-detected-files case.
* Added check in `load_dataset` for missing data files.
* Added check in `full_analysis` to warn if any input data files are
completely empty.

[#22]: https://github.com/ShawHahnLab/chiimp/issues/22
[#21]: https://github.com/ShawHahnLab/chiimp/issues/21
[#20]: https://github.com/ShawHahnLab/chiimp/issues/20

# chiimp 0.2.1

* Minor improvements to release process ([#14]).
17 changes: 16 additions & 1 deletion R/analyze_dataset.R
@@ -11,7 +11,8 @@
#' in the summary data frame will be sorted according to the ordering of loci in
#' \code{locus_attrs} and by the sample attributes. Processed files are stored
#' separately (as there may be multiple samples per file) and named by input
#' file path.
#' file path. An error is thrown if any locus entries in the given dataset are
#' not found in the locus attributes data frame.
#'
#' @param dataset data frame of sample details as produced by
#' \code{\link{prepare_dataset}}.
@@ -53,6 +54,12 @@ analyze_dataset <- function(dataset,
summary_function=summarize_sample,
known_alleles=NULL,
name_args=list()) {
if (! all(dataset$Locus %in% locus_attrs$Locus)) {
rogue_loci <- unique(dataset$Locus[! dataset$Locus %in% locus_attrs$Locus])
msg <- paste("ERROR: Locus names in dataset not in attributes table:",
paste(rogue_loci, collapse = ", "))
stop(msg)
}
if (ncores == 0) {
ncores <- max(1, as.integer(parallel::detectCores() / 2) - 1)
}
@@ -132,6 +139,14 @@ summary_function = summary_function,
summary_function = summary_function,
analyzed_files = analyzed_files)
}

# Check if any of the raw data files had no reads to start with.
empties <- sum(sapply(analyzed_files, nrow) == 0)
if (empties) {
logmsg(paste("WARNING: Zero reads for", empties, "of",
length(analyzed_files), "data files"))
}

# Recombined results into a summary data frame and a list of full sample data.
results <- tidy_analyzed_dataset(dataset, raw.results)
results$files <- analyzed_files
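The locus-name check added near the top of analyze_dataset above is easy to exercise on its own. A minimal sketch with toy data frames; the filenames and locus names here are made up, and only the Locus columns matter to the check:

# Toy stand-ins for a sample dataset table and a locus attributes table
dataset <- data.frame(Filename = c("s1.fastq", "s2.fastq"),
                      Locus = c("A", "Z"), # "Z" is not a known locus
                      stringsAsFactors = FALSE)
locus_attrs <- data.frame(Locus = c("A", "B"),
                          stringsAsFactors = FALSE)
if (! all(dataset$Locus %in% locus_attrs$Locus)) {
  rogue_loci <- unique(dataset$Locus[! dataset$Locus %in% locus_attrs$Locus])
  stop(paste("ERROR: Locus names in dataset not in attributes table:",
             paste(rogue_loci, collapse = ", ")))
}
# Error: ERROR: Locus names in dataset not in attributes table: Z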
6 changes: 2 additions & 4 deletions R/analyze_sample.R
@@ -108,9 +108,8 @@ analyze_sample_guided <- function(seq_data, sample.attrs, fraction.min)

switch(length(expected_lengths) + 1,
# Zero expected lengths: analyze as usual
analyze_sample(seq_data, sample.attrs, fraction.min),
analyze_sample(seq_data, sample.attrs, fraction.min), {
# One expected length: may be homozygous or heterozygous.
{
# Find rows of interest, matching expected length.
idxl <- chunk$Length == expected_lengths
within(chunk, {
Expand All @@ -128,10 +127,9 @@ analyze_sample_guided <- function(seq_data, sample.attrs, fraction.min) {
# And that's it. We make no comment on the remaining entries and
# leave them as NA.
})
},
}, {
# Two expected lengths: definitely heterozygous. No need to consider
# fractions here.
{
within(chunk, {
Category <- factor(, levels = categories)
# Exclude ambiguous sequences first.
25 changes: 20 additions & 5 deletions R/analyze_seqs.R
@@ -40,6 +40,21 @@
#' @return data frame of dereplicated sequences with added annotations.
#'
#' @export
#'
#' @examples
#' # Starting from non-locus-specific sequences,
#' # a locus attributes table, and requiring
#' # three side-by-side motif repeats to register
#' # as a motif match for a locus,
#' raw_seq_vector <- c(test_data$seqs1$A, test_data$seqs1$B)
#' locus_attrs <- test_data$locus_attrs
#' num_adjacent_repeats <- 3
#' # Convert the character vector of sequences
#' # into a data frame with one row per
#' # unique sequence.
#' seq_data <- analyze_seqs(raw_seq_vector,
#' locus_attrs,
#' num_adjacent_repeats)
analyze_seqs <- function(seqs, locus_attrs, nrepeats) {
# Dereplicate sequences
tbl <- table(seqs)
@@ -52,7 +67,7 @@ analyze_seqs <- function(seqs, locus_attrs, nrepeats) {
Count = count,
Length = nchar(seqs),
stringsAsFactors = FALSE)
data <- data[order(data$Count, decreasing = T), ]
data <- data[order(data$Count, decreasing = TRUE), ]
rownames(data) <- NULL
# Label rows with the apparent locus by checking primer sequences. Note that
# this uses the first matching locus for each row.
@@ -74,7 +89,7 @@ data$FractionOfTotal <- data$Count / sum(data$Count)
data$FractionOfTotal <- data$Count / sum(data$Count)
data$FractionOfLocus <- with(data, {
total_per_locus <- unlist(lapply(levels(MatchingLocus), function(loc)
sum(data[MatchingLocus == loc, "Count"], na.rm = T)))
sum(data[MatchingLocus == loc, "Count"], na.rm = TRUE)))
names(total_per_locus) <- levels(MatchingLocus)
Count / total_per_locus[MatchingLocus]
})
@@ -96,7 +111,7 @@ find_matching_primer <- function(sample.data, locus_attrs) {
matches <- do.call(cbind, lapply(rownames(locus_attrs), function(locus_name) {
primer <- as.character(locus_attrs[locus_name, "Primer"])
result <- grepl(primer, substr(sample.data$Seq, 1, nchar(primer) + 10))
c(locus_name)[as.numeric( (! result) + 1)]
c(locus_name)[as.numeric((! result) + 1)]
}))
# Collapse that set down to just the first match for each entry.
first.matches <- apply(matches, 1, function(m) m[match(TRUE, !is.na(m))])
@@ -172,7 +187,7 @@ find_stutter <- function(sample.data, locus_attrs,
# across rows for this locus,
for (locus_name in rownames(locus_attrs)) {
idxl_main <- sample.data$MatchingLocus == locus_name
idxl_main[is.na(idxl_main)] <- F
idxl_main[is.na(idxl_main)] <- FALSE
d <- sample.data[idxl_main, ]
motif.len <- nchar(as.character(locus_attrs[locus_name, "Motif"]))
# Any given index in d could be stutter.
@@ -221,7 +236,7 @@ find_artifact <- function(sample.data, locus_attrs,
# across rows for this locus,
for (locus_name in rownames(locus_attrs)) {
idxl_main <- sample.data$MatchingLocus == locus_name
idxl_main[is.na(idxl_main)] <- F
idxl_main[is.na(idxl_main)] <- FALSE
d <- sample.data[idxl_main, ]
# for each length it's the index of the first (highest-count) entry with
# that length.
