-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from ShawHahnLab/release-0.2.1
Release 0.2.1
- Loading branch information
Showing
23 changed files
with
656 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,13 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
^\.travis.yml$ | ||
.utils | ||
environment.yml | ||
install_linux_conda.sh | ||
install_linux.sh | ||
install_windows.cmd | ||
install_windows.R | ||
install_mac.command | ||
README.md | ||
GUIDE.Rmd | ||
GUIDE.pdf | ||
prep_release.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/usr/bin/env Rscript

# Lint the package that contains this file's directory, minus some lint
# categories that just annoy me.

# Rscript passes this script's own path as a --file= argument.  The package
# root is taken to be the parent of the directory holding this script.
args <- commandArgs()
f <- sub("^--file=", "", grep("^--file=", args, value = TRUE))
if (length(f) != 1) {
  stop("Could not find a --file= argument; run this script via Rscript.",
       call. = FALSE)
}
f <- normalizePath(f)
path <- dirname(dirname(f))

linters_no <- c("multiple_dots",  # "Don't use dots in names"
                "camel_case",     # "Don't capitalize stuff"
                "object_usage")   # "I don't see that variable"
linters_no <- paste0(linters_no, "_linter")
# Drop the unwanted linters by name.  Filtering with %in% (instead of negative
# indices from match()) keeps working when one of the names above is not
# present in lintr's defaults: match() would return NA for it, and an NA among
# negative subscripts is an error.
keep <- ! names(lintr::default_linters) %in% linters_no
linters <- lintr::default_linters[keep]
lintr::lint_package(path = path, linters = linters)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/usr/bin/env bash

# Prepare a release: stamp the given version number into README.md,
# DESCRIPTION and NEWS.md, run the package check, render GUIDE.pdf, and build
# ZIP and TGZ bundles in the parent directory.
#
# All paths are relative, so this expects to be run from the top of the
# package source tree, in a directory named "chiimp".

set -e

# Fail early with a usage message instead of writing an empty version string
# into the files below.
VERSION=${1:?Usage: $0 VERSION}

# R snippet that exits with the number of check errors plus warnings, so that
# any of either stops this script (via set -e).
chiimp_check='x<-devtools::check();quit(save="no",status=length(c(x$errors,x$warnings)))'

# Update version in download link in README
VER_MSG="The most recent released version is"
TAG_URL="https\\://github.com/ShawHahnLab/chiimp/releases/tag"
SED_README="s:$VER_MSG \\[[0-9.]+\\]\\($TAG_URL/[0-9.]+\\)\\.:$VER_MSG [$VERSION]($TAG_URL/$VERSION).:"
sed -i -r "$SED_README" README.md

# Update version in DESCRIPTION and NEWS.md
sed -i "s/Version: .*$/Version: $VERSION/" DESCRIPTION
sed -i "s/# chiimp dev/# chiimp $VERSION/" NEWS.md

R --slave --vanilla -e "$chiimp_check"
R --slave --vanilla -e "rmarkdown::render('GUIDE.Rmd', output_file = 'GUIDE.pdf', quiet = TRUE)"

# Create bundled ZIP and TGZ versions without hidden top level files (such as
# the git and travis stuff) and with the GUIDE.pdf.
pushd ..
zip -r "chiimp-v${VERSION}.zip" chiimp/*
tar czvf "chiimp-v${VERSION}.tgz" chiimp/*
popd

# TODO show reminder of checks before tagging a release:
# * full test on all three platforms
# * make sure NEWS.md contains all updates under a heading matching this version
# * make sure GUIDE.Rmd is up-to-date and the rendered GUIDE.pdf is correct
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
Package: chiimp | ||
Title: Computational, High-throughput Individual Identification through Microsatellite Profiling | ||
Version: 0.2.0 | ||
Version: 0.2.1 | ||
Authors@R: person("Jesse", "Connell", email = "[email protected]", role = c("aut", "cre")) | ||
Description: An R package to analyze microsatellites in high-throughput sequencing datasets. | ||
Depends: R (>= 3.2.3) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
# Interpret genotyping results for samples with known identity. | ||
|
||
#' Associate known genotypes with samples
#'
#' Using the Name and Locus columns of the given results summary data frame,
#' pair each called genotype with the known alleles.  A data frame with two
#' columns, CorrectAllele1Seq and CorrectAllele2Seq, is returned.  If matching
#' entries are found in Allele1Seq and/or Allele2Seq the order will be
#' preserved, and at this point the two allele entries should match up directly
#' for genotypes that were called correctly.
#'
#' @param results_summary cross-sample summary data frame as produced by
#'   \code{\link{analyze_dataset}}.
#' @param genotypes.known data frame of known genotypes that should be compared
#'   to the observed genotypes in the results, as loaded by
#'   \code{\link{load_genotypes}}.
#'
#' @return data frame with two columns for the two correct alleles, and rows
#'   matching the input summary table.  Rows whose Name/Locus combination is
#'   not found in \code{genotypes.known} have NA in both columns.  A summary
#'   table with zero rows gives a zero-row result.
#'
#' @export
match_known_genotypes <- function(results_summary, genotypes.known) {
  # match name/locus combos with genotypes
  id_tbl <- paste(results_summary$Name, results_summary$Locus)
  id_kg <- paste(genotypes.known$Name, genotypes.known$Locus)
  idx <- match(id_tbl, id_kg)
  # Build data frame of correct allele sequences
  result <- data.frame(CorrectAllele1Seq = genotypes.known[idx, "Allele1Seq"],
                       CorrectAllele2Seq = genotypes.known[idx, "Allele2Seq"],
                       stringsAsFactors = FALSE)
  # Ensure ordering within pairs matches samples, if possible.
  # seq_len() is used because 1:nrow(result) would count 1, 0 for an empty
  # table and run the loop body on rows that do not exist.
  for (i in seq_len(nrow(result))) {
    called <- results_summary[i, c("Allele1Seq", "Allele2Seq")]
    known <- result[i, ]
    pos <- match(called, known)
    # Swap the known pair if a called allele lines up with the opposite known
    # allele.  %in% treats an NA (no match) as FALSE.
    if (pos[1] %in% 2 || pos[2] %in% 1) {
      result[i, ] <- rev(known)
    }
  }
  result
}
|
||
#' Categorize genotyping results
#'
#' For a given results summary data frame that has CorrectAllele1Seq and
#' CorrectAllele2Seq columns (such as produced by
#' \code{\link{match_known_genotypes}}) added, create a factor labeling every
#' row of the input data frame by its genotyping outcome.
#'
#' @details
#' Levels in the returned factor, in order:
#'
#' * Correct: one/two alleles match.
#' * Dropped Allele: One called allele is correct for a heterozygous individual,
#'   but no second allele was called.
#' * Blank: No alleles were called in the analysis even though known genotypes
#'   were supplied.
#' * Incorrect: at least one allele does not match.
#'
#' Cases that should not occur, such as CorrectAllele1Seq and CorrectAllele2Seq
#' both set to NA, map to NA in the returned factor.
#' @md
#'
#' @param results_summary cross-sample summary data frame as produced by
#'   \code{\link{analyze_dataset}} with extra columns as produced by
#'   \code{\link{match_known_genotypes}}.
#'
#' @return factor defining genotyping result category for every row of the input
#'   data frame.
#'
#' @export
categorize_genotype_results <- function(results_summary) {
  # Score one called/known allele pair per row with an integer code:
  #   0: both present, sequences differ
  #   1: called present, known NA (no correct allele matched this one)
  #   2: called NA, known present (a correct allele was missed)
  #   3: both NA (correctly did not report an allele)
  #   4: both present, sequences equal
  score_pair <- function(called, known) {
    called_na <- is.na(called)
    known_na <- is.na(known)
    code <- 2 * called_na + known_na
    both_present <- !called_na & !known_na
    code[both_present & called == known] <- 4
    code
  }

  code1 <- score_pair(results_summary$Allele1Seq,
                      results_summary$CorrectAllele1Seq)
  code2 <- score_pair(results_summary$Allele2Seq,
                      results_summary$CorrectAllele2Seq)
  # Position in the 25-entry outcome table below: five entries per first-allele
  # code, ordered by second-allele code, plus one for R's 1-based indexing.
  pos <- 5 * code1 + code2 + 1

  ok <- "Correct"
  bad <- "Incorrect"
  blank <- "Blank"
  dropped <- "Dropped Allele"
  # Every combination of the two codes.  Combinations that should never come
  # up for correctly-labeled genotypes are NA.
  outcomes <- c(
    # Second-allele code:
    # 0    1    2        3    4
    bad, bad, bad,     bad, bad,      # first allele 0: simple mismatch
    bad, NA,  bad,     NA,  bad,      # first allele 1: called, none known
    bad, bad, blank,   bad, dropped,  # first allele 2: wrongly left blank
    bad, NA,  blank,   NA,  ok,       # first allele 3: both NA
    bad, bad, dropped, ok,  ok        # first allele 4: correct
  )

  # Map the integers for each case to text categories and create factor.
  factor(outcomes[pos],
         levels = c("Correct", "Dropped Allele", "Blank", "Incorrect"))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Install CHIIMP on Windows.

# Find the path to the directory containing this script.  We need this for
# package testing and installation below.
args <- commandArgs()
f <- gsub("^--file=", "", args[grep("^--file=", args)])
f <- normalizePath(f)
path <- dirname(f)

UPROF <- Sys.getenv("USERPROFILE")

# If no library paths are writeable, try creating a user library.
if (! any(file.access(.libPaths(), 2) == 0)) {
  # This is the directory I see RStudio create automatically on first start,
  # and the command-line R also detects it.
  ver <- paste(version$major, sub("\\..*", "", version$minor), sep = ".")
  dp <- file.path(UPROF, "Documents", "R", "win-library", ver)
  dir.create(dp, recursive = TRUE)
  # On a second run through this will get picked up automatically,
  # but if we want it right now we have to add it to the list manually.
  .libPaths(dp)
}

cat("\n")
cat("### Installing devtools\n")
cat("\n")
install.packages("devtools", repos = "https://cloud.r-project.org")

cat("\n")
cat("### Installing Bioconductor and MSA\n")
cat("\n")
source("https://bioconductor.org/biocLite.R")
biocLite("msa")

cat("\n")
cat("### Installing dependencies\n")
cat("\n")
devtools::install_deps(path, dependencies = TRUE)

cat("\n")
cat("### Testing CHIIMP\n")
cat("\n")
# Total number of failed expectations across all test results.  Any non-zero
# total triggers the warning, not just a total of exactly one.
status <- sum(as.data.frame(devtools::test(path))$failed)
if (status > 0) {
  cat("\n")
  cat("\n")
  cat("  Warning: Tests indicated failures.\n")
  cat("\n")
  cat("\n")
}

cat("\n")
cat("### Installing CHIIMP\n")
cat("\n")
devtools::install(path)

# Create a desktop shortcut to the installed chiimp.cmd script by way of
# PowerShell and the WScript.Shell COM object.
shortcut_path <- file.path(UPROF, "Desktop", "CHIIMP.lnk")
chiimp_path <- system.file("bin", "chiimp.cmd", package = "chiimp")
# https://stackoverflow.com/a/30029955/6073858
args <- c(paste0("$s=(New-Object -COM WScript.Shell).CreateShortcut('",
                 shortcut_path,
                 "');"),
          paste0("$s.TargetPath='", chiimp_path, "';"),
          "$s.Save();")
system2("powershell", args)
Oops, something went wrong.