Merge pull request #76 from ShawHahnLab/release-0.4.0

Release 0.4.0
ShawHahnLab · Sep 7, 2022 · 229ffbd · 229ffbd
2 parents e6f6121 + 2fac6ab
commit 229ffbd
Show file tree

Hide file tree

Showing 57 changed files with 842 additions and 232 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,6 +1,6 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
-^\.travis.yml$
+.circleci
 .utils
 install_linux.sh
 install_windows.cmd

diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -0,0 +1,83 @@
+# https://blog.jdblischak.com/posts/circleci-for-r-1/
+# https://blog.jdblischak.com/posts/circleci-for-r-2/
+#
+# Can't get the full package check working with rocker because of latex/pdf
+# issues.  This looks like the problem:
+#
+# https://gitlab.irstea.fr/in-wop/airGRiwrm/-/issues/53
+#
+# ...but if I switch to rocker/tidyverse it's then missing pdflatex, and if I
+# stick with rocker/verse, it still can't find the font.  For now I'm just
+# skipping that part of the package check with --no-manual.
+version: 2.1
+jobs:
+  build:
+    parameters:
+      rversion:
+        type: string
+    docker:
+      - image: rocker/verse:<<parameters.rversion>>
+    steps:
+      - checkout
+      - restore_cache:
+          keys:
+            - cache-{{ .Environment.CIRCLE_JOB }}-{{ checksum "DESCRIPTION" }}
+      - run:
+          name: Install package dependencies
+          command: R -e "devtools::install_deps(dep = TRUE)"
+      - save_cache:
+          key: cache-{{ .Environment.CIRCLE_JOB }}-{{ checksum "DESCRIPTION" }}
+          paths:
+            - "/usr/local/lib/R/site-library"
+      - run:
+          name: Check versions
+          command: |
+            which R
+            R --quiet -e 'installed.packages()[, "Version"]'
+            which pandoc
+            pandoc --version
+      - run:
+          name: Build package
+          command: R CMD build .
+      - run:
+          name: Check package
+          command: R CMD check --no-manual *tar.gz
+      - store_artifacts:
+          name: "Store Artifacts: 00check.log"
+          path: chiimp.Rcheck/00check.log
+      - store_artifacts:
+          name: "Store Artifacts: 00install.out"
+          path: chiimp.Rcheck/00install.out
+      - run:
+          name: "Run find_pandoc script"
+          command: Rscript exec/find_pandoc.R
+      - run:
+          name: "Run demo script"
+          command: bash exec/demo.sh $PWD/demo-files
+      - store_artifacts:
+          name: "Store Artifacts: demo: report"
+          path: demo-files/str-results/report.html
+      - store_artifacts:
+          name: "Store Artifacts: demo: summary"
+          path: demo-files/str-results/summary.csv
+      - run:
+          name: "Run demo script - empty case"
+          command: bash exec/demo_empty.sh $PWD/demo-empty-files
+      - store_artifacts:
+          name: "Store Artifacts: empty demo: report"
+          path: demo-empty-files/str-results/report.html
+      - store_artifacts:
+          name: "Store Artifacts: empty demo: summary"
+          path: demo-empty-files/str-results/summary.csv
+      - run:
+          name: "Run install test script"
+          command: bash tools/circleci_install_test.sh
+
+workflows:
+  version: 2
+  build_all:
+    jobs:
+      - build:
+          matrix:
+            parameters:
+              rversion: ["3.6.3", "4.2.1"]
diff --git a/.travis.yml b/.travis.yml
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,39 +1,39 @@
 Package: chiimp
 Title: Computational, High-throughput Individual Identification through Microsatellite Profiling
-Version: 0.3.1
+Version: 0.4.0
 Authors@R: person("Jesse", "Connell", email = "[email protected]", role = c("aut", "cre"))
 Description: An R package to analyze microsatellites in high-throughput sequencing datasets.
-Depends: R (>= 3.2.3)
+Depends: R (>= 3.6)
 License: file LICENSE
 Encoding: UTF-8
 LazyData: true
 Language: en-US
 biocViews:
 Imports:
-  argparser (>= 0.4),
-  Biostrings (>= 2.38.4),
+  graphics (>= 3.6),
+  grDevices (>= 3.6),
+  methods (>= 3.6),
+  parallel (>= 3.6),
+  stats (>= 3.6),
+  utils (>= 3.6),
+  argparser (>= 0.6),
   dnaplotr (>= 0.1),
   dnar (>= 0.1),
-  devtools (>= 1.13.5),
-  graphics (>= 3.2.3),
-  grDevices (>= 3.2.3),
-  kableExtra (>= 0.2.1.9000),
-  knitr (>= 1.16),
-  methods (>= 3.2.3),
-  msa (>= 1.2.1),
-  openssl (>= 0.9.6),
-  parallel (>= 3.2.3),
-  pheatmap (>= 1.0.8),
-  rmarkdown (>= 1.6),
-  stats (>= 3.2.3),
-  stringr (>= 1.2.0),
-  utils (>= 3.2.3),
-  viridis (>= 0.4.0),
-  yaml (>= 2.1.14)
+  devtools (>= 2.3),
+  Biostrings (>= 2.54),
+  kableExtra (>= 1.1),
+  knitr (>= 1.28),
+  msa (>= 1.18),
+  openssl (>= 1.4),
+  pheatmap (>= 1.0),
+  rmarkdown (>= 2.1),
+  stringr (>= 1.4),
+  viridis (>= 0.5),
+  yaml (>= 2.2)
 Remotes:
   github::sherrillmix/dnar,
   github::sherrillmix/dnaplotr
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.1
 Suggests:
   testthat,
   roxygen2
diff --git a/GUIDE.Rmd b/GUIDE.Rmd
@@ -4,11 +4,13 @@
 
 title: "CHIIMP User Guide"
 author: "Jesse Connell"
-date: "2019/07/10"
+date: "2022/04/25"
 output:
   pdf_document:
     toc: true
     toc_depth: 3
+urlcolor: blue
+linkcolor: blue
 ---
 
 ```{r setup, include=FALSE}
@@ -34,12 +36,12 @@ package documentation.
 
 ## Installation
 
-Most dependencies are provided by installation of [R] and [RStudio].  Once
-these are installed, follow the specific instructions below for your operating
-system.  In all three cases CHIIMP performs an anlysis when a configuration
-file is dragged and dropped onto the desktop icon; there is no interactive
-interface via the icon, though the R package can be used interactively.  See
-the Usage section for more information.
+First install [R] and [RStudio], which will supply most software dependencies
+for CHIIMP.  Once these are installed, follow the specific instructions below
+for your operating system.  In all three cases CHIIMP performs an analysis when
+a configuration file is dragged and dropped onto the desktop icon; there is no
+interactive interface via the icon, though the R package can be used
+interactively.  See the Usage section for more information.
 
 ### Windows
 
@@ -48,22 +50,25 @@ the package and R dependencies, and create a desktop shortcut.
 
 ### Mac OS
 
-On Mac OS, double-click the `install_mac.command` shell script to automatically
-install the package along with R dependencies and create a desktop alias.  If
-the install script won't open because of a security warning, you can
-right-click (control+click) and select "Open" in the menu that appears.
-Apple has specific instructions [here](https://support.apple.com/kb/PH25088?locale=en_US)
-about these security setings.
+On Mac OS, right-click (control+click) the `install_mac.command` shell script,
+select "Open," and also click "Open" in the window that appears to confirm that
+you really do want to open it.  (Apple has specific instructions about these
+security precautions [here](https://support.apple.com/kb/PH25088?locale=en_US).)
+This will automatically install the package along with R dependencies and
+create a desktop alias.
 
 If a window appears recommending installation of the Mac OS command-line
 developer tools, go ahead and install them.  After that you'll probably need to
 re-run the CHIIMP installer to finish the install.
 
 ### Linux
 
-On Linux, run the `install_linux.sh` shell script to automatically install the 
-package along with R dependencies.  An icon for the program is created at 
-`$HOME/Desktop/CHIIMP`.
+On Linux, run the `install_linux.sh` shell script to automatically install the
+package along with R dependencies.  An icon for the program is created at
+`$HOME/Desktop/CHIIMP.desktop`.  Specific usage of the desktop icon will depend
+on the desktop environment in use.  (The `CHIIMP.desktop` text file references
+the installed chiimp executable, and supplies the config file as a command-line
+argument when dragged and dropped onto the icon.)
 
 ## Input Data Organization
 
@@ -94,24 +99,24 @@ primer sequence is one of the filtering criteria during analysis.)
 The description of the samples to be analyzed can be provided in a spreadsheet,
 or automatically loaded from the data file names.  An example spreadsheet:
 
-| Filename      | Replicate   | Sample | Locus |
-| -------------:| -----------:| ------:| -----:|
-| 100-1-A.fastq |     1       | 100    | A     |
-| 100-2-A.fastq |     2       | 100    | A     |
-| 100-1-B.fastq |     1       | 100    | B     |
-| 100-2-B.fastq |     2       | 100    | B     |
-| 100-1-1.fastq |     1       | 100    | 1     |
-| 100-2-1.fastq |     2       | 100    | 1     |
-| 100-1-2.fastq |     1       | 100    | 2     |
-| 100-2-2.fastq |     2       | 100    | 2     |
-| 101-1-A.fastq |     1       | 101    | A     |
-| 101-2-A.fastq |     2       | 101    | A     |
-| 101-1-B.fastq |     1       | 101    | B     |
-| 101-2-B.fastq |     2       | 101    | B     |
-| 101-1-1.fastq |     1       | 101    | 1     |
-| 101-2-1.fastq |     2       | 101    | 1     |
-| 101-1-2.fastq |     1       | 101    | 2     |
-| 101-2-2.fastq |     2       | 101    | 2     |
+| `Filename`         | `Replicate` | `Sample` | `Locus` |
+|:------------------:|:-----------:|:--------:|:-------:|
+| `100-1-A.fastq.gz` |    `1`      |   `100`  |    `A`  |
+| `100-2-A.fastq.gz` |    `2`      |   `100`  |    `A`  |
+| `100-1-B.fastq.gz` |    `1`      |   `100`  |    `B`  |
+| `100-2-B.fastq.gz` |    `2`      |   `100`  |    `B`  |
+| `100-1-1.fastq.gz` |    `1`      |   `100`  |    `1`  |
+| `100-2-1.fastq.gz` |    `2`      |   `100`  |    `1`  |
+| `100-1-2.fastq.gz` |    `1`      |   `100`  |    `2`  |
+| `100-2-2.fastq.gz` |    `2`      |   `100`  |    `2`  |
+| `101-1-A.fastq.gz` |    `1`      |   `101`  |    `A`  |
+| `101-2-A.fastq.gz` |    `2`      |   `101`  |    `A`  |
+| `101-1-B.fastq.gz` |    `1`      |   `101`  |    `B`  |
+| `101-2-B.fastq.gz` |    `2`      |   `101`  |    `B`  |
+| `101-1-1.fastq.gz` |    `1`      |   `101`  |    `1`  |
+| `101-2-1.fastq.gz` |    `2`      |   `101`  |    `1`  |
+| `101-1-2.fastq.gz` |    `1`      |   `101`  |    `2`  |
+| `101-2-2.fastq.gz` |    `2`      |   `101`  |    `2`  |
 
 These columns are required for each entry:
 
@@ -136,13 +141,12 @@ Usage section for more information.
 The description of the loci should be given in a spreadsheet with loci on rows
 and attributes on columns.  For example:
 
-```{r, echo=FALSE}
-# Show the example locus attributes table
-locus_attrs <- load_locus_attrs("inst/example_locus_attrs.csv")
-locus_attrs$Primer <- paste0(substr(locus_attrs$Primer, 1, 12), "...")
-locus_attrs$ReversePrimer <- paste0(substr(locus_attrs$ReversePrimer, 1, 12), "...")
-knitr::kable(locus_attrs, row.names = FALSE)
-```
+| `Locus` | `LengthMin` | `LengthMax` | `LengthBuffer` | `Motif` |    `Primer`       |  `ReversePrimer` |
+|:-------:| -----------:| -----------:| --------------:|:-------:|:-----------------:|:----------------:|
+|   `A`   |       `131` |      `179`  |          `20`  |  `TAGA` | `TATCACTGGTGT...` | `CACAGTTGTGTG...`|
+|   `B`   |       `194` |      `235`  |          `20`  |  `TAGA` | `AGTCTCTCTTTC...` | `TAGGAGCCTGTG...`|
+|   `1`   |       `232` |      `270`  |          `20`  |  `TATC` | `ACAGTCAAGAAT...` | `CTGTGGCTCAAA...`|
+|   `2`   |       `218` |      `337`  |          `20`  |  `TCCA` | `TTGTCTCCCCAG...` | `TCTGTCATAAAC...`|
 
 These columns are required:
 
@@ -157,24 +161,25 @@ These columns are required:
  * Primer: The forward PCR primer used in preparing the sequencing library. 
  This is used as one of the checks for candidate allele sequences.
  * ReversePrimer: The reverse PCR primer used in preparing the sequencing
- library.  This is not currently used.
+ library.  This is used only when `use_reverse_primers` is enabled in the
+ configuration.
 
 ### Known Individuals (Optional)
 
 If a spreadsheet of genotypes for known individuals is supplied, the analysis 
 can attempt to match samples with the known genotypes automatically.  For
 example:
 
-| Name          | Locus       | Allele1Seq     | Allele2Seq     |
-|:-------------:|:-----------:|:--------------:|:--------------:|
-| CH001         |     A       | ATTATCACTGG... | ATTATCACTGG... |
-| CH001         |     B       | TCAGTCTCTCT... |                |
-| CH001         |     1       | AGACAGTCAAG... | AGACAGTCAAG... |
-| CH001         |     2       | CTTTGTCTCCC... | CTTTGTCTCCC... |
-| CH002         |     A       | ATTATCACTGG... | ATTATCACTGG... |
-| CH002         |     B       | TCAGTCTCTCT... | TCAGTCTCTCT... |
-| CH002         |     1       | AGACAGTCAAG... |                |
-| CH002         |     2       | CTTTGTCTCCC... | CTTTGTCTCCC... |
+|   `Name`  | `Locus` |   `Allele1Seq`   |   `Allele2Seq`   |
+|:---------:|:-------:|:----------------:|:----------------:|
+|  `CH001`  |   `A`   | `ATTATCACTGG...` | `ATTATCACTGG...` |
+|  `CH001`  |   `B`   | `TCAGTCTCTCT...` |                  |
+|  `CH001`  |   `1`   | `AGACAGTCAAG...` | `AGACAGTCAAG...` |
+|  `CH001`  |   `2`   | `CTTTGTCTCCC...` | `CTTTGTCTCCC...` |
+|  `CH002`  |   `A`   | `ATTATCACTGG...` | `ATTATCACTGG...` |
+|  `CH002`  |   `B`   | `TCAGTCTCTCT...` | `TCAGTCTCTCT...` |
+|  `CH002`  |   `1`   | `AGACAGTCAAG...` |                  |
+|  `CH002`  |   `2`   | `CTTTGTCTCCC...` | `CTTTGTCTCCC...` |
 
 The order of the alleles given is not important, and homozygous individuals may 
 have Allele2Seq either left blank or set to a copy of Allele1Seq.  The sequences
@@ -186,14 +191,14 @@ used for the PCR primers described above.
 If a spreadsheet of allele names and sequences is supplied, the analysis 
 will use those names in summary tables in the output report.  For example:
 
-| Locus |  Name       | Seq            |
-|:-----:|:-----------:|:--------------:|
-| A     | 200-a       | ATTATCACTGG... |
-| A     | 180-a       | ATTATCACTGG... |
-| A     | 180-b       | ATTATCACTGG... |
-| B     | 300-a       | ATTATCACTGG... |
-| B     | 305-a       | ATTATCACTGG... |
-| B     | 290-a       | ATTATCACTGG... |
+| `Locus` |   `Name`    |      `Seq`       |
+|:-------:|:-----------:|:----------------:|
+|   `A`   |   `200-a`   | `ATTATCACTGG...` |
+|   `A`   |   `180-a`   | `ATTATCACTGG...` |
+|   `A`   |   `180-b`   | `ATTATCACTGG...` |
+|   `B`   |   `300-a`   | `ATTATCACTGG...` |
+|   `B`   |   `305-a`   | `ATTATCACTGG...` |
+|   `B`   |   `290-a`   | `ATTATCACTGG...` |
 
 The software will automatically create short allele names for any identified 
 allele not listed in the allele spreadsheet (or for all alleles if no