Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mazzalab authored Feb 1, 2025
2 parents 2b8ad23 + 406ace5 commit 779b84f
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 112 deletions.
1 change: 1 addition & 0 deletions modules/nf-core/last/mafconvert/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:

dependencies:
- bioconda::last=1608
- bioconda::samtools=1.21
43 changes: 30 additions & 13 deletions modules/nf-core/last/mafconvert/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,29 @@ process LAST_MAFCONVERT {

conda "${moduleDir}/environment.yml"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data'
: 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}"
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/37/379183a78f725c3a8f2c4dda2f73ad452e57cc895239938fc97281d7bd74ffbf/data'
: 'community.wave.seqera.io/library/last_samtools:e2b51d2d9a1ce9fa'}"

input:
tuple val(meta), path(maf)
val(format)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(gzi)

output:
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.bam"), optional:true, emit: bam
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.cram"), path(fasta), optional:true, emit: cram
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -31,7 +36,19 @@ process LAST_MAFCONVERT {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
set -o pipefail
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
case $format in
bam)
maf-convert $args -d sam $maf | samtools view -b -o ${prefix}.${format}
;;
cram)
# CRAM output is not supported if the genome is compressed with something other than bgzip
maf-convert $args -d sam $maf | samtools view -Ct $fasta -o ${prefix}.${format}
;;
*)
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
;;
esac
# maf-convert has no --version option, but lastdb (part of the same package) does.
cat <<-END_VERSIONS > versions.yml
Expand Down
53 changes: 53 additions & 0 deletions modules/nf-core/last/mafconvert/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,35 @@ input:
type: string
description: Output format (one of axt, bam, blast, blasttab, chain, cram, gff,
html, psl, sam, or tab)
- - meta2:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- fasta:
type: file
description: Genome file in FASTA format for CRAM conversion. If compressed, it
must be in BGZF format (for example, as produced by the bgzip tool).
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- - meta3:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- fai:
type: file
description: Genome index file needed for CRAM conversion.
pattern: "*.fai"
- - meta4:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- gzi:
type: file
description: Genome index file needed for CRAM conversion when the genome file
was compressed with the BGZF algorithm.
pattern: "*.gzi"
output:
- axt_gz:
- meta:
Expand All @@ -40,6 +69,16 @@ output:
type: file
description: Gzipped pairwise alignment in Axt (Blastz) format (optional)
pattern: "*.axt.gz"
- bam:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.bam":
type: file
description: Pairwise alignment in BAM format (optional)
pattern: "*.bam"
- blast_gz:
- meta:
type: map
Expand Down Expand Up @@ -70,6 +109,20 @@ output:
type: file
description: Gzipped pairwise alignment in UCSC chain format (optional)
pattern: "*.chain.gz"
- cram:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.cram":
type: file
description: Pairwise alignment in CRAM format (optional)
pattern: "*.cram"
- fasta:
type: file
description: Genome file to recover sequences from the CRAM file (optional)
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- gff_gz:
- meta:
type: map
Expand Down
74 changes: 72 additions & 2 deletions modules/nf-core/last/mafconvert/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ nextflow_process {
tag "last"
tag "last/mafconvert"

test("sarscov2 - bam") {
test("sarscov2 - psl") {

when {
process {
Expand All @@ -19,6 +19,9 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = [[],[]]
"""
}
}
Expand All @@ -32,7 +35,71 @@ nextflow_process {

}

test("sarscov2 - bam - stub") {
test("sarscov2 - bam") {

when {
process {
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'bam'
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = [[],[]]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.bam.collect { bam(it[1]).getSamLines() },
process.out.versions
).match() }
)
}

}

test("sarscov2 - cram") {

when {
process {
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'cram'
input[2] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[3] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
]
input[4] = [[],[]]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.cram.collect { cram(it[1], it[2]).getSamLines() },
process.out.versions
).match() }
)
}

}

test("sarscov2 - psl - stub") {

options "-stub"
when {
Expand All @@ -43,6 +110,9 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = [[],[]]
"""
}
}
Expand Down
112 changes: 90 additions & 22 deletions modules/nf-core/last/mafconvert/tests/main.nf.test.snap

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions modules/nf-core/learnmsa/align/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::learnmsa=2.0.1
- conda-forge::pigz=2.8
- conda-forge::coreutils=9.5
27 changes: 11 additions & 16 deletions modules/nf-core/learnmsa/align/main.nf
Original file line number Diff line number Diff line change
@@ -1,50 +1,45 @@
process LEARNMSA_ALIGN {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' :
'biocontainers/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' }"
container "registry.hub.docker.com/felbecker/learnmsa:2.0.9"

input:
tuple val(meta), path(fasta)
val(compress)

output:
tuple val(meta), path("*.aln{.gz,}"), emit: alignment
tuple val(meta), path("*.aln") , emit: alignment
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def write_output = compress ? ">(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "${prefix}.aln"
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error("LearnMSA align module does not support Conda. Please use Docker / Singularity / Podman instead.")
}
"""
learnMSA \\
$args \\
-i <(unpigz -cdf $fasta) \\
-o $write_output
-i $fasta \\
-o "${prefix}.aln" \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g')
pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.aln${compress ? '.gz' : ''}
touch ${prefix}.aln
cat <<-END_VERSIONS > versions.yml
"${task.process}":
learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g')
pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
learnmsa: \$(if command -v learnMSA &>/dev/null; then learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g'; else echo "STUB_TEST_HARDCODED_VERSION"; fi)
END_VERSIONS
"""
}
15 changes: 5 additions & 10 deletions modules/nf-core/learnmsa/align/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,19 @@ input:
e.g. `[ id:'test']`
- fasta:
type: file
description: Input sequences in FASTA format. May be gz-compressed or uncompressed.
pattern: "*.{fa,fasta}{.gz,}"
- - compress:
type: boolean
description: Flag representing whether the output MSA should be compressed.
Set to true to enable/false to disable compression. Compression is done using
pigz, and is multithreaded.
description: Input sequences in FASTA format.
pattern: "*.{fa,fasta}"
output:
- alignment:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test']`
- "*.aln{.gz,}":
- "*.aln":
type: file
description: Alignment file, in FASTA format. May be gzipped or uncompressed.
pattern: "*.aln{.gz,}"
description: Alignment file, in FASTA format.
pattern: "*.aln"
- versions:
- versions.yml:
type: file
Expand Down
Loading

0 comments on commit 779b84f

Please sign in to comment.