Merge branch 'master' into master

nf-core · Feb 1, 2025 · 779b84f · 779b84f
2 parents 2b8ad23 + 406ace5
commit 779b84f
Show file tree

Hide file tree

Showing 11 changed files with 304 additions and 112 deletions.
diff --git a/modules/nf-core/last/mafconvert/environment.yml b/modules/nf-core/last/mafconvert/environment.yml
@@ -4,3 +4,4 @@ channels:
 
 dependencies:
   - bioconda::last=1608
+  - bioconda::samtools=1.21
diff --git a/modules/nf-core/last/mafconvert/main.nf b/modules/nf-core/last/mafconvert/main.nf
@@ -4,24 +4,29 @@ process LAST_MAFCONVERT {
 
     conda "${moduleDir}/environment.yml"
     container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
-        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data'
-        : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}"
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/37/379183a78f725c3a8f2c4dda2f73ad452e57cc895239938fc97281d7bd74ffbf/data'
+        : 'community.wave.seqera.io/library/last_samtools:e2b51d2d9a1ce9fa'}"
 
     input:
     tuple val(meta), path(maf)
     val(format)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    tuple val(meta4), path(gzi)
 
     output:
-    tuple val(meta), path("*.axt.gz"),      optional:true, emit: axt_gz
-    tuple val(meta), path("*.blast.gz"),    optional:true, emit: blast_gz
-    tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
-    tuple val(meta), path("*.chain.gz"),    optional:true, emit: chain_gz
-    tuple val(meta), path("*.gff.gz"),      optional:true, emit: gff_gz
-    tuple val(meta), path("*.html.gz"),     optional:true, emit: html_gz
-    tuple val(meta), path("*.psl.gz"),      optional:true, emit: psl_gz
-    tuple val(meta), path("*.sam.gz"),      optional:true, emit: sam_gz
-    tuple val(meta), path("*.tab.gz"),      optional:true, emit: tab_gz
-    path "versions.yml"                                  , emit: versions
+    tuple val(meta), path("*.axt.gz"),             optional:true, emit: axt_gz
+    tuple val(meta), path("*.bam"),                optional:true, emit: bam
+    tuple val(meta), path("*.blast.gz"),           optional:true, emit: blast_gz
+    tuple val(meta), path("*.blasttab.gz"),        optional:true, emit: blasttab_gz
+    tuple val(meta), path("*.chain.gz"),           optional:true, emit: chain_gz
+    tuple val(meta), path("*.cram"), path(fasta),  optional:true, emit: cram
+    tuple val(meta), path("*.gff.gz"),             optional:true, emit: gff_gz
+    tuple val(meta), path("*.html.gz"),            optional:true, emit: html_gz
+    tuple val(meta), path("*.psl.gz"),             optional:true, emit: psl_gz
+    tuple val(meta), path("*.sam.gz"),             optional:true, emit: sam_gz
+    tuple val(meta), path("*.tab.gz"),             optional:true, emit: tab_gz
+    path "versions.yml"                                         , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -31,7 +36,19 @@ process LAST_MAFCONVERT {
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     set -o pipefail
-    maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
+
+    case $format in
+        bam)
+            maf-convert $args -d sam  $maf | samtools view -b -o ${prefix}.${format}
+            ;;
+        cram)
+            # CRAM output is not supported if the genome is compressed with anything other than bgzip
+            maf-convert $args -d sam  $maf | samtools view -Ct $fasta -o ${prefix}.${format}
+            ;;
+        *)
+            maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
+            ;;
+    esac
 
     # maf-convert has no --version option but lastdb (part of the same package) has.
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/nf-core/last/mafconvert/meta.yml b/modules/nf-core/last/mafconvert/meta.yml
@@ -29,6 +29,35 @@ input:
         type: string
         description: Output format (one of axt, blast, blasttab, chain, gff, html, psl,
           sam, or tab)
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - fasta:
+        type: file
+        description: Genome file in FASTA format for CRAM conversion. If compressed, it
+          must be in BGZF format (for example, as produced by the bgzip tool).
+        pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - fai:
+        type: file
+        description: Genome index file needed for CRAM conversion.
+        pattern: "*.fai"
+  - - meta4:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - gzi:
+        type: file
+        description: Genome index file needed for CRAM conversion when the genome file
+          was compressed with the BGZF algorithm.
+        pattern: "*.gzi"
 output:
   - axt_gz:
       - meta:
@@ -40,6 +69,16 @@ output:
           type: file
           description: Gzipped pairwise alignment in Axt (Blastz) format (optional)
           pattern: "*.axt.gz"
+  - bam:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.bam":
+          type: file
+          description: Pairwise alignment in BAM format (optional)
+          pattern: "*.bam"
   - blast_gz:
       - meta:
           type: map
@@ -70,6 +109,20 @@ output:
           type: file
           description: Gzipped pairwise alignment in UCSC chain format (optional)
           pattern: "*.chain.gz"
+  - cram:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.cram":
+          type: file
+          description: Pairwise alignment in CRAM format (optional)
+          pattern: "*.cram"
+      - fasta:
+          type: file
+          description: Genome file to recover sequences from the CRAM file (optional)
+          pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
   - gff_gz:
       - meta:
           type: map

diff --git a/modules/nf-core/last/mafconvert/tests/main.nf.test b/modules/nf-core/last/mafconvert/tests/main.nf.test
@@ -9,7 +9,7 @@ nextflow_process {
     tag "last"
     tag "last/mafconvert"
 
-    test("sarscov2 - bam") {
+    test("sarscov2 - psl") {
 
         when {
             process {
@@ -19,6 +19,9 @@ nextflow_process {
                     file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
                 ]
                 input[1] = 'psl'
+                input[2] = [[],[]]
+                input[3] = [[],[]]
+                input[4] = [[],[]]
                 """
             }
         }
@@ -32,7 +35,71 @@ nextflow_process {
 
     }
 
-    test("sarscov2 - bam - stub") {
+    test("sarscov2 - bam") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'contigs.genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                ]
+                input[1] = 'bam'
+                input[2] = [[],[]]
+                input[3] = [[],[]]
+                input[4] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.bam.collect { bam(it[1]).getSamLines() },
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - cram") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'contigs.genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                ]
+                input[1] = 'cram'
+                input[2] = [
+                    [ id:'contigs.genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta',     checkIfExists: true)
+                ]
+                input[3] = [
+                    [ id:'contigs.genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[4] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.cram.collect { cram(it[1], it[2]).getSamLines() },
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - psl - stub") {
 
         options "-stub"
         when {
@@ -43,6 +110,9 @@ nextflow_process {
                     file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
                 ]
                 input[1] = 'psl'
+                input[2] = [[],[]]
+                input[3] = [[],[]]
+                input[4] = [[],[]]
                 """
             }
         }

diff --git a/modules/nf-core/last/mafconvert/tests/main.nf.test.snap b/modules/nf-core/last/mafconvert/tests/main.nf.test.snap
diff --git a/modules/nf-core/learnmsa/align/environment.yml b/modules/nf-core/learnmsa/align/environment.yml
@@ -2,5 +2,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - bioconda::learnmsa=2.0.1
-  - conda-forge::pigz=2.8
+  - conda-forge::coreutils=9.5
diff --git a/modules/nf-core/learnmsa/align/main.nf b/modules/nf-core/learnmsa/align/main.nf
@@ -1,50 +1,45 @@
 process LEARNMSA_ALIGN {
     tag "$meta.id"
     label 'process_medium'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' :
-        'biocontainers/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' }"
+    container "registry.hub.docker.com/felbecker/learnmsa:2.0.9"
 
     input:
     tuple val(meta), path(fasta)
-    val(compress)
 
     output:
-    tuple val(meta), path("*.aln{.gz,}"), emit: alignment
+    tuple val(meta), path("*.aln")      , emit: alignment
     path "versions.yml"                 , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def write_output = compress ? ">(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "${prefix}.aln"
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error("LearnMSA align module does not support Conda. Please use Docker / Singularity / Podman instead.")
+    }
     """
     learnMSA \\
-        $args \\
-        -i <(unpigz -cdf $fasta) \\
-        -o $write_output
+        -i $fasta \\
+        -o "${prefix}.aln" \\
+        $args
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g')
-        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
     END_VERSIONS
     """
 
     stub:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.aln${compress ? '.gz' : ''}
+    touch ${prefix}.aln
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g')
-        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+        learnmsa: \$(if command -v learnMSA &>/dev/null; then learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g'; else echo "STUB_TEST_HARDCODED_VERSION"; fi)
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/learnmsa/align/meta.yml b/modules/nf-core/learnmsa/align/meta.yml
@@ -21,24 +21,19 @@ input:
           e.g. `[ id:'test']`
     - fasta:
         type: file
-        description: Input sequences in FASTA format. May be gz-compressed or uncompressed.
-        pattern: "*.{fa,fasta}{.gz,}"
-  - - compress:
-        type: boolean
-        description: Flag representing whether the output MSA should be compressed.
-          Set to true to enable/false to disable compression. Compression is done using
-          pigz, and is multithreaded.
+        description: Input sequences in FASTA format.
+        pattern: "*.{fa,fasta}"
 output:
   - alignment:
       - meta:
           type: map
           description: |
             Groovy Map containing sample information
             e.g. `[ id:'test']`
-      - "*.aln{.gz,}":
+      - "*.aln":
           type: file
-          description: Alignment file, in FASTA format. May be gzipped or uncompressed.
-          pattern: "*.aln{.gz,}"
+          description: Alignment file, in FASTA format.
+          pattern: "*.aln"
   - versions:
       - versions.yml:
           type: file
Original file line number	Diff line number	Diff line change
@@ -4,3 +4,4 @@ channels:
 
 dependencies:
   - bioconda::last=1608
+  - bioconda::samtools=1.21