diff --git a/assets/schema_input.json b/assets/schema_fasta.json
similarity index 57%
rename from assets/schema_input.json
rename to assets/schema_fasta.json
index fcac185..aac3706 100644
--- a/assets/schema_input.json
+++ b/assets/schema_fasta.json
@@ -7,21 +7,16 @@
     "items": {
         "type": "object",
         "properties": {
-            "sample": {
-                "type": "string",
-                "pattern": "^\\S+$",
-                "errorMessage": "Sample name must be provided and cannot contain spaces"
-            },
             "fasta": {
                 "type": "string",
                 "format": "file-path",
-                "pattern": "^\\S+\\.f(a|asta|na|as)\\.gz$",
-                "errorMessage": "FastA file must be provided, cannot contain spaces and must have extension '.fa.gz', '.fasta.gz', '.fna.gz', or '.fas.gz'",
+                "pattern": "^\\S+$",
+                "errorMessage": "FASTA file path must be provided and cannot contain spaces",
                 "exists": true
             }
+
+        },
         "required": [
-            "sample",
             "fasta"
         ]
     }
diff --git a/assets/schema_genbank.json b/assets/schema_genbank.json
new file mode 100644
index 0000000..aac3706
--- /dev/null
+++ b/assets/schema_genbank.json
@@ -0,0 +1,23 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/socialgene/sgnf/master/assets/schema_genbank.json",
+    "title": "socialgene/sgnf pipeline - params.genbank_csv schema",
+    "description": "Schema for the file provided with params.genbank_csv",
+    "type": "array",
+    "items": {
+        "type": "object",
+        "properties": {
+            "genbank": {
+                "type": "string",
+                "format": "file-path",
+                "pattern": "^\\S+$",
+                "errorMessage": "GenBank file path must be provided and cannot contain spaces",
+                "exists": true
+            }
+
+        },
+        "required": [
+            "genbank"
+        ]
+    }
+}
diff --git a/conf/base.config b/conf/base.config
index 07d1d92..cc370a0 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -60,9 +60,6 @@ process {
         errorStrategy = 'retry'
         maxRetries    = 2
     }
-    withName:CUSTOM_DUMPSOFTWAREVERSIONS {
-        cache = false
-    }
     withName:PROCESS_GENBANK_FILES {
         cpus = { check_max (1 * task.attempt, 'cpus' ) }
diff --git a/conf/examples/input_examples/by_taxa/actinomycetota.config b/conf/examples/input_examples/by_taxa/actinomycetota.config
index 66dfc0e..8c953c7 100644
--- a/conf/examples/input_examples/by_taxa/actinomycetota.config
+++ b/conf/examples/input_examples/by_taxa/actinomycetota.config
@@ -89,7 +89,7 @@ process {
     }
     withLabel:process_high_memory {
         memory = { check_max( 400.GB * task.attempt, 'memory' ) }
-    }
+    }
     withName:ANTISMASH {
         cpus = 2
         memory = { check_max (3.GB * task.attempt, 'memory' ) }
@@ -98,6 +98,6 @@ process {
     withName:'MMSEQS2_CLUSTER'{
         // https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
-        ext.args2 = '-c 0.7 --cov-mode 0'
+        ext.args = '-c 0.7 --cov-mode 0'
     }
 }
diff --git a/conf/examples/input_examples/input_examples.config b/conf/examples/input_examples/input_examples.config
index 8eab01d..bc33304 100644
--- a/conf/examples/input_examples/input_examples.config
+++ b/conf/examples/input_examples/input_examples.config
@@ -61,7 +61,6 @@ process {
     }
     withName:'MMSEQS2_CLUSTER'{
         // https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
-        ext.args = '--single-step-clustering 1'
-        ext.args2 = '--min-seq-id 0.5 -c 0.7 --cov-mode 0'
+        ext.args = '--single-step-clustering 1 --min-seq-id 0.5 -c 0.7 --cov-mode 0'
     }
 }
diff --git a/conf/examples/input_examples/protein_databases/swissprot.config b/conf/examples/input_examples/protein_databases/swissprot.config
index 058c9e8..a4ff69c 100644
--- a/conf/examples/input_examples/protein_databases/swissprot.config
+++ b/conf/examples/input_examples/protein_databases/swissprot.config
@@ 
-20,7 +20,6 @@ params { // Use the downloaded BGC0000001 as input local_faa = "/tmp/uni/seqs.fasta" - crabhash_path = '/tmp/github/kwan_lab/crabhash/target/release' /* //////////////////////// diff --git a/conf/modules.config b/conf/modules.config index a98f599..d737e34 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,13 +12,6 @@ process { - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.tracedir}" }, - mode: 'copy', - pattern: '*_versions.yml' - ] - } withName:'HMMER_HMMSEARCH'{ ext.args = "" @@ -31,7 +24,7 @@ process { withName:'MMSEQS2_CLUSTER'{ // https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster - ext.args2 = '-c 0.7 --cov-mode 0' + ext.args = '-c 0.7 --cov-mode 0' } withName:'MMSEQS2_CREATEDB'{ @@ -71,4 +64,7 @@ process { ] } + + + } diff --git a/conf/modules2.config b/conf/modules2.config index 25d3e1d..e659993 100644 --- a/conf/modules2.config +++ b/conf/modules2.config @@ -15,7 +15,7 @@ process { withName:'DIAMOND_BLASTP'{ publishDir = [ path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } @@ -28,12 +28,12 @@ process { publishDir = [ [ path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*mmseqs2_results_cluster.tsv.gz' ], [ path: { "${params.outdir_per_run}/mmseqs_databases" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: 'mmseqs_*' ], @@ -43,7 +43,7 @@ process { withName:'MMSEQS2_CREATEDB'{ publishDir = [ path: {"${params.outdir_per_run}/mmseqs_databases"}, - mode: 'copy', + mode: params.publish_dir_mode, ] } @@ -51,7 +51,7 @@ process { withName: 'NEO4J_HEADERS|TAXDUMP_PROCESS|HMM_TSV_PARSE' { publishDir = [ path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } @@ -59,27 +59,27 @@ process { publishDir = [ [ path: { "${params.outdir_neo4j}/import/genomic_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*locus_to_protein.gz' ], [ path: { "${params.outdir_neo4j}/import/genomic_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*assembly_to_locus.gz' ], [ path: { "${params.outdir_neo4j}/import/genomic_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*assembly_to_taxid.gz' ], [ path: { "${params.outdir_neo4j}/import/genomic_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*loci.gz' ], [ path: { "${params.outdir_neo4j}/import/genomic_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*assemblies.gz' ] ] @@ -89,17 +89,17 @@ process { publishDir = [ [ path: { "${params.outdir_neo4j}/import/protein_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*protein_info.gz' ], [ path: { "${params.outdir_neo4j}/import/protein_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*protein_ids.gz' ], [ path: { "${params.outdir_neo4j}/import/protein_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*protein_to_go.gz' ] @@ -109,7 +109,7 @@ process { withName: 'NEO4J_HEADERS|TAXDUMP_PROCESS|HMM_TSV_PARSE' { publishDir = [ path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } @@ -118,17 +118,17 @@ process { [ [ path: { "${params.outdir_neo4j}/import/hmm_info" }, - mode: 'copy', + mode: 
params.publish_dir_mode, pattern: '*hmminfo' ], [ path: { "${params.outdir_neo4j}/import/hmm_info" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: '*sg_hmm_nodes' ], [ path: { "${params.outdir_per_run}/hmm_cache" }, - mode: 'copy', + mode: params.publish_dir_mode, pattern: 'socialgene_nr_hmms_file_*' ] ] @@ -138,30 +138,20 @@ process { withName:'TIGRFAM_ROLES|TIGRFAM_TO_GO|TIGRFAM_TO_ROLE'{ publishDir = [ path: { "${params.outdir_neo4j}/import/tigrfam_info" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } withName:'PARAMETER_EXPORT_FOR_NEO4J'{ publishDir = [ path: { "${params.outdir_neo4j}/import/parameters" }, - mode: 'copy', - ] - } - - withName: 'CRABHASH' { - publishDir = [ - path: {"${params.outdir_neo4j}/import/protein_info"}, - mode: 'copy', - overwrite: false, - pattern: "*.protein_info.gz" - + mode: params.publish_dir_mode, ] } withName: 'DOWNLOAD_CHEMBL_DATA' { publishDir = [ path: {"${params.outdir_neo4j}/import/chembl"}, - mode: 'copy', + mode: params.publish_dir_mode, overwrite: false ] } @@ -175,7 +165,7 @@ process { withName: 'DOWNLOAD_GOTERMS' { publishDir = [ path: { "${params.outdir_neo4j}/import/goterms" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } @@ -205,21 +195,21 @@ process { withName: 'DEDUPLICATE_AND_INDEX_FASTA' { publishDir = [ path: { "${params.outdir_per_run}/nonredundant_fasta" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } withName: 'NEO4J_ADMIN_IMPORT' { publishDir = [ path: {"${params.outdir_neo4j}"}, - mode: 'copy', + mode: params.publish_dir_mode, ] } withName: 'NEO4J_ADMIN_IMPORT_DRYRUN' { publishDir = [ path: {"${params.outdir_neo4j}"}, - mode: 'copy', + mode: params.publish_dir_mode, ] } @@ -240,16 +230,11 @@ process { withName:'MERGE_PARSED_DOMTBLOUT'{ publishDir = [ path: { "${params.outdir_neo4j}/import/parsed_domtblout" }, - mode: 'copy', + mode: params.publish_dir_mode, ] } - withName:'MULTIQC'{ - publishDir = [ - path: { "${params.tracedir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, - mode: 'copy', - ] - } + } diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 01b8653..0000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,352 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // 
Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") - def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? 
'' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 8d030f4..0000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index b644bb5..0000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,63 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the socialgene/sgnf pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 631b5f5..ed01134 100644 --- a/main.nf +++ b/main.nf @@ -12,34 +12,13 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} -profile docker,ultraquickstart" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +include { SOCIALGENE } from './workflows/socialgene' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_socialgene_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_socialgene_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,13 +26,25 @@ WorkflowMain.initialise(workflow, params, log) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SOCIALGENE } from './workflows/socialgene' // // WORKFLOW: Run main socialgene/sgnf analysis pipeline // workflow SOCIALGENE_SGNF { - SOCIALGENE () + take: + ch_gbk + ch_fasta + ch_fna_files + + main: + SOCIALGENE( + ch_gbk, + ch_fasta, + ch_fna_files + ) + + emit: + multiqc_report = SOCIALGENE.out.multiqc_report // channel: /path/to/multiqc_report.html } /* @@ -67,7 +58,47 @@ workflow SOCIALGENE_SGNF { // See: https://github.com/nf-core/rnaseq/issues/619 // workflow { - SOCIALGENE_SGNF () + + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + SOCIALGENE_SGNF ( + PIPELINE_INITIALISATION.out.genbank_files, + PIPELINE_INITIALISATION.out.fasta_files, + PIPELINE_INITIALISATION.out.fna_files, + + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + SOCIALGENE_SGNF.out.multiqc_report + ) + + + + } /* diff --git a/modules.json b/modules.json index 1e18d01..c51ae6c 100644 --- a/modules.json +++ b/modules.json @@ -5,9 +5,9 @@ 
"https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "mmseqs/createindex": { + "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, "prokka": { @@ -18,7 +18,23 @@ } }, "subworkflows": { - "nf-core": {} + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/diamond/blastp/main.nf b/modules/local/diamond/blastp/main.nf index 4ab8d9e..2d98743 100644 --- a/modules/local/diamond/blastp/main.nf +++ b/modules/local/diamond/blastp/main.nf @@ -16,6 +16,7 @@ process DIAMOND_BLASTP { output: path('*.blast6.gz') , emit: blastout path "versions.yml" , emit: versions + path "diamond.log" , emit: log val output_args , emit: args @@ -39,6 +40,7 @@ process DIAMOND_BLASTP { $args $args2 \\ --compress 1 \\ --no-self-hits \\ + --log \\ --out blastp_all_vs_all.blast6.txt.gz md5_as_filename.sh "blastp_all_vs_all.blast6.txt.gz" "blast6.gz" diff --git a/modules/local/download_hmm_database.nf b/modules/local/download_hmm_database.nf index e84375f..b23745d 100644 --- a/modules/local/download_hmm_database.nf +++ b/modules/local/download_hmm_database.nf @@ -23,9 +23,9 @@ process DOWNLOAD_HMM_DATABASE { when: task.ext.when == null || task.ext.when - script: + shell: """ - hmm_download_${database}.sh $version + hmm_download_${database}.sh '$version' # convert hmm models to HMMER version 3 hmmconvert_loop.sh diff --git a/modules/local/htcondor_prep.nf b/modules/local/htcondor_prep.nf index 8d56ef0..150205a 100644 --- a/modules/local/htcondor_prep.nf +++ b/modules/local/htcondor_prep.nf @@ -21,7 +21,7 @@ process HTCONDOR_PREP { path "sample_matrix.csv" , emit: sample_matrix path "hmmsearch.sh" , emit: hmmsearch path "submit_server_finish.sh" , emit: submit_server_finish - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/mmseqs2_cluster.nf b/modules/local/mmseqs2_cluster.nf index adc9b31..97db53c 100644 --- a/modules/local/mmseqs2_cluster.nf +++ b/modules/local/mmseqs2_cluster.nf @@ -16,7 +16,7 @@ process MMSEQS2_CLUSTER { output: path '*.mmseqs2_results_cluster.tsv.gz' , emit: mmseqs_clustered_db_tsv path 'mmseqs_*' , emit: db - val output_args , emit: args + val args , emit: args path "versions.yml" , emit: versions @@ -25,8 +25,6 @@ process MMSEQS2_CLUSTER { script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - output_args = args + ' ' + args2 """ # have to modify fasta until mmseqs is fixed: # https://github.com/soedinglab/MMseqs2/issues/557 @@ -40,7 +38,7 @@ process MMSEQS2_CLUSTER { # scale input eg 90 to 0.90 transformed_id=\$(bc <<< "scale=1;(\$i/100)") mkdir mmseqs_\${i} clu_\${i} - mmseqs cluster \$temp clu_\${i}/clu_\${i} tmp --threads ${task.cpus} --min-seq-id 0\${transformed_id} $args --remove-tmp-files --compressed 1 + mmseqs cluster \$temp clu_\${i}/clu_\${i} tmp --threads ${task.cpus} --min-seq-id 0\${transformed_id} ${args} --remove-tmp-files --compressed 1 rm -r tmp mmseqs createsubdb --subdb-mode 0 
clu_\${i}/clu_\${i} \$temp mmseqs_\$i/mmseqs_\$i mmseqs createtsv \$temp \$temp clu_\$i/clu_\$i mmseqs2_results_cluster_\$i.tsv --threads ${task.cpus} diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000..ca39fb6 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/local/multiqc/main.nf b/modules/nf-core/multiqc/main.nf similarity index 79% rename from modules/local/multiqc/main.nf rename to modules/nf-core/multiqc/main.nf index c406cd9..9ba8d5e 100644 --- a/modules/local/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'ewels/multiqc:v1.14' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -18,19 +18,19 @@ process MULTIQC { path "*_plots" , optional:true, emit: plots path "versions.yml" , emit: versions - when: - task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . 
cat <<-END_VERSIONS > versions.yml @@ -41,7 +41,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/local/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 92% rename from modules/local/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index f93b5ee..45a9bc3 100644 --- a/modules/local/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +29,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..f1c4242 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..bfebd80 --- /dev/null +++ 
b/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+    "multiqc_versions_single": {
+        "content": [
+            [
+                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-29T08:48:55.657331"
+    },
+    "multiqc_stub": {
+        "content": [
+            [
+                "multiqc_report.html",
+                "multiqc_data",
+                "multiqc_plots",
+                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-29T08:49:49.071937"
+    },
+    "multiqc_versions_config": {
+        "content": [
+            [
+                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-29T08:49:25.457567"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml
new file mode 100644
index 0000000..bea6c0d
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/tags.yml
@@ -0,0 +1,2 @@
+multiqc:
+  - modules/nf-core/multiqc/**
diff --git a/nextflow.config b/nextflow.config
index 394fe8c..123d861 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -14,7 +14,7 @@ manifest {
     description = """Scalable genome mining with SocialGene knowledge graphs"""
     mainScript = 'main.nf'
     defaultBranch = 'main'
-    nextflowVersion = '!>=22.10.1'
+    nextflowVersion = '!>=24.0.0'
     version = '1.1.1' // x-release-please-version
     doi = ''
 }
@@ -23,8 +23,7 @@
 // Global default params, used in configs
 params {
-    input = "not-used-but-needed-in-lint"
-    fasta = "not-used-but-needed-in-lint"
+    input = null

     // Boilerplate options
     email = null
@@ -59,6 +58,9 @@
     local_genbank = null
     local_faa = null
     local_fna = null
+    fasta_csv = null
+    genbank_csv = null
+    fna_csv = null
     ncbi_datasets_command = null
     ncbi_genome_download_command = null
     ncbi_datasets_file = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8ecd522..45cdf14 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -5,6 +5,45 @@
     "description": "Scalable genome mining with SocialGene knowledge graphs",
     "type": "object",
     "definitions": {
+        "input_output_options": {
+            "title": "Input/output options",
+            "type": "object",
+            "fa_icon": "fas fa-terminal",
+            "description": "Define where the pipeline should find input data and save output data.",
+            "required": [],
+            "properties": {
+                "genbank_csv": {
+                    "type": "string",
+                    "mimetype": "text/csv",
+                    "format": "file-path-pattern",
+                    "exists": true,
+                    "description": "File listing paths of local GenBank files to use as pipeline input.",
+                    "help_text": "Plain-text file with one GenBank file path per line and no header; every non-empty line is read as the path of a GenBank file to include as input.",
+                    "fa_icon": "fas fa-file-csv"
+                },
+                "fasta_csv": {
+                    "type": "string",
+                    "mimetype": "text/csv",
+                    "format": "file-path-pattern",
+                    "exists": true,
+                    "schema": "assets/schema_fasta.json",
+                    "pattern": ".*",
+                    "description": "CSV samplesheet listing paths of local FASTA files to use as pipeline input.",
+                    "help_text": "CSV file with a `fasta` header and one FASTA file path per row; rows are validated against `assets/schema_fasta.json`.",
+                    "fa_icon": "fas fa-file-csv"
+                },
+                "fna_csv": {
+                    "type": "string",
+                    "mimetype": "text/csv",
+                    "format": "file-path-pattern",
+                    "exists": true,
+                    "schema": "assets/schema_assembly_input.json",
+                    "pattern": ".*",
+                    "description": "CSV samplesheet listing paths of local nucleotide FASTA (.fna) assembly files to use as pipeline input.",
+                    "help_text": "CSV file with one assembly file path per row; rows are validated against `assets/schema_assembly_input.json`.",
+                    "fa_icon": "fas fa-file-csv"
+                }
+            }
+        },
         "input_genomes": {
             "title": "Input Genomes",
             "type": "object",
@@ -70,7 +109,8 @@
             "outdir": {
                 "type": "string",
                 "description": "If set (and the more specific outdir params are not), all other output directories are created within this directory",
-                "fa_icon": "fas fa-folder-open"
+                "fa_icon": "fas fa-folder-open",
+                "default": "socialgene_results"
             },
             "outdir_download_cache": {
                 "type": "string",
@@ -291,7 +331,7 @@
             },
             "HMMSEARCH_E": {
                 "type": "number",
-                "default": 10,
+                "default": 100,
                 "description": "see pg 74-77: http://eddylab.org/software/hmmer3/3.1b2/Userguide.pdf",
                 "hidden": true
             },
@@ -303,13 +343,13 @@
             },
             "HMMSEARCH_INCE": {
                 "type": "number",
-                "default": 0.1,
+                "default": 0.001,
                 "description": "see pg 74-77: http://eddylab.org/software/hmmer3/3.1b2/Userguide.pdf",
                 "hidden": true
             },
             "HMMSEARCH_INCDOME": {
                 "type": "number",
-                "default": 0.01,
+                "default": 0.001,
                 "description": "see pg 74-77: http://eddylab.org/software/hmmer3/3.1b2/Userguide.pdf",
                 "hidden": true
             },
@@ -347,7 +387,7 @@
             "properties": {
                 "amrfinder_version": {
                     "type": "string",
-                    "default": "2021-03-01.1",
+                    "default": "2024-05-02.2",
                     "hidden": true,
                     "description": "Version of amrfinder to download/use"
                 },
@@ -365,25 +405,25 @@
                 },
                 "tigrfam_version": {
                     "type": "string",
-                    "default": "15.0",
+                    "default": "15",
                     "hidden": true,
                     "description": "Version of tigrfam to download/use"
                 },
                 "pfam_version": {
                     "type": "string",
-                    "default": "35.0",
+                    "default": "37.0",
                     "description": "PFAM version.
 For version numbers see: http://ftp.ebi.ac.uk/pub/databases/Pfam/releases",
                     "hidden": true
                 },
                 "antismash_hmms_git_sha": {
                     "type": "string",
-                    "default": "e2d777c6cd035e6bf20f7eec924a350b00b84c7b",
+                    "default": "8fd065a63e99ee628b536cc19fa66d5942b784ea",
                     "description": "Controls which version of antismash the antismash HMMS are pulled from",
                     "hidden": true
                 },
                 "chembl_version": {
                     "type": "string",
-                    "description": "Set (e.g. 31 for chembl version 31) to incorporate chembl into the database",
+                    "description": "Set (e.g. '31' for chembl version 31) to incorporate chembl into the database",
                     "hidden": true,
                     "default": "31"
                 }
@@ -551,15 +591,8 @@
             "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
             "hidden": true
         },
-        "fasta": {
-            "type": "string",
-            "default": "not-used-but-needed-in-lint",
-            "description": "not used, required by nf-core linter",
-            "hidden": true
-        },
         "input": {
             "type": "string",
-            "default": "not-used-but-needed-in-lint",
             "description": "not used, required by nf-core linter",
             "hidden": true
         },
@@ -674,6 +707,9 @@
         }
     },
     "allOf": [
+        {
+            "$ref": "#/definitions/input_output_options"
+        },
         {
             "$ref": "#/definitions/input_genomes"
         },
@@ -716,5 +752,11 @@
         {
             "$ref": "#/definitions/validation"
         }
-    ]
+    ],
+    "properties": {
+        "prism_hmms_git_sha": {
+            "type": "string",
+            "default": "4bf8bbdf2da3b4f1c764de2301be7b02e74af157"
+        }
+    }
 }
diff --git a/subworkflows/local/genome_handling.nf b/subworkflows/local/genome_handling.nf
index 23c4179..e9da5cc 100644
--- a/subworkflows/local/genome_handling.nf
+++ b/subworkflows/local/genome_handling.nf
@@ -4,8 +4,8 @@
 This is the subworkflow that controls downloading and processing input genomes
========================================================================================
*/
-include { CRABHASH } from '../../modules/local/crabhash.nf'
 include { MIBIG_DOWNLOAD } from '../../modules/local/mibig_download'
+include { DOWNLOAD_CHEMBL_DATA } from '../../modules/local/download_chembl_data'
 include { NCBI_DATASETS_DOWNLOAD } from '../../modules/local/ncbi_datasets_download'
 include { NCBI_GENOME_DOWNLOAD } from '../../modules/local/ncbi_genome_download'
 include { PROCESS_GENBANK_FILES } from '../../modules/local/process_genbank_files'
@@ -16,41 +16,48 @@ include { PROKKA } from '../../modules/nf-core/pr
 workflow GENOME_HANDLING {
     take:
-        fasta_ch
-
+        ch_gbk
+        ch_fasta
+        ch_fna_files
     main:
     ch_versions = Channel.empty()
-    ch_gbk_file = Channel.empty()
-    ch_non_mibig_gbk_file = Channel.empty()
-    info_ch = Channel.empty()
+    ch_non_mibig_gbk_file = ch_gbk
+
+    println("------------------------------")
+    println("ch_gbk: ${ch_gbk}")
+    println("------------------------------")

-    // Create a channel to mix inputs from different sources
-    ch_read = Channel.empty()
     if (params.mibig){
         MIBIG_DOWNLOAD()
-        ch_gbk_file = ch_gbk_file.mix(MIBIG_DOWNLOAD.out.genbank)
+        ch_gbk = ch_gbk.mix(MIBIG_DOWNLOAD.out.genbank)
         ch_versions = ch_versions.mix(MIBIG_DOWNLOAD.out.versions)
     }
-    if (params.ncbi_genome_download_command){
-        NCBI_GENOME_DOWNLOAD(params.ncbi_genome_download_command)
-        ch_gbk_file = ch_gbk_file.mix(NCBI_GENOME_DOWNLOAD.out.gbff_files)
-        ch_non_mibig_gbk_file = ch_non_mibig_gbk_file.mix(NCBI_GENOME_DOWNLOAD.out.gbff_files)
-        ch_versions = ch_versions.mix(NCBI_GENOME_DOWNLOAD.out.versions)
+    if (params.chembl) {
+        DOWNLOAD_CHEMBL_DATA()
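+        // chembl_fa (presumably ChEMBL's protein FASTA) joins the FASTA inputs rather than the GenBank inputs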
+        ch_fasta = ch_fasta.mix(DOWNLOAD_CHEMBL_DATA.out.chembl_fa)
+        ch_versions = ch_versions.mix(DOWNLOAD_CHEMBL_DATA.out.ch_versions)
     }

     if (params.local_genbank) {
         temp_file_ch = Channel.fromPath( params.local_genbank )
-        ch_gbk_file= ch_gbk_file.mix(temp_file_ch)
+        ch_gbk= ch_gbk.mix(temp_file_ch)
         temp_file_ch2 = Channel.fromPath( params.local_genbank )
         ch_non_mibig_gbk_file = ch_non_mibig_gbk_file.mix(temp_file_ch2)
     }
+
+    if (params.ncbi_genome_download_command){
+        NCBI_GENOME_DOWNLOAD(params.ncbi_genome_download_command)
+        ch_gbk = ch_gbk.mix(NCBI_GENOME_DOWNLOAD.out.gbff_files)
+        ch_non_mibig_gbk_file = ch_non_mibig_gbk_file.mix(NCBI_GENOME_DOWNLOAD.out.gbff_files)
+        ch_versions = ch_versions.mix(NCBI_GENOME_DOWNLOAD.out.versions)
+    }
     if (params.ncbi_datasets_command){
         NCBI_DATASETS_DOWNLOAD()
-        ch_gbk_file= ch_gbk_file.mix(NCBI_DATASETS_DOWNLOAD.out.gbff_files)
+        ch_gbk= ch_gbk.mix(NCBI_DATASETS_DOWNLOAD.out.gbff_files)
         ch_non_mibig_gbk_file= ch_non_mibig_gbk_file.mix(NCBI_DATASETS_DOWNLOAD.out.gbff_files)
         ch_versions = ch_versions.mix(NCBI_DATASETS_DOWNLOAD.out.versions)
     }
@@ -66,23 +73,38 @@
             .set{ch_contigs}

         PROKKA(ch_contigs, [], [])
-        ch_gbk_file = ch_gbk_file.mix(PROKKA.out.gbk)
+        ch_gbk = ch_gbk.mix(PROKKA.out.gbk)
         ch_non_mibig_gbk_file = ch_non_mibig_gbk_file.mix(PROKKA.out.gbk)
         ch_versions = ch_versions.mix(PROKKA.out.versions)
     }

+    gbk_and_fasta_ch = ch_gbk.mix(ch_fasta)
+
+    gbk_and_fasta_ch.flatten().toSortedList().flatten()
+        .map { file -> tuple(file, file.size()) }
+        .set { filesWithSizes }

-    if (fasta_ch) {
-        gbk_and_fasta_ch = ch_gbk_file.mix(fasta_ch)
-    }
-    PROCESS_GENBANK_FILES(
-        gbk_and_fasta_ch.flatten().toSortedList().flatten().buffer( size: params.genbank_input_buffer, remainder: true ),
-    )
+    // Batch the collected files by cumulative size so each PROCESS_GENBANK_FILES task receives a bounded amount of input
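+    // NOTE: the 13 GiB cap below is hard-coded; it presumably reflects PROCESS_GENBANK_FILES resource limits and could be promoted to a pipeline parameter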
+    filesWithSizes
+        .toList()
+        .map { fileList -> batchFilesBySize(fileList, 13 * 1024 * 1024 * 1024) }
+        .flatMap { it }
+        .set { batchedFiles }

-    PROCESS_GENBANK_FILES.out.fasta.set{ch_fasta_out}
-    ch_versions = ch_versions.mix(PROCESS_GENBANK_FILES.out.versions)
+    PROCESS_GENBANK_FILES(
+        batchedFiles
+    )
+
+    PROCESS_GENBANK_FILES.out.fasta.set{ch_fasta_out}
+
+    ch_versions = ch_versions.mix(PROCESS_GENBANK_FILES.out.versions)
     // sort by hash is needed for caching to work
     PROCESS_GENBANK_FILES.out.protein_ids
@@ -123,19 +145,45 @@
     ch_protein_to_dedup = ch_protein_ids.mix(ch_protein_to_go)

-
-
     DEDUPLICATE_GENOMIC_INFO(ch_genomic_to_dedup)
     DEDUPLICATE_PROTEIN_INFO(ch_protein_to_dedup)
-
     emit:
         ch_genome_info = DEDUPLICATE_GENOMIC_INFO.out.deduped.collect()
         ch_protein_info = DEDUPLICATE_PROTEIN_INFO.out.deduped.collect()
-        ch_gbk = ch_gbk_file.flatten().toSortedList().flatten()
+        ch_gbk = ch_gbk.flatten().toSortedList().flatten()
         ch_fasta = ch_fasta_out
         ch_versions = ch_versions
         ch_non_mibig_gbk_file = ch_non_mibig_gbk_file.flatten().toSortedList().flatten()
 }
+
+// Function to batch files by cumulative size
+def batchFilesBySize(filesWithSizes, maxBatchSize) {
+    def batches = []
+    def currentBatch = []
+    def currentBatchSize = 0
+
+    filesWithSizes.each { tuple ->
+        def (file, size) = tuple
+
+        // If adding this file exceeds the batch size limit, close the current batch and start a new one
+        if (currentBatchSize + size > maxBatchSize && currentBatch) {
+            batches << currentBatch
+            currentBatch = []
+            currentBatchSize = 0
+        }
+
+        // Add the current file to the batch
+        currentBatch << file
+        currentBatchSize += size
+    }
+
+    // Add the remaining batch if it exists and isn't empty
+    if (!currentBatch.isEmpty()) {
+        batches << currentBatch
+    }
+
+    return batches
+}
diff --git a/subworkflows/local/sg_modules.nf b/subworkflows/local/sg_modules.nf
index e724fc6..91acf25 100644
--- a/subworkflows/local/sg_modules.nf
+++ b/subworkflows/local/sg_modules.nf
@@ -19,8 +19,19 @@ workflow SG_MODULES {
             sg_modules.add("tigrfam")
         }
     }
-    if (params.ncbi_genome_download_command || params.local_genbank || params.ncbi_datasets_command || params.mibig || params.local_faa || params.local_fna){
+    if ( params.ncbi_genome_download_command ||
+        params.local_genbank ||
+        params.ncbi_datasets_command ||
+        params.mibig ||
+        params.local_faa ||
+        params.local_fna ||
+        params.genbank_csv ||
+        params.fasta_csv ||
+        params.fna_csv
+    ){
+
         sg_modules.add("base")
+
     } else {
         println("\033[0;31m" + "!! No input genome source specified !!" + "\033[0m")
     }
diff --git a/subworkflows/local/utils_nfcore_socialgene_pipeline/main.nf b/subworkflows/local/utils_nfcore_socialgene_pipeline/main.nf
new file mode 100644
index 0000000..31c2fd5
--- /dev/null
+++ b/subworkflows/local/utils_nfcore_socialgene_pipeline/main.nf
@@ -0,0 +1,243 @@
+//
+// Subworkflow with functionality specific to the socialgene/sgnf pipeline
+//
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin'
+include { paramsSummaryMap          } from 'plugin/nf-validation'
+include { fromSamplesheet           } from 'plugin/nf-validation'
+include { UTILS_NEXTFLOW_PIPELINE   } from '../../nf-core/utils_nextflow_pipeline'
+// include { completionEmail        } from '../../nf-core/utils_nfcore_pipeline'
+include { completionSummary         } from '../../nf-core/utils_nfcore_pipeline'
+include { dashedLine                } from '../../nf-core/utils_nfcore_pipeline'
+include { nfCoreLogo                } from '../../nf-core/utils_nfcore_pipeline'
+include { imNotification            } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NFCORE_PIPELINE     } from '../../nf-core/utils_nfcore_pipeline'
+include { workflowCitation          } from '../../nf-core/utils_nfcore_pipeline'
+
+/*
+========================================================================================
+    SUBWORKFLOW TO INITIALISE PIPELINE
+========================================================================================
+*/
+
+workflow PIPELINE_INITIALISATION {
+
+    take:
+    version           // boolean: Display version and exit
+    help              // boolean: Display help text
+    validate_params   // boolean: Validate parameters against the schema at runtime
+    monochrome_logs   // boolean: Do not use coloured log outputs
+    nextflow_cli_args //   array: List of positional nextflow CLI args
+    outdir            //  string: The output directory where the results will be saved
+    input             //  string: Path to input samplesheet
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // Print version and exit if required and dump pipeline parameters to JSON file
+    //
+    UTILS_NEXTFLOW_PIPELINE (
+        version,
+        true,
+        outdir,
+        workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1
+    )
+
+    //
+    // Validate parameters and generate parameter summary to stdout
+    //
+    pre_help_text = nfCoreLogo(monochrome_logs)
+    post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs)
+    def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/...> --input samplesheet.csv --outdir <OUTDIR>"
+    UTILS_NFVALIDATION_PLUGIN (
+        help,
+        workflow_command,
+        pre_help_text,
+        post_help_text,
+        validate_params,
+        "nextflow_schema.json"
+    )
+
+    //
+    // Check config provided to the pipeline
+    //
+    UTILS_NFCORE_PIPELINE (
+        nextflow_cli_args
+    )
+    //
+    // Custom validation for pipeline parameters
+    //
+    validateInputParameters()
+
+    //
+    // Create channels from the input files provided through params.genbank_csv, params.fasta_csv, and params.fna_csv
+    //
+
+    ch_genbank_files = Channel.empty()
+    ch_fasta_files = Channel.empty()
+    ch_fna_files = Channel.empty()
+
+    if (params.genbank_csv) {
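+        // genbank_csv is read as a plain list of paths, one per non-empty line, rather than through fromSamplesheet like fasta_csv and fna_csv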
post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + + ch_genbank_files = Channel.empty() + ch_fasta_files = Channel.empty() + ch_fna_files = Channel.empty() + + if (params.genbank_csv) { + def paths = file(params.genbank_csv).readLines().findAll { it.size() > 0 } + Channel.fromPath(paths).collect().set { ch_genbank_files } + } + + if (params.fasta_csv) { + Channel + .fromSamplesheet("fasta_csv") + .map { + validateInputSamplesheet(it) + } + .set { ch_fasta_files } + } + if (params.fna_csv) { + Channel + .fromSamplesheet("fna_csv") + .map { + validateInputSamplesheet(it) + } + .set { ch_fna_files } + } + + emit: + genbank_files = ch_genbank_files + fasta_files = ch_fasta_files + fna_files = ch_fna_files + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + // if (email || email_on_fail) { + // completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + // } + + // completionSummary(monochrome_logs) + + // if (hook_url) { + // imNotification(summary_params, hook_url) + // } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (files) = input + println "Validating input samplesheet: ${files}" + + return [ files ] +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 
2016)", + "." + ].join(' ').trim() + + return citation_text + } + + def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>
" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>
", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>
" + ].join(' ').trim() + + return reference_text + } + + def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.</li>
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + + diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..ac31f28 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." 
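+ // Failing to read the conda config is non-fatal: warn and skip the channel-order checks below.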
+ return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
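// The leading '!' makes the version requirement strict: Nextflow aborts, rather than just warning, when the running version does not satisfy '>=23.04.0'. +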
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..14558c3 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 0000000..2585b65 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + 
+/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 0000000..3d4a6b0 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 0000000..5784a33 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..7626c1c --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 0000000..60b1cff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/socialgene.nf b/workflows/socialgene.nf index 606c2ad..f3e049e 100644 --- a/workflows/socialgene.nf +++ b/workflows/socialgene.nf @@ -5,34 +5,8 @@ */ - - - -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowSocialgene.initialise(params, log) - /* - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ======================================================================================== IMPORT LOCAL MODULES @@ -52,10 +26,7 @@ include { SEQKIT_SPLIT } from '../modules/local include { HTCONDOR_PREP } from '../modules/local/htcondor_prep' include { HMMER_HMMSEARCH } from '../modules/local/hmmsearch' include { HMMSEARCH_PARSE } from '../modules/local/hmmsearch_parse' -include { DOWNLOAD_CHEMBL_DATA } from '../modules/local/download_chembl_data' -include { MD5_AS_FILENAME as MERGE_PARSED_DOMTBLOUT } from '../modules/local/md5_as_filename' - - +include { MD5_AS_FILENAME as MERGE_PARSED_DOMTBLOUT } from '../modules/local/md5_as_filename' /* ======================================================================================== @@ -67,18 +38,20 @@ include { NCBI_TAXONOMY } from '../subworkflows/local/ncbi_taxonom include { GENOME_HANDLING } from '../subworkflows/local/genome_handling' include { SG_MODULES } from '../subworkflows/local/sg_modules' include { HMM_PREP } from '../subworkflows/local/hmm_prep' -//include { HMM_RUN } from '../subworkflows/local/hmm_run' -//include { HMM_OUTSOURCED } from '../subworkflows/local/hmm_outsourced' /* ======================================================================================== IMPORT NF-CORE MODULES/SUBWORKFLOWS ======================================================================================== */ -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/local/custom/dumpsoftwareversions/main' include { DIAMOND_BLASTP } from '../modules/local/diamond/blastp/main' include { DIAMOND_MAKEDB } from '../modules/local/diamond/makedb/main' -include { MULTIQC } from '../modules/local/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_socialgene_pipeline' + /* ======================================================================================== RUN MAIN WORKFLOW @@ -87,13 +60,18 @@ include { MULTIQC } from '../modules/local/multiqc/main' available_hmms=["antismash","amrfinder","bigslice","classiphage", "ipresto","pfam","prism","resfams","tigrfam","virus_orthologous_groups"] -// Info required for completion email and summary -def multiqc_report = [] - workflow SOCIALGENE { + take: + ch_gbk + ch_fasta + ch_fna_files + + main: println "Manifest's pipeline version: $workflow.profile" ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + def hmmlist = [] // if not `null`, hmmlist needs to be a list @@ -136,44 +114,35 @@ workflow SOCIALGENE { //////////////////////// */ - - if (params.chembl) { - // chembl has fasta we need to process, so download here and pass fasta along - DOWNLOAD_CHEMBL_DATA() - chembl_fasta_ch = DOWNLOAD_CHEMBL_DATA.out.chembl_fa - ch_versions = ch_versions.mix(DOWNLOAD_CHEMBL_DATA.out.ch_versions) - } else { - chembl_fasta_ch = Channel.empty() - } - - if (params.local_faa){ - local_faa_ch = Channel.fromPath(params.local_faa) - } else { - local_faa_ch = Channel.empty() - } - - - - input_fasta_ch = chembl_fasta_ch.mix(local_faa_ch) - - GENOME_HANDLING(input_fasta_ch) + GENOME_HANDLING( + ch_gbk, + ch_fasta, + ch_fna_files + ) 
     GENOME_HANDLING.out.ch_fasta.set{ch_fasta}
     ch_versions = ch_versions.mix(GENOME_HANDLING.out.ch_versions)
 
     // TODO: just pass this straight to DEDUPLICATE_AND_INDEX_FASTA, dont' create a collected_fasta.faa.gz
-    GENOME_HANDLING.out.ch_fasta.collect().set{ fasta_to_dedup}
+    GENOME_HANDLING.out.ch_fasta
+        .collect()
+        .set{fasta_to_dedup}
+
+    ch_multiqc_files = ch_multiqc_files.mix(ch_gbk)
+
+    DEDUPLICATE_AND_INDEX_FASTA(fasta_to_dedup)
 
-    DEDUPLICATE_AND_INDEX_FASTA.out
-        .fasta
+
+    DEDUPLICATE_AND_INDEX_FASTA.out.fasta
         .set{ch_nr_fasta}
 
     if (params.sort_fasta) {
         // If testing, sort the FASTA file to get consistent output, otherwise skip
         // Use seqkit to remove redundant sequences, based on sequence id because they are already the sequence hash
         SEQKIT_SORT(ch_nr_fasta)
-        SEQKIT_SORT.out
-            .fasta
+
+        SEQKIT_SORT.out.fasta
             .set{single_ch_fasta}
+
     } else {
         single_ch_fasta = ch_nr_fasta
     }
@@ -212,6 +181,7 @@ workflow SOCIALGENE {
         ch_split_fasta.collect().set{all_split_fasta}
         HTCONDOR_PREP(HMM_PREP.out.all_hmms, all_split_fasta)
         ch_versions = ch_versions.mix(HTCONDOR_PREP.out.versions)
+        domtblout_ch=false
     } else if (params.domtblout_path || params.domtblout_with_ga || params.domtblout_without_ga){
         // TODO: check that no paths are repeated between the param inputs
         if (params.domtblout_with_ga){
@@ -251,6 +221,8 @@ workflow SOCIALGENE {
         ch_versions = ch_versions.mix(HMMSEARCH_PARSE.out.versions.last())
     }
 
+
+
     hmm_info_ch = HMM_PREP.out.hmm_info
     hmm_nodes_ch = HMM_PREP.out.hmm_nodes
 
@@ -294,6 +266,7 @@ workflow SOCIALGENE {
         ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions)
         ch_versions = ch_versions.mix(DIAMOND_BLASTP.out.versions)
         blastp_args = DIAMOND_BLASTP.out.args
+        ch_multiqc_files = ch_multiqc_files.mix(DIAMOND_BLASTP.out.log)
     } else {
         blast_ch = file("${baseDir}/assets/EMPTY_FILE")
         blastp_args = ''
@@ -369,46 +342,48 @@ workflow SOCIALGENE {
         sg_modules
     )
 
-if (run_build_database) {
-
-    // Neo4j isn't available with Conda so check that Docker is being used
-    if (workflow.profile.contains("conda")){
-        println '\033[0;34m The Neo4j database can only be built using the docker Nextflow profile, but you have used Conda. The Docker/Neo4j command to do build the database can be found at \n "$outdir/socialgene_neo4j/command_to_build_neo4j_database_with_docker.sh" \033[0m'
-    } else if (workflow.profile.contains("docker")){
-
-        // TODO: this is not good/fragile, it really should be a single tuple input, but will have
-        // to recode admin import to not care about directory structure
-        NEO4J_ADMIN_IMPORT(
-            sg_modules.collect(),
-            hmmlist.collect(),
-            neo4j_header_ch,
-            taxdump_ch,
-            hmm_info_ch,
-            hmm_nodes_ch,
-            blast_ch,
-            mmseqs2_ch,
-            hmmer_result_ch,
-            tigrfam_ch,
-            parameters_ch,
-            GENOME_HANDLING.out.ch_genome_info,
-            GENOME_HANDLING.out.ch_protein_info,
-            goterms_ch
-        )
-
-        ch_versions = ch_versions.mix(NEO4J_ADMIN_IMPORT.out.versions)
-    }
+    if (run_build_database) {
+
+        // Neo4j isn't available with Conda so check that Docker is being used
+        if (workflow.profile.contains("conda")){
+            println '\033[0;34m The Neo4j database can only be built using the docker Nextflow profile, but you have used Conda. The Docker/Neo4j command to build the database can be found at \n "$outdir/socialgene_neo4j/command_to_build_neo4j_database_with_docker.sh" \033[0m'
+        } else if (workflow.profile.contains("docker")){
+
+
+            NEO4J_ADMIN_IMPORT(
+                sg_modules.collect(),
+                hmmlist.collect(),
+                neo4j_header_ch,
+                taxdump_ch,
+                hmm_info_ch,
+                hmm_nodes_ch,
+                blast_ch,
+                mmseqs2_ch,
+                hmmer_result_ch,
+                tigrfam_ch,
+                parameters_ch,
+                GENOME_HANDLING.out.ch_genome_info,
+                GENOME_HANDLING.out.ch_protein_info,
+                goterms_ch
+            )
+
+            ch_versions = ch_versions.mix(NEO4J_ADMIN_IMPORT.out.versions)
+        }
     }
 
-    /*
-    ////////////////////////
-    OUTPUT SOFTWARE VERSIONS
-    ////////////////////////
-    */
+    //
+    // Collate and save software versions
+    //
+    softwareVersionsToYAML(ch_versions)
+        .collectFile(
+            storeDir: "${params.outdir}/pipeline_info",
+            name: 'nf_core_pipeline_software_mqc_versions.yml',
+            sort: true,
+            newLine: true
+        ).set { ch_collated_versions }
 
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
+    //
 
     /*
     ////////////////////////
@@ -416,24 +391,46 @@ if (run_build_database) {
     ////////////////////////
     */
 
-    workflow_summary = WorkflowSocialgene.paramsSummaryMultiqc(workflow, summary_params)
-    ch_workflow_summary = Channel.value(workflow_summary)
-
-    methods_description = WorkflowSocialgene.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
-    ch_methods_description = Channel.value(methods_description)
-    ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-
-    MULTIQC (
+    ch_multiqc_config = Channel.fromPath(
+        "$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+    ch_multiqc_custom_config = params.multiqc_config ?
+        Channel.fromPath(params.multiqc_config, checkIfExists: true) :
+        Channel.empty()
+    ch_multiqc_logo = params.multiqc_logo ?
+        Channel.fromPath(params.multiqc_logo, checkIfExists: true) :
+        Channel.empty()
+
+    summary_params = paramsSummaryMap(
+        workflow, parameters_schema: "nextflow_schema.json")
+    ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+
+    ch_multiqc_custom_methods_description = params.multiqc_methods_description ?
+        file(params.multiqc_methods_description, checkIfExists: true) :
+        file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+    ch_methods_description = Channel.value(
+        methodsDescriptionText(ch_multiqc_custom_methods_description))
+
+    ch_multiqc_files = ch_multiqc_files.mix(
+        ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+    ch_multiqc_files = ch_multiqc_files.mix(
+        ch_methods_description.collectFile(
+            name: 'methods_description_mqc.yaml',
+            sort: true
+        )
+    )
+    MULTIQC(
         ch_multiqc_files.collect(),
         ch_multiqc_config.toList(),
         ch_multiqc_custom_config.toList(),
-        ch_multiqc_logo.toList()
+        ch_multiqc_logo.toList(),
+
     )
-    multiqc_report = MULTIQC.out.report.toList()
+
+    emit:
+    multiqc_report = MULTIQC.out.report.toList()
+    versions       = ch_versions // channel: [ path(versions.yml) ]
 }
@@ -443,16 +440,8 @@
 ========================================================================================
 */
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
-    }
-    NfcoreTemplate.dump_parameters(workflow, params)
-    NfcoreTemplate.summary(workflow, params, log)
-    if (params.hook_url) {
-        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
-    }
-}
+
+
 /*
 ========================================================================================