Skip to content

Commit

Permalink
Merge branch 'inputs' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
chasemc committed Oct 26, 2024
2 parents 9dfcb9b + bc2e1d8 commit 4dc6158
Show file tree
Hide file tree
Showing 50 changed files with 2,399 additions and 747 deletions.
11 changes: 3 additions & 8 deletions assets/schema_input.json → assets/schema_fasta.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,16 @@
"items": {
"type": "object",
"properties": {
"sample": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces"
},
"fasta": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.f(a|asta|na|as)\\.gz$",
"errorMessage": "FastA file must be provided, cannot contain spaces and must have extension '.fa.gz', '.fasta.gz', '.fna.gz', or '.fas.gz'",
"pattern": ".*",
"errorMessage": "Path to a FASTA file, cannot contain spaces",
"exists": true
}

},
"required": [
"sample",
"fasta"
]
}
Expand Down
23 changes: 23 additions & 0 deletions assets/schema_genbank.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "https://raw.githubusercontent.com/socialgene/sgnf/master/assets/schema_genbank.json",
    "title": "socialgene/sgnf pipeline - params.input schema",
    "description": "Schema for the file provided with params.input",
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "fasta": {
                "type": "string",
                "format": "file-path",
                "pattern": "^\\S+$",
                "errorMessage": "Path to a FASTA file, cannot contain spaces",
                "exists": true
            }
        },
        "required": [
            "fasta"
        ]
    }
}
3 changes: 0 additions & 3 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}

withName:PROCESS_GENBANK_FILES {
cpus = { check_max (1 * task.attempt, 'cpus' ) }
Expand Down
4 changes: 2 additions & 2 deletions conf/examples/input_examples/by_taxa/actinomycetota.config
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ process {
}
withLabel:process_high_memory {
memory = { check_max( 400.GB * task.attempt, 'memory' ) }
}
}
withName:ANTISMASH {
cpus = 2
memory = { check_max (3.GB * task.attempt, 'memory' ) }
Expand All @@ -98,6 +98,6 @@ process {
}
withName:'MMSEQS2_CLUSTER'{
// https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
ext.args2 = '-c 0.7 --cov-mode 0'
ext.args = '-c 0.7 --cov-mode 0'
}
}
3 changes: 1 addition & 2 deletions conf/examples/input_examples/input_examples.config
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ process {
}
withName:'MMSEQS2_CLUSTER'{
// https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
ext.args = '--single-step-clustering 1'
ext.args2 = '--min-seq-id 0.5 -c 0.7 --cov-mode 0'
ext.args = '--single-step-clustering 1 --min-seq-id 0.5 -c 0.7 --cov-mode 0'
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ params {
// Use the downloaded BGC0000001 as input
local_faa = "/tmp/uni/seqs.fasta"

crabhash_path = '/tmp/github/kwan_lab/crabhash/target/release'

/*
////////////////////////
Expand Down
12 changes: 4 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@

process {

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.tracedir}" },
mode: 'copy',
pattern: '*_versions.yml'
]
}

withName:'HMMER_HMMSEARCH'{
ext.args = ""
Expand All @@ -31,7 +24,7 @@ process {

withName:'MMSEQS2_CLUSTER'{
// https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
ext.args2 = '-c 0.7 --cov-mode 0'
ext.args = '-c 0.7 --cov-mode 0'
}

withName:'MMSEQS2_CREATEDB'{
Expand Down Expand Up @@ -71,4 +64,7 @@ process {
]

}



}
67 changes: 26 additions & 41 deletions conf/modules2.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process {
withName:'DIAMOND_BLASTP'{
publishDir = [
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
mode: 'copy',
mode: params.publish_dir_mode,
]
}

Expand All @@ -28,12 +28,12 @@ process {
publishDir = [
[
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*mmseqs2_results_cluster.tsv.gz'
],
[
path: { "${params.outdir_per_run}/mmseqs_databases" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: 'mmseqs_*'
],

Expand All @@ -43,43 +43,43 @@ process {
withName:'MMSEQS2_CREATEDB'{
publishDir = [
path: {"${params.outdir_per_run}/mmseqs_databases"},
mode: 'copy',
mode: params.publish_dir_mode,
]
}


withName: 'NEO4J_HEADERS|TAXDUMP_PROCESS|HMM_TSV_PARSE' {
publishDir = [
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
mode: 'copy',
mode: params.publish_dir_mode,
]
}

withName: 'DEDUPLICATE_GENOMIC_INFO' {
publishDir = [
[
path: { "${params.outdir_neo4j}/import/genomic_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*locus_to_protein.gz'
],
[
path: { "${params.outdir_neo4j}/import/genomic_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*assembly_to_locus.gz'
],
[
path: { "${params.outdir_neo4j}/import/genomic_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*assembly_to_taxid.gz'
],
[
path: { "${params.outdir_neo4j}/import/genomic_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*loci.gz'
],
[
path: { "${params.outdir_neo4j}/import/genomic_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*assemblies.gz'
]
]
Expand All @@ -89,17 +89,17 @@ process {
publishDir = [
[
path: { "${params.outdir_neo4j}/import/protein_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*protein_info.gz'
],
[
path: { "${params.outdir_neo4j}/import/protein_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*protein_ids.gz'
],
[
path: { "${params.outdir_neo4j}/import/protein_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*protein_to_go.gz'
]

Expand All @@ -109,7 +109,7 @@ process {
// Publish settings for processes matching NEO4J_HEADERS, TAXDUMP_PROCESS or HMM_TSV_PARSE.
// Outputs go to "<params.outdir_neo4j>/import/<name>", where <name> is the last
// ':'-separated component of the process name, lower-cased.
// NOTE(review): the same selector with the same publishDir path is already declared
// earlier in this config; this second declaration looks redundant — confirm and
// consider keeping only one.
withName: 'NEO4J_HEADERS|TAXDUMP_PROCESS|HMM_TSV_PARSE' {
publishDir = [
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
// NOTE(review): two `mode` entries are listed back to back; presumably this view
// shows the pre-change ('copy') and post-change (params.publish_dir_mode) lines
// together — confirm that only one remains in the actual file.
mode: 'copy',
mode: params.publish_dir_mode,
]
}

Expand All @@ -118,17 +118,17 @@ process {
[
[
path: { "${params.outdir_neo4j}/import/hmm_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*hmminfo'
],
[
path: { "${params.outdir_neo4j}/import/hmm_info" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: '*sg_hmm_nodes'
],
[
path: { "${params.outdir_per_run}/hmm_cache" },
mode: 'copy',
mode: params.publish_dir_mode,
pattern: 'socialgene_nr_hmms_file_*'
]
]
Expand All @@ -138,30 +138,20 @@ process {
withName:'TIGRFAM_ROLES|TIGRFAM_TO_GO|TIGRFAM_TO_ROLE'{
publishDir = [
path: { "${params.outdir_neo4j}/import/tigrfam_info" },
mode: 'copy',
mode: params.publish_dir_mode,
]
}
withName:'PARAMETER_EXPORT_FOR_NEO4J'{
publishDir = [
path: { "${params.outdir_neo4j}/import/parameters" },
mode: 'copy',
]
}

withName: 'CRABHASH' {
publishDir = [
path: {"${params.outdir_neo4j}/import/protein_info"},
mode: 'copy',
overwrite: false,
pattern: "*.protein_info.gz"

mode: params.publish_dir_mode,
]
}

withName: 'DOWNLOAD_CHEMBL_DATA' {
publishDir = [
path: {"${params.outdir_neo4j}/import/chembl"},
mode: 'copy',
mode: params.publish_dir_mode,
overwrite: false
]
}
Expand All @@ -175,7 +165,7 @@ process {
withName: 'DOWNLOAD_GOTERMS' {
publishDir = [
path: { "${params.outdir_neo4j}/import/goterms" },
mode: 'copy',
mode: params.publish_dir_mode,
]
}

Expand Down Expand Up @@ -205,21 +195,21 @@ process {
withName: 'DEDUPLICATE_AND_INDEX_FASTA' {
publishDir = [
path: { "${params.outdir_per_run}/nonredundant_fasta" },
mode: 'copy',
mode: params.publish_dir_mode,
]
}

withName: 'NEO4J_ADMIN_IMPORT' {
publishDir = [
path: {"${params.outdir_neo4j}"},
mode: 'copy',
mode: params.publish_dir_mode,
]
}

withName: 'NEO4J_ADMIN_IMPORT_DRYRUN' {
publishDir = [
path: {"${params.outdir_neo4j}"},
mode: 'copy',
mode: params.publish_dir_mode,
]
}

Expand All @@ -240,16 +230,11 @@ process {
withName:'MERGE_PARSED_DOMTBLOUT'{
publishDir = [
path: { "${params.outdir_neo4j}/import/parsed_domtblout" },
mode: 'copy',
mode: params.publish_dir_mode,
]
}

withName:'MULTIQC'{
publishDir = [
path: { "${params.tracedir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
mode: 'copy',
]
}



}
Loading

0 comments on commit 4dc6158

Please sign in to comment.