diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc8dd3cf..43814c4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Changed`
- [#674](https://github.com/nf-core/mag/pull/674) - Changed to porechop-abi as default adapter trimming tool for long reads. User can still use porechop if preferred (added by @muabnezor)
+- [#666](https://github.com/nf-core/mag/pull/666) - Update SPAdes to version 4.0.0, replace both METASPADES and MEGAHIT with official nf-core modules (requested by @elsherbini, fix by @jfy133)
+- [#666](https://github.com/nf-core/mag/pull/666) - Update URLs to GTDB database downloads due to server move (reported by @Jokendo-collab, fix by @jfy133)
- [#695](https://github.com/nf-core/mag/pull/695) - Updated to nf-core 3.0.2 `TEMPLATE` (by @jfy133)
- [#695](https://github.com/nf-core/mag/pull/695) - Switch more stable Zenodo link for CheckM data (by @jfy133)
@@ -29,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| ------------ | ---------------- | ----------- |
| Porechop_ABI | | 0.5.0 |
| Filtlong | 0.2.0 | 0.2.1 |
+| SPAdes | 3.15.3 | 4.0.0 |
### `Deprecated`
diff --git a/conf/base.config b/conf/base.config
index 6ed5a366..21a8ac3e 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,9 +10,8 @@
process {
- // TODO nf-core: Check the defaults for all processes
cpus = { 1 * task.attempt }
- memory = { 6.GB * task.attempt }
+ memory = { 7.GB * task.attempt }
time = { 4.h * task.attempt }
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
diff --git a/conf/modules.config b/conf/modules.config
index df53a47e..8f2c1042 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -13,20 +13,11 @@
process {
//default: do not publish into the results folder
- publishDir = [
- path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- enabled: false
- ]
+ publishDir = [path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: false]
withName: FASTQC_RAW {
ext.args = '--quiet'
- publishDir = [
- path: { "${params.outdir}/QC_shortreads/fastqc" },
- mode: params.publish_dir_mode,
- pattern: "*.html"
- ]
+ publishDir = [path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html"]
ext.prefix = { "${meta.id}_run${meta.run}_raw" }
tag = { "${meta.id}_run${meta.run}_raw" }
}
@@ -204,19 +195,7 @@ process {
}
withName: NANOLYSE {
- publishDir = [
- [
- path: { "${params.outdir}/QC_longreads/NanoLyse" },
- mode: params.publish_dir_mode,
- pattern: "*.log"
- ],
- [
- path: { "${params.outdir}/QC_longreads/NanoLyse" },
- mode: params.publish_dir_mode,
- pattern: "*_nanolyse.fastq.gz",
- enabled: params.save_lambdaremoved_reads
- ]
- ]
+ publishDir = [[path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*.log"], [path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*_nanolyse.fastq.gz", enabled: params.save_lambdaremoved_reads]]
ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" }
}
@@ -252,20 +231,12 @@ process {
}
withName: CENTRIFUGE_CENTRIFUGE {
- publishDir = [
- path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" },
- mode: params.publish_dir_mode,
- pattern: "*.txt"
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt"]
}
withName: CENTRIFUGE_KREPORT {
ext.prefix = { "${meta.id}_kreport" }
- publishDir = [
- path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" },
- mode: params.publish_dir_mode,
- pattern: "*.txt"
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt"]
}
withName: KRAKEN2 {
@@ -278,62 +249,33 @@ process {
}
withName: KREPORT2KRONA_CENTRIFUGE {
- publishDir = [
- path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" },
- mode: params.publish_dir_mode,
- pattern: "*.txt",
- enabled: false
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt", enabled: false]
}
withName: KRONA_KTIMPORTTAXONOMY {
- publishDir = [
- path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" },
- mode: params.publish_dir_mode,
- pattern: "*.html"
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.html"]
}
- //pattern: "*.{fa.gz,log}" //'pattern' didnt work, probably because the output is in a folder, solved with 'saveAs'
withName: MEGAHIT {
- ext.args = params.megahit_options ?: ''
- publishDir = [
- path: { "${params.outdir}/Assembly" },
- mode: params.publish_dir_mode,
- saveAs: { filename ->
- filename.equals('versions.yml')
- ? null
- : filename.indexOf('.contigs.fa.gz') > 0
- ? filename
- : filename.indexOf('.log') > 0 ? filename : null
- }
- ]
+ ext.args = { params.megahit_options ? params.megahit_options + "-m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" }
+ ext.prefix = { "MEGAHIT-${meta.id}" }
+ publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"]
}
- withName: SPADES {
- ext.args = params.spades_options ?: ''
- publishDir = [
- path: { "${params.outdir}/Assembly/SPAdes" },
- mode: params.publish_dir_mode,
- pattern: "*.{fasta.gz,gfa.gz,log}"
- ]
+ withName: METASPADES {
+ ext.args = params.spades_options ?: '--meta'
+ ext.prefix = { "SPAdes-${meta.id}" }
+ publishDir = [path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"]
}
- withName: SPADESHYBRID {
- ext.args = params.spades_options ?: ''
- publishDir = [
- path: { "${params.outdir}/Assembly/SPAdesHybrid" },
- mode: params.publish_dir_mode,
- pattern: "*.{fasta.gz,gfa.gz,log}"
- ]
+ withName: METASPADESHYBRID {
+ ext.args = params.spades_options ?: '--meta'
+ ext.prefix = { "SPAdesHybrid-${meta.id}" }
+ publishDir = [path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"]
}
withName: QUAST {
- publishDir = [
- path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: GENOMAD_ENDTOEND {
@@ -368,11 +310,7 @@ process {
}
withName: 'MAG_DEPTHS_PLOT|MAG_DEPTHS_SUMMARY' {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/depths/bins" },
- mode: params.publish_dir_mode,
- pattern: "*.{png,tsv}"
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/depths/bins" }, mode: params.publish_dir_mode, pattern: "*.{png,tsv}"]
}
withName: BIN_SUMMARY {
@@ -384,11 +322,7 @@ process {
}
withName: BUSCO_DB_PREPARATION {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC/BUSCO" },
- mode: params.publish_dir_mode,
- pattern: "*.tar.gz"
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, pattern: "*.tar.gz"]
}
withName: BUSCO {
@@ -403,40 +337,21 @@ process {
}
withName: BUSCO_SAVE_DOWNLOAD {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC/BUSCO" },
- mode: params.publish_dir_mode,
- overwrite: false,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, overwrite: false, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: 'BUSCO_SUMMARY|QUAST_BINS|QUAST_BINS_SUMMARY' {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/QC" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: ARIA2_UNTAR {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC/CheckM/checkm_downloads" },
- mode: params.publish_dir_mode,
- overwrite: false,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- enabled: params.save_checkm_data
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM/checkm_downloads" }, mode: params.publish_dir_mode, overwrite: false, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.save_checkm_data]
}
withName: CHECKM_LINEAGEWF {
tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" }
ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" }
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC/CheckM" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: CHECKM_QA {
@@ -451,11 +366,7 @@ process {
withName: COMBINE_CHECKM_TSV {
ext.prefix = { "checkm_summary" }
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/QC" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: GUNC_DOWNLOADDB {
@@ -486,27 +397,15 @@ process {
}
withName: CAT_DB_GENERATE {
- publishDir = [
- path: { "${params.outdir}/Taxonomy/CAT" },
- mode: params.publish_dir_mode,
- pattern: "*.tar.gz"
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/CAT" }, mode: params.publish_dir_mode, pattern: "*.tar.gz"]
}
withName: CAT {
- publishDir = [
- path: { "${params.outdir}/Taxonomy/CAT/${meta.assembler}/${meta.binner}" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/CAT/${meta.assembler}/${meta.binner}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: CAT_SUMMARY {
ext.prefix = "cat_summary"
- publishDir = [
- path: { "${params.outdir}/Taxonomy/CAT/" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/CAT/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: GTDBTK_CLASSIFYWF {
@@ -526,49 +425,30 @@ process {
withName: GTDBTK_SUMMARY {
ext.args = "--extension fa"
- publishDir = [
- path: { "${params.outdir}/Taxonomy/GTDB-Tk" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/Taxonomy/GTDB-Tk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: PROKKA {
ext.args = "--metagenome"
- publishDir = [
- path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: PRODIGAL {
ext.args = "-p meta"
- publishDir = [
- path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ ext.prefix = { "${meta.assembler}-${meta.id}_prodigal" }
+ publishDir = [path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: FREEBAYES {
ext.prefix = { "${meta.assembler}-${meta.id}" }
ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}"
- publishDir = [
- path: { "${params.outdir}/Ancient_DNA/variant_calling/freebayes" },
- mode: params.publish_dir_mode,
- pattern: "*.vcf.gz"
- ]
+ publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/freebayes" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"]
}
withName: BCFTOOLS_VIEW {
ext.prefix = { "${meta.assembler}-${meta.id}.filtered" }
ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'"
- publishDir = [
- path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" },
- mode: params.publish_dir_mode,
- pattern: "*.vcf.gz"
- ]
+ publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"]
}
withName: BCFTOOLS_CONSENSUS {
@@ -617,32 +497,12 @@ process {
}
withName: METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/depths/contigs" },
- mode: params.publish_dir_mode,
- pattern: '*-depth.txt.gz'
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/depths/contigs" }, mode: params.publish_dir_mode, pattern: '*-depth.txt.gz']
ext.prefix = { "${meta.assembler}-${meta.id}-depth" }
}
withName: METABAT2_METABAT2 {
- publishDir = [
- [
- path: { "${params.outdir}/GenomeBinning/MetaBAT2/bins/" },
- mode: params.publish_dir_mode,
- pattern: '*[!lowDepth|tooShort|unbinned].fa.gz'
- ],
- [
- path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" },
- mode: params.publish_dir_mode,
- pattern: '*tooShort.fa.gz'
- ],
- [
- path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" },
- mode: params.publish_dir_mode,
- pattern: '*lowDepth.fa.gz'
- ]
- ]
+ publishDir = [[path: { "${params.outdir}/GenomeBinning/MetaBAT2/bins/" }, mode: params.publish_dir_mode, pattern: '*[!lowDepth|tooShort|unbinned].fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*tooShort.fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*lowDepth.fa.gz']]
ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" }
ext.args = [
params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}",
@@ -695,23 +555,7 @@ process {
}
withName: SPLIT_FASTA {
- publishDir = [
- [
- path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned" },
- mode: params.publish_dir_mode,
- pattern: '*.*[0-9].fa.gz'
- ],
- [
- path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" },
- mode: params.publish_dir_mode,
- pattern: '*.pooled.fa.gz'
- ],
- [
- path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" },
- mode: params.publish_dir_mode,
- pattern: '*.remaining.fa.gz'
- ]
- ]
+ publishDir = [[path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned" }, mode: params.publish_dir_mode, pattern: '*.*[0-9].fa.gz'], [path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" }, mode: params.publish_dir_mode, pattern: '*.pooled.fa.gz'], [path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" }, mode: params.publish_dir_mode, pattern: '*.remaining.fa.gz']]
}
withName: DASTOOL_FASTATOCONTIG2BIN_METABAT2 {
@@ -773,32 +617,19 @@ process {
}
withName: TIARA_SUMMARY {
- publishDir = [
- path: { "${params.outdir}/GenomeBinning/Tiara" },
- mode: params.publish_dir_mode,
- pattern: "tiara_summary.tsv"
- ]
+ publishDir = [path: { "${params.outdir}/GenomeBinning/Tiara" }, mode: params.publish_dir_mode, pattern: "tiara_summary.tsv"]
ext.prefix = "tiara_summary"
}
withName: MMSEQS_DATABASES {
ext.prefix = { "${params.metaeuk_mmseqs_db.replaceAll("/", "-")}" }
- publishDir = [
- path: { "${params.outdir}/Annotation/mmseqs_db/" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- enabled: params.save_mmseqs_db
- ]
+ publishDir = [path: { "${params.outdir}/Annotation/mmseqs_db/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.save_mmseqs_db]
}
withName: METAEUK_EASYPREDICT {
ext.args = ""
ext.prefix = { "${meta.id}" }
- publishDir = [
- path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+ publishDir = [path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName: MULTIQC {
diff --git a/conf/test_full.config b/conf/test_full.config
index 9a01bc58..b09e6fe1 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -16,30 +16,30 @@ params {
// Input data for full size test
// hg19 reference with highly conserved and low-complexity regions masked by Brian Bushnell
- host_fasta = "s3://ngi-igenomes/test-data/mag/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz"
- input = "s3://ngi-igenomes/test-data/mag/samplesheets/samplesheet.full.csv"
+ host_fasta = "s3://ngi-igenomes/test-data/mag/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz"
+ input = "s3://ngi-igenomes/test-data/mag/samplesheets/samplesheet.full.csv"
//centrifuge_db = "s3://ngi-igenomes/test-data/mag/p_compressed+h+v.tar.gz"
- kraken2_db = "s3://ngi-igenomes/test-data/mag/minikraken_8GB_202003.tgz"
- cat_db = "s3://ngi-igenomes/test-data/mag/CAT_prepare_20210107.tar.gz"
+ kraken2_db = "s3://ngi-igenomes/test-data/mag/minikraken_8GB_202003.tgz"
+ cat_db = "s3://ngi-igenomes/test-data/mag/CAT_prepare_20210107.tar.gz"
// gtdb_db = "s3://ngi-igenomes/test-data/mag/gtdbtk_r214_data.tar.gz" ## This should be updated to release 220, once we get GTDB-Tk working again
- skip_gtdbtk = true
+ skip_gtdbtk = true
// TODO TEMPORARY: deactivate SPAdes due to incompatibility of container with fusion file system
- skip_spades = true
- skip_spadeshybrid = true
+ skip_spades = false
+ skip_spadeshybrid = false
// reproducibility options for assembly
- spades_fix_cpus = 10
- spadeshybrid_fix_cpus = 10
- megahit_fix_cpu_1 = true
+ spades_fix_cpus = 10
+ spadeshybrid_fix_cpus = 10
+ megahit_fix_cpu_1 = true
// available options to enable reproducibility for BUSCO (--busco_db) not used here
// to allow detection of possible problems in automated lineage selection mode using public databases
// test CAT with official taxonomic ranks only
- cat_official_taxonomy = true
+ cat_official_taxonomy = true
// Skip CONCOCT due to timeout issues
- skip_concoct = true
+ skip_concoct = true
}
diff --git a/docs/output.md b/docs/output.md
index 7ca0ae13..4e43ffb6 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -219,10 +219,10 @@ Trimmed (short) reads are assembled with both megahit and SPAdes. Hybrid assembl
Output files
- `Assembly/SPAdes/`
- - `[sample/group]_scaffolds.fasta.gz`: Compressed assembled scaffolds in fasta format
- - `[sample/group]_graph.gfa.gz`: Compressed assembly graph in gfa format
- - `[sample/group]_contigs.fasta.gz`: Compressed assembled contigs in fasta format
- - `[sample/group].log`: Log file
+ - `[sample/group].scaffolds.fa.gz`: Compressed assembled scaffolds in fasta format
+ - `[sample/group].assembly.gfa.gz`: Compressed assembly graph in gfa format
+ - `[sample/group].contigs.fa.gz`: Compressed assembled contigs in fasta format
+ - `[sample/group].spades.log`: Log file
- `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs
- `SPAdes-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set.
- `SPAdes-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap").
@@ -238,10 +238,10 @@ SPAdesHybrid is a part of the [SPAdes](http://cab.spbu.ru/software/spades/) soft
Output files
- `Assembly/SPAdesHybrid/`
- - `[sample/group]_scaffolds.fasta.gz`: Compressed assembled scaffolds in fasta format
- - `[sample/group]_graph.gfa.gz`: Compressed assembly graph in gfa format
- - `[sample/group]_contigs.fasta.gz`: Compressed assembled contigs in fasta format
- - `[sample/group].log`: Log file
+ - `[sample/group].scaffolds.fa.gz`: Compressed assembled scaffolds in fasta format
+ - `[sample/group].assembly.gfa.gz`: Compressed assembly graph in gfa format
+ - `[sample/group].contigs.fa.gz`: Compressed assembled contigs in fasta format
+ - `[sample/group].spades.log`: Log file
- `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs
- `SPAdesHybrid-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set.
- `SPAdesHybrid-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap").
diff --git a/modules.json b/modules.json
index 3fba80df..3eea27cd 100644
--- a/modules.json
+++ b/modules.json
@@ -172,6 +172,11 @@
"git_sha": "283613159e079152f1336cef0db1c836086206e0",
"installed_by": ["modules"]
},
+ "megahit": {
+ "branch": "master",
+ "git_sha": "7755db15e36b30da564cd67fffdfe18a255092aa",
+ "installed_by": ["modules"]
+ },
"metabat2/jgisummarizebamcontigdepths": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
@@ -247,6 +252,11 @@
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
+ "spades": {
+ "branch": "master",
+ "git_sha": "cfebb244d8c83ae533bf2db399f9af361927d504",
+ "installed_by": ["modules"]
+ },
"tiara/tiara": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
diff --git a/modules/local/megahit.nf b/modules/local/megahit.nf
deleted file mode 100644
index 6f31425c..00000000
--- a/modules/local/megahit.nf
+++ /dev/null
@@ -1,40 +0,0 @@
-process MEGAHIT {
- tag "$meta.id"
-
- conda "bioconda::megahit=1.2.9"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/megahit:1.2.9--h2e03b76_1' :
- 'biocontainers/megahit:1.2.9--h2e03b76_1' }"
-
- input:
- tuple val(meta), path(reads1), path(reads2)
-
- output:
- tuple val(meta), path("MEGAHIT/MEGAHIT-${meta.id}.contigs.fa"), emit: assembly
- path "MEGAHIT/*.log" , emit: log
- path "MEGAHIT/MEGAHIT-${meta.id}.contigs.fa.gz" , emit: assembly_gz
- path "versions.yml" , emit: versions
-
- script:
- def args = task.ext.args ?: ''
- def input = meta.single_end ? "-r \"" + reads1.join(",") + "\"" : "-1 \"" + reads1.join(",") + "\" -2 \"" + reads2.join(",") + "\""
- mem = task.memory.toBytes()
- if ( !params.megahit_fix_cpu_1 || task.cpus == 1 )
- """
- ## Check if we're in the same work directory as a previous failed MEGAHIT run
- if [[ -d MEGAHIT ]]; then
- rm -r MEGAHIT/
- fi
-
- megahit $args -t "${task.cpus}" -m $mem $input -o MEGAHIT --out-prefix "MEGAHIT-${meta.id}"
-
- gzip -c "MEGAHIT/MEGAHIT-${meta.id}.contigs.fa" > "MEGAHIT/MEGAHIT-${meta.id}.contigs.fa.gz"
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//')
- END_VERSIONS
- """
- else
- error "ERROR: '--megahit_fix_cpu_1' was specified, but not succesfully applied. Likely this is caused by changed process properties in a custom config file."
-}
diff --git a/modules/local/spades.nf b/modules/local/spades.nf
deleted file mode 100644
index 9ef7ec77..00000000
--- a/modules/local/spades.nf
+++ /dev/null
@@ -1,51 +0,0 @@
-process SPADES {
- tag "$meta.id"
-
- conda "bioconda::spades=3.15.3"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' :
- 'biocontainers/spades:3.15.3--h95f258a_0' }"
-
- input:
- tuple val(meta), path(reads)
-
- output:
- tuple val(meta), path("SPAdes-${meta.id}_scaffolds.fasta"), emit: assembly
- path "SPAdes-${meta.id}.log" , emit: log
- path "SPAdes-${meta.id}_contigs.fasta.gz" , emit: contigs_gz
- path "SPAdes-${meta.id}_scaffolds.fasta.gz" , emit: assembly_gz
- path "SPAdes-${meta.id}_graph.gfa.gz" , emit: graph
- path "versions.yml" , emit: versions
-
- script:
- def args = task.ext.args ?: ''
- maxmem = task.memory.toGiga()
- // The -s option is not supported for metaspades. Each time this is called with `meta.single_end` it's because
- // read depth was normalized with BBNorm, which actually outputs pairs, but in an interleaved file.
- def readstr = meta.single_end ? "--12 ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
-
- if ( params.spades_fix_cpus == -1 || task.cpus == params.spades_fix_cpus )
- """
- metaspades.py \
- $args \
- --threads "${task.cpus}" \
- --memory $maxmem \
- ${readstr} \
- -o spades
- mv spades/assembly_graph_with_scaffolds.gfa SPAdes-${meta.id}_graph.gfa
- mv spades/scaffolds.fasta SPAdes-${meta.id}_scaffolds.fasta
- mv spades/contigs.fasta SPAdes-${meta.id}_contigs.fasta
- mv spades/spades.log SPAdes-${meta.id}.log
- gzip "SPAdes-${meta.id}_contigs.fasta"
- gzip "SPAdes-${meta.id}_graph.gfa"
- gzip -c "SPAdes-${meta.id}_scaffolds.fasta" > "SPAdes-${meta.id}_scaffolds.fasta.gz"
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- python: \$(python --version 2>&1 | sed 's/Python //g')
- metaspades: \$(metaspades.py --version | sed "s/SPAdes genome assembler v//; s/ \\[.*//")
- END_VERSIONS
- """
- else
- error "ERROR: '--spades_fix_cpus' was specified, but not succesfully applied. Likely this is caused by changed process properties in a custom config file."
-}
diff --git a/modules/local/spadeshybrid.nf b/modules/local/spadeshybrid.nf
deleted file mode 100644
index 13578a69..00000000
--- a/modules/local/spadeshybrid.nf
+++ /dev/null
@@ -1,49 +0,0 @@
-process SPADESHYBRID {
- tag "$meta.id"
-
- conda "bioconda::spades=3.15.3"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' :
- 'biocontainers/spades:3.15.3--h95f258a_0' }"
-
- input:
- tuple val(meta), path(long_reads), path(short_reads)
-
- output:
- tuple val(meta), path("SPAdesHybrid-${meta.id}_scaffolds.fasta"), emit: assembly
- path "SPAdesHybrid-${meta.id}.log" , emit: log
- path "SPAdesHybrid-${meta.id}_contigs.fasta.gz" , emit: contigs_gz
- path "SPAdesHybrid-${meta.id}_scaffolds.fasta.gz" , emit: assembly_gz
- path "SPAdesHybrid-${meta.id}_graph.gfa.gz" , emit: graph
- path "versions.yml" , emit: versions
-
- script:
- def args = task.ext.args ?: ''
- maxmem = task.memory.toGiga()
- if ( params.spadeshybrid_fix_cpus == -1 || task.cpus == params.spadeshybrid_fix_cpus )
- """
- metaspades.py \
- $args \
- --threads "${task.cpus}" \
- --memory $maxmem \
- --pe1-1 ${short_reads[0]} \
- --pe1-2 ${short_reads[1]} \
- --nanopore ${long_reads} \
- -o spades
- mv spades/assembly_graph_with_scaffolds.gfa SPAdesHybrid-${meta.id}_graph.gfa
- mv spades/scaffolds.fasta SPAdesHybrid-${meta.id}_scaffolds.fasta
- mv spades/contigs.fasta SPAdesHybrid-${meta.id}_contigs.fasta
- mv spades/spades.log SPAdesHybrid-${meta.id}.log
- gzip "SPAdesHybrid-${meta.id}_contigs.fasta"
- gzip "SPAdesHybrid-${meta.id}_graph.gfa"
- gzip -c "SPAdesHybrid-${meta.id}_scaffolds.fasta" > "SPAdesHybrid-${meta.id}_scaffolds.fasta.gz"
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- python: \$(python --version 2>&1 | sed 's/Python //g')
- metaspades: \$(metaspades.py --version | sed "s/SPAdes genome assembler v//; s/ \\[.*//")
- END_VERSIONS
- """
- else
- error "ERROR: '--spadeshybrid_fix_cpus' was specified, but not succesfully applied. Likely this is caused by changed process properties in a custom config file."
-}
diff --git a/modules/nf-core/megahit/environment.yml b/modules/nf-core/megahit/environment.yml
new file mode 100644
index 00000000..eed8b725
--- /dev/null
+++ b/modules/nf-core/megahit/environment.yml
@@ -0,0 +1,6 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::megahit=1.2.9
+ - conda-forge::pigz=2.8
diff --git a/modules/nf-core/megahit/main.nf b/modules/nf-core/megahit/main.nf
new file mode 100644
index 00000000..f6e50f94
--- /dev/null
+++ b/modules/nf-core/megahit/main.nf
@@ -0,0 +1,70 @@
+process MEGAHIT {
+ tag "${meta.id}"
+ label 'process_high'
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f2cb827988dca7067ff8096c37cb20bc841c878013da52ad47a50865d54efe83/data' :
+ 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }"
+
+ input:
+ tuple val(meta), path(reads1), path(reads2)
+
+ output:
+ tuple val(meta), path("*.contigs.fa.gz") , emit: contigs
+ tuple val(meta), path("intermediate_contigs/k*.contigs.fa.gz") , emit: k_contigs
+ tuple val(meta), path("intermediate_contigs/k*.addi.fa.gz") , emit: addi_contigs
+ tuple val(meta), path("intermediate_contigs/k*.local.fa.gz") , emit: local_contigs
+ tuple val(meta), path("intermediate_contigs/k*.final.contigs.fa.gz"), emit: kfinal_contigs
+ tuple val(meta), path('*.log') , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}"
+ """
+ megahit \\
+ ${reads_command} \\
+ ${args} \\
+ -t ${task.cpus} \\
+ --out-prefix ${prefix}
+
+ pigz \\
+ --no-name \\
+ -p ${task.cpus} \\
+ ${args2} \\
+ megahit_out/*.fa \\
+ megahit_out/intermediate_contigs/*.fa
+
+ mv megahit_out/* .
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}"
+ """
+ mkdir -p intermediate_contigs
+ echo "" | gzip > ${prefix}.contigs.fa.gz
+ echo "" | gzip > intermediate_contigs/k21.contigs.fa.gz
+ echo "" | gzip > intermediate_contigs/k21.addi.fa.gz
+ echo "" | gzip > intermediate_contigs/k21.local.fa.gz
+ echo "" | gzip > intermediate_contigs/k21.final.contigs.fa.gz
+ touch ${prefix}.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/megahit/meta.yml b/modules/nf-core/megahit/meta.yml
new file mode 100644
index 00000000..04dab4c2
--- /dev/null
+++ b/modules/nf-core/megahit/meta.yml
@@ -0,0 +1,114 @@
+name: megahit
+description: An ultra-fast metagenomic assembler for large and complex metagenomics
+keywords:
+ - megahit
+ - denovo
+ - assembly
+ - debruijn
+ - metagenomics
+tools:
+ - megahit:
+ description: "An ultra-fast single-node solution for large and complex metagenomics
+ assembly via succinct de Bruijn graph"
+ homepage: https://github.com/voutcn/megahit
+ documentation: https://github.com/voutcn/megahit
+ tool_dev_url: https://github.com/voutcn/megahit
+ doi: "10.1093/bioinformatics/btv033"
+ licence: ["GPL v3"]
+ args_id: "$args"
+ identifier: biotools:megahit
+ - pigz:
+ description: "Parallel implementation of the gzip algorithm."
+ homepage: "https://zlib.net/pigz/"
+ documentation: "https://zlib.net/pigz/pigz.pdf"
+ args_id: "$args2"
+
+ identifier: biotools:megahit
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information and input single, or paired-end FASTA/FASTQ files (optionally decompressed)
+ e.g. [ id:'test', single_end:false ]
+ - reads1:
+ type: file
+ description: |
+ A single or list of input FastQ files for single-end or R1 of paired-end library(s),
+ respectively in gzipped or uncompressed FASTQ or FASTA format.
+ - reads2:
+ type: file
+ description: |
+ A single or list of input FastQ files for R2 of paired-end library(s),
+ respectively in gzipped or uncompressed FASTQ or FASTA format.
+output:
+ - contigs:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.contigs.fa.gz":
+ type: file
+ description: Final final contigs result of the assembly in FASTA format.
+ pattern: "*.contigs.fa.gz"
+ - k_contigs:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - intermediate_contigs/k*.contigs.fa.gz:
+ type: file
+ description: Contigs assembled from the de Bruijn graph of order-K
+ pattern: "k*.contigs.fa.gz"
+ - addi_contigs:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - intermediate_contigs/k*.addi.fa.gz:
+ type: file
+ description: Contigs assembled after iteratively removing local low coverage
+ unitigs in the de Bruijn graph of order-K
+ pattern: "k*.addi.fa.gz"
+ - local_contigs:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - intermediate_contigs/k*.local.fa.gz:
+ type: file
+ description: Contigs of the locally assembled contigs for k=K
+ pattern: "k*.local.fa.gz"
+ - kfinal_contigs:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - intermediate_contigs/k*.final.contigs.fa.gz:
+ type: file
+ description: Stand-alone contigs for k=K; if local assembly is turned on, the
+ file will be empty
+ pattern: "k*.final.contigs.fa.gz"
+ - log:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.log":
+ type: file
+ description: Log file containing statistics of the assembly output
+ pattern: "*.log"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@jfy133"
+maintainers:
+ - "@jfy133"
diff --git a/modules/nf-core/megahit/tests/main.nf.test b/modules/nf-core/megahit/tests/main.nf.test
new file mode 100644
index 00000000..b52765d4
--- /dev/null
+++ b/modules/nf-core/megahit/tests/main.nf.test
@@ -0,0 +1,126 @@
+nextflow_process {
+
+ name "Test Process MEGAHIT"
+ script "../main.nf"
+ process "MEGAHIT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "megahit"
+
+ test("sarscov2 - fastq - se") {
+
+ when {
+ process {
+ """
+ input[0] = [ [id:"test", single_end:true],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ []]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") },
+ { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert snapshot(
+ path(process.out.log[0][1]).readLines().last().contains("ALL DONE. Time elapsed"),
+ process.out.versions
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq - pe") {
+
+ when {
+ process {
+ """
+ input[0] = [ [id:"test", single_end:false],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") },
+ { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert snapshot(
+ path(process.out.log[0][1]).readLines().last().contains("ALL DONE. Time elapsed"),
+ process.out.versions
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq - pe - coassembly") {
+
+ when {
+ process {
+ """
+ input[0] = [ [id:"test", single_end:false],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true)] ,
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") },
+ { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}},
+ { assert snapshot(
+ path(process.out.log[0][1]).readLines().last().contains("ALL DONE. Time elapsed"),
+ process.out.versions
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("sarscov2 - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [ [id:"test", single_end:true],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ []
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+}
diff --git a/modules/nf-core/megahit/tests/main.nf.test.snap b/modules/nf-core/megahit/tests/main.nf.test.snap
new file mode 100644
index 00000000..4677cc33
--- /dev/null
+++ b/modules/nf-core/megahit/tests/main.nf.test.snap
@@ -0,0 +1,172 @@
+{
+ "sarscov2 - fastq - se": {
+ "content": [
+ true,
+ [
+ "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-12T16:45:42.387947698"
+ },
+ "sarscov2 - fastq - pe": {
+ "content": [
+ true,
+ [
+ "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-12T16:45:48.679485983"
+ },
+ "sarscov2 - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "6": [
+ "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6"
+ ],
+ "addi_contigs": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "contigs": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "k_contigs": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "kfinal_contigs": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "local_contigs": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-12T16:44:35.245399991"
+ },
+ "sarscov2 - fastq - pe - coassembly": {
+ "content": [
+ true,
+ [
+ "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-12T16:45:56.23363342"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/megahit/tests/tags.yml b/modules/nf-core/megahit/tests/tags.yml
new file mode 100644
index 00000000..9e865846
--- /dev/null
+++ b/modules/nf-core/megahit/tests/tags.yml
@@ -0,0 +1,2 @@
+megahit:
+ - "modules/nf-core/megahit/**"
diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml
new file mode 100644
index 00000000..8cc5321f
--- /dev/null
+++ b/modules/nf-core/spades/environment.yml
@@ -0,0 +1,5 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::spades=4.0.0
diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf
new file mode 100644
index 00000000..36cdfe44
--- /dev/null
+++ b/modules/nf-core/spades/main.nf
@@ -0,0 +1,102 @@
+process SPADES {
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/spades:4.0.0--h5fb382e_1' :
+ 'biocontainers/spades:4.0.0--h5fb382e_1' }"
+
+ input:
+ tuple val(meta), path(illumina), path(pacbio), path(nanopore)
+ path yml
+ path hmm
+
+ output:
+ tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds
+ tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs
+ tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts
+ tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
+ tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa
+ tuple val(meta), path('*.warnings.log') , optional:true, emit: warnings
+ tuple val(meta), path('*.spades.log') , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def maxmem = task.memory.toGiga()
+ def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
+ def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
+ def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
+ def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
+ def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads"
+ """
+ spades.py \\
+ $args \\
+ --threads $task.cpus \\
+ --memory $maxmem \\
+ $custom_hmms \\
+ $reads \\
+ -o ./
+ mv spades.log ${prefix}.spades.log
+
+ if [ -f scaffolds.fasta ]; then
+ mv scaffolds.fasta ${prefix}.scaffolds.fa
+ gzip -n ${prefix}.scaffolds.fa
+ fi
+ if [ -f contigs.fasta ]; then
+ mv contigs.fasta ${prefix}.contigs.fa
+ gzip -n ${prefix}.contigs.fa
+ fi
+ if [ -f transcripts.fasta ]; then
+ mv transcripts.fasta ${prefix}.transcripts.fa
+ gzip -n ${prefix}.transcripts.fa
+ fi
+ if [ -f assembly_graph_with_scaffolds.gfa ]; then
+ mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
+ gzip -n ${prefix}.assembly.gfa
+ fi
+
+ if [ -f gene_clusters.fasta ]; then
+ mv gene_clusters.fasta ${prefix}.gene_clusters.fa
+ gzip -n ${prefix}.gene_clusters.fa
+ fi
+
+ if [ -f warnings.log ]; then
+ mv warnings.log ${prefix}.warnings.log
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def maxmem = task.memory.toGiga()
+ def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
+ def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
+ def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
+ def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
+ def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads"
+ """
+ echo "" | gzip > ${prefix}.scaffolds.fa.gz
+ echo "" | gzip > ${prefix}.contigs.fa.gz
+ echo "" | gzip > ${prefix}.transcripts.fa.gz
+ echo "" | gzip > ${prefix}.gene_clusters.fa.gz
+ echo "" | gzip > ${prefix}.assembly.gfa.gz
+ touch ${prefix}.spades.log
+ touch ${prefix}.warnings.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml
new file mode 100644
index 00000000..986871be
--- /dev/null
+++ b/modules/nf-core/spades/meta.yml
@@ -0,0 +1,99 @@
+name: spades
+description: Assembles a small genome (bacterial, fungal, viral)
+keywords:
+ - genome
+ - assembly
+ - genome assembler
+ - small genome
+ - de novo assembler
+tools:
+ - spades:
+ description: SPAdes (St. Petersburg genome assembler) is intended for both standard isolates and single-cell MDA bacteria assemblies.
+ homepage: http://cab.spbu.ru/files/release3.15.0/manual.html
+ documentation: http://cab.spbu.ru/files/release3.15.0/manual.html
+ tool_dev_url: https://github.com/ablab/spades
+ doi: 10.1089/cmb.2012.0021
+ licence: ["GPL v2"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - illumina:
+ type: file
+ description: |
+ List of input FastQ (Illumina or PacBio CCS reads) files
+ of size 1 and 2 for single-end and paired-end data,
+ respectively. This input data type is required.
+ - pacbio:
+ type: file
+ description: |
+ List of input PacBio CLR FastQ files of size 1.
+ - nanopore:
+ type: file
+ description: |
+ List of input FastQ files of size 1, originating from Oxford Nanopore technology.
+ - yml:
+ type: file
+ description: |
+ Path to yml file containing read information.
+ The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML.
+ File entries in this yml must contain only the file name and no paths.
+ pattern: "*.{yml,yaml}"
+ - hmm:
+ type: file
+ description: File or directory with amino acid HMMs for Spades HMM-guided mode.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - scaffolds:
+ type: file
+ description: |
+ Fasta file containing scaffolds
+ pattern: "*.fa.gz"
+ - contigs:
+ type: file
+ description: |
+ Fasta file containing contigs
+ pattern: "*.fa.gz"
+ - transcripts:
+ type: file
+ description: |
+ Fasta file containing transcripts
+ pattern: "*.fa.gz"
+ - gene_clusters:
+ type: file
+ description: |
+ Fasta file containing gene_clusters
+ pattern: "*.fa.gz"
+ - gfa:
+ type: file
+ description: |
+ gfa file containing assembly
+ pattern: "*.gfa.gz"
+ - log:
+ type: file
+ description: |
+ Spades log file
+ pattern: "*.spades.log"
+ - log:
+ type: file
+ description: |
+ Spades warning log file
+ pattern: "*.warning.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@JoseEspinosa"
+ - "@drpatelh"
+ - "@d4straub"
+maintainers:
+ - "@JoseEspinosa"
+ - "@drpatelh"
+ - "@d4straub"
diff --git a/modules/nf-core/spades/tests/main.nf.test b/modules/nf-core/spades/tests/main.nf.test
new file mode 100644
index 00000000..3a93f486
--- /dev/null
+++ b/modules/nf-core/spades/tests/main.nf.test
@@ -0,0 +1,228 @@
+nextflow_process {
+
+ name "Test Process SPADES"
+ script "../main.nf"
+ process "SPADES"
+ config "./nextflow.config"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "spades"
+
+ test("sarscov2 - se ") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ],
+ [],
+ []
+ ]
+ input[1] = []
+ input[2] = []
+ """
+ }
+ }
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.scaffolds,
+ process.out.contigs,
+ process.out.transcripts,
+ process.out.gene_clusters,
+ process.out.gfa,
+ process.out.versions
+ ).match() },
+ { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } }
+ )
+ }
+ }
+
+ test("sarscov2 - pe ") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ],
+ [],
+ []
+ ]
+ input [1] = []
+ input [2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.scaffolds,
+ process.out.contigs,
+ process.out.transcripts,
+ process.out.gene_clusters,
+ process.out.gfa,
+ process.out.versions
+ ).match() },
+ { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } },
+ { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} }
+ )
+ }
+
+ }
+ // isnt perfect, because CCS reads should rather be used with -s instead of --pacbio
+ test("sarscov2 - pe - pacbio ") {
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ],
+ [],
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true) ]
+ ]
+ input [1] = []
+ input [2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.scaffolds,
+ process.out.contigs,
+ process.out.transcripts,
+ process.out.gene_clusters,
+ process.out.gfa,
+ process.out.versions
+ ).match() },
+ { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } },
+ { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} }
+ )
+ }
+ }
+
+ test("sarscov2 - pe - nanopore ") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ],
+ [],
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true) ]
+ ]
+ input [1] = []
+ input [2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.scaffolds,
+ process.out.contigs,
+ process.out.transcripts,
+ process.out.gene_clusters,
+ process.out.gfa,
+ process.out.versions
+ ).match() },
+ { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } },
+ { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} }
+ )
+ }
+ }
+
+ test("sarscov2 - pe - nanopore - yml ") {
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ],
+ [],
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true) ]
+ ]
+ input [1] = file(params.modules_testdata_base_path + "delete_me/spades/spades_input_yml.yml", checkIfExists: true)
+ input [2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.scaffolds,
+ process.out.contigs,
+ process.out.transcripts,
+ process.out.gene_clusters,
+ process.out.gfa,
+ process.out.versions
+ ).match() },
+ { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } },
+ { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} }
+ )
+ }
+ }
+
+ test("sarscov2 - pe - hmm ") {
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file("https://github.com/nf-core/test-datasets/raw/viralrecon/illumina/sispa/SRR11140744_R1.fastq.gz", checkIfExists: true),
+ file("https://github.com/nf-core/test-datasets/raw/viralrecon/illumina/sispa/SRR11140744_R2.fastq.gz", checkIfExists: true) ],
+ [],
+ []
+ ]
+ input [1] = []
+ input [2] = [file(params.modules_testdata_base_path + "/genomics/sarscov2/genome/proteome.hmm.gz", checkIfExists: true)]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.scaffolds,
+ process.out.contigs,
+ process.out.transcripts,
+ process.out.gene_clusters,
+ process.out.gfa,
+ process.out.versions
+ ).match() },
+ { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } }
+ )
+ }
+ }
+
+ test("sarscov2 - pe - stub ") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ],
+ [],
+ []
+ ]
+ input [1] = []
+ input [2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+}
diff --git a/modules/nf-core/spades/tests/main.nf.test.snap b/modules/nf-core/spades/tests/main.nf.test.snap
new file mode 100644
index 00000000..e1b3b652
--- /dev/null
+++ b/modules/nf-core/spades/tests/main.nf.test.snap
@@ -0,0 +1,403 @@
+{
+ "sarscov2 - pe - nanopore ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.scaffolds.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.contigs.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,19418df83534fc93543dec4ec9b2ae72"
+ ]
+ ],
+ [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T07:13:08.663068339"
+ },
+ "sarscov2 - pe - hmm ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.scaffolds.fa.gz:md5,ce077d5f3380690f8d9a5fe188f82128"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,07136eab8e231f095dc5dd62f1b62a91"
+ ]
+ ],
+ [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T08:04:19.650636803"
+ },
+ "sarscov2 - pe - pacbio ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.scaffolds.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.contigs.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,19418df83534fc93543dec4ec9b2ae72"
+ ]
+ ],
+ [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T07:12:49.305512756"
+ },
+ "sarscov2 - pe ": {
+ "content": [
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.contigs.fa.gz:md5,70e4a5485dd59566b212a199c31c343b"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,b773132d52be5090cdbdf5a643027093"
+ ]
+ ],
+ [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T07:12:36.161628498"
+ },
+ "sarscov2 - pe - nanopore - yml ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.scaffolds.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.contigs.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,19418df83534fc93543dec4ec9b2ae72"
+ ]
+ ],
+ [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T07:13:21.868805946"
+ },
+ "sarscov2 - se ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.scaffolds.fa.gz:md5,65ba6a517c152dbe219bf4b5b92bdad7"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.contigs.fa.gz:md5,65ba6a517c152dbe219bf4b5b92bdad7"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.assembly.gfa.gz:md5,e4836fdf7104d79e314e3e50986b4bb2"
+ ]
+ ],
+ [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T07:12:16.562778962"
+ },
+ "sarscov2 - pe - stub ": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.scaffolds.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.transcripts.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.gene_clusters.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "6": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spades.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "7": [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ],
+ "contigs": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "gene_clusters": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.gene_clusters.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "gfa": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.assembly.gfa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spades.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "scaffolds": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.scaffolds.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "transcripts": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.transcripts.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,990abcdf543421412170e5cf413ec56d"
+ ],
+ "warnings": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T07:20:07.195881734"
+ }
+}
diff --git a/modules/nf-core/spades/tests/nextflow.config b/modules/nf-core/spades/tests/nextflow.config
new file mode 100644
index 00000000..adec1bde
--- /dev/null
+++ b/modules/nf-core/spades/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: SPADES {
+ ext.args = '--rnaviral'
+ }
+}
diff --git a/modules/nf-core/spades/tests/tags.yml b/modules/nf-core/spades/tests/tags.yml
new file mode 100644
index 00000000..035861ff
--- /dev/null
+++ b/modules/nf-core/spades/tests/tags.yml
@@ -0,0 +1,2 @@
+spades:
+ - "modules/nf-core/spades/**"
diff --git a/nextflow.config b/nextflow.config
index 02820eb1..12c6c6aa 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -90,7 +90,7 @@ params {
cat_official_taxonomy = false
save_cat_db = false
skip_gtdbtk = false
- gtdb_db = "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz"
+ gtdb_db = "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz"
gtdb_mash = null
gtdbtk_min_completeness = 50.0
gtdbtk_max_contamination = 10.0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 40e3553e..1063c4ac 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -485,7 +485,7 @@
"gtdb_db": {
"type": "string",
"description": "Specify the location of a GTDBTK database. Can be either an uncompressed directory or a `.tar.gz` archive. If not specified will be downloaded for you when GTDBTK or binning QC is not skipped.",
- "default": "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz"
+ "default": "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz"
},
"gtdb_mash": {
"type": "string",
@@ -551,7 +551,7 @@
},
"spades_options": {
"type": "string",
- "description": "Additional custom options for SPAdes.",
+ "description": "Additional custom options for SPAdes and SPAdesHybrid. You must also specify `--meta` to run SPAdes in metagenomic mode if customising these options!",
"help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")"
},
"megahit_options": {
diff --git a/workflows/mag.nf b/workflows/mag.nf
index e8adec33..01519243 100644
--- a/workflows/mag.nf
+++ b/workflows/mag.nf
@@ -43,7 +43,11 @@ include { KRONA_KRONADB } from '../modul
include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main'
include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main'
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
+include { MEGAHIT } from '../modules/nf-core/megahit/main'
+include { SPADES as METASPADES } from '../modules/nf-core/spades/main'
+include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main'
include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_ASSEMBLYINPUT } from '../modules/nf-core/gunzip'
include { PRODIGAL } from '../modules/nf-core/prodigal/main'
include { PROKKA } from '../modules/nf-core/prokka/main'
include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main'
@@ -61,9 +65,6 @@ include { KRAKEN2 } from '../modules
include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads'
include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads'
include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads'
-include { MEGAHIT } from '../modules/local/megahit'
-include { SPADES } from '../modules/local/spades'
-include { SPADESHYBRID } from '../modules/local/spadeshybrid'
include { QUAST } from '../modules/local/quast'
include { QUAST_BINS } from '../modules/local/quast_bins'
include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary'
@@ -74,112 +75,105 @@ include { CAT_SUMMARY } from "../modules
include { BIN_SUMMARY } from '../modules/local/bin_summary'
include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv'
-////////////////////////////////////////////////////
-/* -- Create channel for reference databases -- */
-////////////////////////////////////////////////////
-
-if ( params.host_genome ) {
- host_fasta = params.genomes[params.host_genome].fasta ?: false
- ch_host_fasta = Channel
- .value(file( "${host_fasta}" ))
- host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false
- ch_host_bowtie2index = Channel
- .value(file( "${host_bowtie2index}/*" ))
-} else if ( params.host_fasta ) {
- ch_host_fasta = Channel
- .value(file( "${params.host_fasta}" ))
-} else {
- ch_host_fasta = Channel.empty()
-}
-
-if (params.busco_db) {
- ch_busco_db = file(params.busco_db, checkIfExists: true)
-} else {
- ch_busco_db = []
-}
+workflow MAG {
-if(params.checkm_db) {
- ch_checkm_db = file(params.checkm_db, checkIfExists: true)
-}
+ take:
+ ch_raw_short_reads // channel: samplesheet read in from --input
+ ch_raw_long_reads
+ ch_input_assemblies
-if (params.gunc_db) {
- ch_gunc_db = file(params.gunc_db, checkIfExists: true)
-} else {
- ch_gunc_db = Channel.empty()
-}
+ main:
-if(params.kraken2_db){
- ch_kraken2_db_file = file(params.kraken2_db, checkIfExists: true)
-} else {
- ch_kraken2_db_file = []
-}
+ ch_versions = Channel.empty()
+ ch_multiqc_files = Channel.empty()
-if(params.cat_db){
- ch_cat_db_file = Channel
- .value(file( "${params.cat_db}" ))
-} else {
- ch_cat_db_file = Channel.empty()
-}
+ ////////////////////////////////////////////////////
+ /* -- Create channel for reference databases -- */
+ ////////////////////////////////////////////////////
+
+ if ( params.host_genome ) {
+ host_fasta = params.genomes[params.host_genome].fasta ?: false
+ ch_host_fasta = Channel
+ .value(file( "${host_fasta}" ))
+ host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false
+ ch_host_bowtie2index = Channel
+ .value(file( "${host_bowtie2index}/*" ))
+ } else if ( params.host_fasta ) {
+ ch_host_fasta = Channel
+ .value(file( "${params.host_fasta}" ))
+ } else {
+ ch_host_fasta = Channel.empty()
+ }
-if(params.krona_db){
- ch_krona_db_file = Channel
- .value(file( "${params.krona_db}" ))
-} else {
- ch_krona_db_file = Channel.empty()
-}
+ if (params.busco_db) {
+ ch_busco_db = file(params.busco_db, checkIfExists: true)
+ } else {
+ ch_busco_db = []
+ }
-if(!params.keep_phix) {
- ch_phix_db_file = Channel
- .value(file( "${params.phix_reference}" ))
-}
+ if(params.checkm_db) {
+ ch_checkm_db = file(params.checkm_db, checkIfExists: true)
+ }
-if (!params.keep_lambda) {
- ch_nanolyse_db = Channel
- .value(file( "${params.lambda_reference}" ))
-}
+ if (params.gunc_db) {
+ ch_gunc_db = file(params.gunc_db, checkIfExists: true)
+ } else {
+ ch_gunc_db = Channel.empty()
+ }
-if (params.genomad_db){
- ch_genomad_db = file(params.genomad_db, checkIfExists: true)
-} else {
- ch_genomad_db = Channel.empty()
-}
+ if(params.kraken2_db){
+ ch_kraken2_db_file = file(params.kraken2_db, checkIfExists: true)
+ } else {
+ ch_kraken2_db_file = []
+ }
-gtdb = ( params.skip_binqc || params.skip_gtdbtk ) ? false : params.gtdb_db
+ if(params.cat_db){
+ ch_cat_db_file = Channel
+ .value(file( "${params.cat_db}" ))
+ } else {
+ ch_cat_db_file = Channel.empty()
+ }
-if (gtdb) {
- gtdb = file( "${gtdb}", checkIfExists: true)
- gtdb_mash = params.gtdb_mash ? file("${params.gtdb_mash}", checkIfExists: true) : []
-} else {
- gtdb = []
-}
+ if(params.krona_db){
+ ch_krona_db_file = Channel
+ .value(file( "${params.krona_db}" ))
+ } else {
+ ch_krona_db_file = Channel.empty()
+ }
-if(params.metaeuk_db && !params.skip_metaeuk) {
- ch_metaeuk_db = Channel.
- value(file("${params.metaeuk_db}", checkIfExists: true))
-} else {
- ch_metaeuk_db = Channel.empty()
-}
+ if(!params.keep_phix) {
+ ch_phix_db_file = Channel
+ .value(file( "${params.phix_reference}" ))
+ }
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- RUN MAIN WORKFLOW
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+ if (!params.keep_lambda) {
+ ch_nanolyse_db = Channel
+ .value(file( "${params.lambda_reference}" ))
+ }
-// Additional info for completion email and summary
-def busco_failed_bins = [:]
+ if (params.genomad_db){
+ ch_genomad_db = file(params.genomad_db, checkIfExists: true)
+ } else {
+ ch_genomad_db = Channel.empty()
+ }
-workflow MAG {
+ gtdb = ( params.skip_binqc || params.skip_gtdbtk ) ? false : params.gtdb_db
- take:
- ch_raw_short_reads // channel: samplesheet read in from --input
- ch_raw_long_reads
- ch_input_assemblies
+ if (gtdb) {
+ gtdb = file( "${gtdb}", checkIfExists: true)
+ gtdb_mash = params.gtdb_mash ? file("${params.gtdb_mash}", checkIfExists: true) : []
+ } else {
+ gtdb = []
+ }
- main:
+ if(params.metaeuk_db && !params.skip_metaeuk) {
+ ch_metaeuk_db = Channel.value(file("${params.metaeuk_db}", checkIfExists: true))
+ } else {
+ ch_metaeuk_db = Channel.empty()
+ }
- ch_versions = Channel.empty()
- ch_multiqc_files = Channel.empty()
+ // Additional info for completion email and summary
+ def busco_failed_bins = [:]
// Get checkM database if not supplied
@@ -462,7 +456,8 @@ workflow MAG {
*/
if ( !params.assembly_input ) {
- // Co-assembly: prepare grouping for MEGAHIT and for pooling for SPAdes
+
+ // Co-assembly preparation: grouping for MEGAHIT and for pooling for SPAdes
if (params.coassemble_group) {
// short reads
// group and set group as new id
@@ -501,20 +496,6 @@ workflow MAG {
ch_long_reads_grouped = ch_long_reads
}
- ch_assemblies = Channel.empty()
-
- if (!params.skip_megahit){
- MEGAHIT ( ch_short_reads_grouped )
- ch_megahit_assemblies = MEGAHIT.out.assembly
- .map { meta, assembly ->
- def meta_new = meta + [assembler: 'MEGAHIT']
- [ meta_new, assembly ]
- }
- ch_assemblies = ch_assemblies.mix(ch_megahit_assemblies)
- ch_versions = ch_versions.mix(MEGAHIT.out.versions.first())
- }
-
- // Co-assembly: pool reads for SPAdes
if ( ! params.skip_spades || ! params.skip_spadeshybrid ){
if ( params.coassemble_group ) {
if ( params.bbnorm ) {
@@ -546,15 +527,19 @@ workflow MAG {
ch_long_reads_spades = Channel.empty()
}
+ // Assembly
+
+ ch_assembled_contigs = Channel.empty()
+
if (!params.single_end && !params.skip_spades){
- SPADES ( ch_short_reads_spades )
- ch_spades_assemblies = SPADES.out.assembly
+ METASPADES ( ch_short_reads_spades.map{ meta, reads -> [meta, reads, [], []]}, [], [] )
+ ch_spades_assemblies = METASPADES.out.scaffolds
.map { meta, assembly ->
def meta_new = meta + [assembler: 'SPAdes']
[ meta_new, assembly ]
}
- ch_assemblies = ch_assemblies.mix(ch_spades_assemblies)
- ch_versions = ch_versions.mix(SPADES.out.versions.first())
+ ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies)
+ ch_versions = ch_versions.mix(METASPADES.out.versions.first())
}
if (!params.single_end && !params.skip_spadeshybrid){
@@ -564,17 +549,36 @@ workflow MAG {
ch_reads_spadeshybrid = ch_long_reads_spades
.map { meta, reads -> [ meta.id, meta, reads ] }
.combine(ch_short_reads_spades_tmp, by: 0)
- .map { id, meta_long, long_reads, meta_short, short_reads -> [ meta_short, long_reads, short_reads ] }
+ .map { id, meta_long, long_reads, meta_short, short_reads -> [ meta_short, short_reads, [], long_reads ] }
- SPADESHYBRID ( ch_reads_spadeshybrid )
- ch_spadeshybrid_assemblies = SPADESHYBRID.out.assembly
+ METASPADESHYBRID ( ch_reads_spadeshybrid, [], [] )
+ ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds
.map { meta, assembly ->
def meta_new = meta + [assembler: "SPAdesHybrid"]
[ meta_new, assembly ]
}
- ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies)
- ch_versions = ch_versions.mix(SPADESHYBRID.out.versions.first())
+ ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies)
+ ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first())
+ }
+
+ if (!params.skip_megahit){
+ MEGAHIT ( ch_short_reads_grouped )
+ ch_megahit_assemblies = MEGAHIT.out.contigs
+ .map { meta, assembly ->
+ def meta_new = meta + [assembler: 'MEGAHIT']
+ [ meta_new, assembly ]
+ }
+ ch_assembled_contigs = ch_assembled_contigs.mix(ch_megahit_assemblies)
+ ch_versions = ch_versions.mix(MEGAHIT.out.versions.first())
}
+
+
+
+ GUNZIP_ASSEMBLIES ( ch_assembled_contigs )
+ ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES .out.versions)
+
+ ch_assemblies = GUNZIP_ASSEMBLIES.out.gunzip
+
} else {
ch_assemblies_split = ch_input_assemblies
.branch { meta, assembly ->
@@ -582,11 +586,11 @@ workflow MAG {
ungzip: true
}
- GUNZIP_ASSEMBLIES(ch_assemblies_split.gzipped)
- ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES.out.versions)
+ GUNZIP_ASSEMBLYINPUT(ch_assemblies_split.gzipped)
+ ch_versions = ch_versions.mix(GUNZIP_ASSEMBLYINPUT.out.versions)
ch_assemblies = Channel.empty()
- ch_assemblies = ch_assemblies.mix(ch_assemblies_split.ungzip, GUNZIP_ASSEMBLIES.out.gunzip)
+ ch_assemblies = ch_assemblies.mix(ch_assemblies_split.ungzip, GUNZIP_ASSEMBLYINPUT.out.gunzip)
}
ch_quast_multiqc = Channel.empty()