From b512be51a9c79daa420b3be8905f8a47140a2966 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 27 Aug 2024 10:31:57 +0200 Subject: [PATCH 01/24] Bump versions --- CHANGELOG.md | 12 ++++++++++++ assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6089039e..18ddc29e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## dev [unreleased] + +### `Added` + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + ## 3.0.3 [2024-08-27] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 10c24150..9f4e5e7e 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/mag + This report has been generated by the nf-core/mag analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-mag-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 9306ae99..85be4905 100644 --- a/nextflow.config +++ b/nextflow.config @@ -390,7 +390,7 @@ manifest { description = """Assembly, binning and annotation of metagenomes""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '3.0.3' + version = '3.0.4dev' doi = '10.1093/nargab/lqac007' } From df026135c66f207d1a139ccb3501ddcfc1631867 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Sun, 8 Sep 2024 04:09:13 -0300 Subject: [PATCH 02/24] Update GTDBTk to v2.4.0 --- bin/summary_gtdbtk.py | 10 +++++----- modules.json | 2 +- modules/nf-core/gtdbtk/classifywf/environment.yml | 4 +--- modules/nf-core/gtdbtk/classifywf/main.nf | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/bin/summary_gtdbtk.py b/bin/summary_gtdbtk.py index 7ae43a09..370ea4fa 100755 --- a/bin/summary_gtdbtk.py +++ b/bin/summary_gtdbtk.py @@ -76,11 +76,11 @@ def main(args=None): columns = [ "user_genome", "classification", - "fastani_reference", - "fastani_reference_radius", - "fastani_taxonomy", - "fastani_ani", - "fastani_af", + "closest_genome_reference", + "closest_genome_reference_radius", + "closest_genome_taxonomy", + "closest_genome_ani", + "closest_genome_af", "closest_placement_reference", "closest_placement_radius", "closest_placement_taxonomy", diff --git a/modules.json b/modules.json index 0cab4e4e..b17f7dc3 100644 --- a/modules.json +++ b/modules.json @@ -124,7 +124,7 @@ }, "gtdbtk/classifywf": { "branch": "master", - "git_sha": "0735b6d2b509cbb5cf71d15fda819cd7392722fe", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "gunc/downloaddb": { diff --git a/modules/nf-core/gtdbtk/classifywf/environment.yml b/modules/nf-core/gtdbtk/classifywf/environment.yml index 8801269e..500531ea 100644 --- a/modules/nf-core/gtdbtk/classifywf/environment.yml +++ b/modules/nf-core/gtdbtk/classifywf/environment.yml @@ -1,7 +1,5 @@ -name: gtdbtk_classifywf channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::gtdbtk=2.3.2 + - bioconda::gtdbtk=2.4.0 diff --git a/modules/nf-core/gtdbtk/classifywf/main.nf b/modules/nf-core/gtdbtk/classifywf/main.nf index 14dd28a4..9fc7a32d 100644 --- a/modules/nf-core/gtdbtk/classifywf/main.nf +++ b/modules/nf-core/gtdbtk/classifywf/main.nf @@ -5,8 +5,8 @@ process GTDBTK_CLASSIFYWF { // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gtdbtk:2.3.2--pyhdfd78af_0' : - 'biocontainers/gtdbtk:2.3.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gtdbtk:2.4.0--pyhdfd78af_1' : + 'biocontainers/gtdbtk:2.4.0--pyhdfd78af_1' }" input: tuple val(meta), path("bins/*") From e4fa8a9822b697b181c5840cb74a739baf748402 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Sun, 8 Sep 2024 04:22:06 -0300 Subject: [PATCH 03/24] Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ddc29e..e12b47a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` +| Tool | Previous version | New version | +| -------- | ---------------- | ----------- | +| GTDBTk | 2.3.2 | 2.4.0 | + ### `Deprecated` ## 3.0.3 [2024-08-27] From 55803753f4774fac127c2ef129b7b33dd1d28e68 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Sun, 8 Sep 2024 04:36:19 -0300 Subject: [PATCH 04/24] Prettier --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e12b47a5..d056eefa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,9 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` -| Tool | Previous version | New version | -| -------- | ---------------- | ----------- | -| GTDBTk | 2.3.2 | 2.4.0 | +| Tool | Previous version | New version | +| ------ | ---------------- | ----------- | +| GTDBTk | 2.3.2 | 2.4.0 | ### `Deprecated` From 080eac6108f8fd7b642635af8966ad4f06480aff Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 9 Sep 2024 08:57:16 +0000 Subject: [PATCH 05/24] Bump GTDB release version, better changelog --- CHANGELOG.md | 2 ++ conf/test_full.config | 2 +- nextflow.config | 2 +- nextflow_schema.json | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d056eefa..63515e3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#664](https://github.com/nf-core/mag/pull/664) - Update GTDBTk to latest version, with updated column names, update GTDB to release 220 (by @dialvarezs) + ### `Fixed` ### `Dependencies` diff --git a/conf/test_full.config b/conf/test_full.config index d7f3365e..9a01bc58 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -22,7 +22,7 @@ params { //centrifuge_db = "s3://ngi-igenomes/test-data/mag/p_compressed+h+v.tar.gz" kraken2_db = "s3://ngi-igenomes/test-data/mag/minikraken_8GB_202003.tgz" cat_db = "s3://ngi-igenomes/test-data/mag/CAT_prepare_20210107.tar.gz" - // gtdb_db = "s3://ngi-igenomes/test-data/mag/gtdbtk_r214_data.tar.gz" + // gtdb_db = "s3://ngi-igenomes/test-data/mag/gtdbtk_r214_data.tar.gz" ## This should be updated to release 220, once we get GTDB-Tk working again skip_gtdbtk = true // TODO TEMPORARY: deactivate SPAdes due to incompatibility of container with fusion file system diff --git a/nextflow.config b/nextflow.config index 85be4905..8338e9f3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,7 +87,7 @@ params { cat_official_taxonomy = false save_cat_db = false skip_gtdbtk = false - gtdb_db = "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/auxillary_files/gtdbtk_r214_data.tar.gz" + gtdb_db = "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" gtdb_mash = null gtdbtk_min_completeness = 50.0 gtdbtk_max_contamination = 10.0 diff --git a/nextflow_schema.json b/nextflow_schema.json index 35e85825..219d6724 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -537,7 +537,7 @@ "gtdb_db": { "type": "string", "description": "Specify the location of a GTDBTK database. Can be either an uncompressed directory or a `.tar.gz` archive. If not specified will be downloaded for you when GTDBTK or binning QC is not skipped.", - "default": "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/auxillary_files/gtdbtk_r214_data.tar.gz" + "default": "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" }, "gtdb_mash": { "type": "string", From f11d9ae34e31819cb06542ca54b39bbeafada472 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 9 Sep 2024 13:51:38 +0000 Subject: [PATCH 06/24] Add support for pre-supplying bowtie2 host index --- CHANGELOG.md | 2 ++ modules/local/bowtie2_removal_align.nf | 6 +++++- modules/local/bowtie2_removal_build.nf | 6 +++--- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/mag.nf | 14 ++++++++++---- 6 files changed, 26 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ddc29e..803adad7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) + ### `Changed` ### `Fixed` diff --git a/modules/local/bowtie2_removal_align.nf b/modules/local/bowtie2_removal_align.nf index 03cb9b25..79eb0e47 100644 --- a/modules/local/bowtie2_removal_align.nf +++ b/modules/local/bowtie2_removal_align.nf @@ -26,8 +26,12 @@ process BOWTIE2_REMOVAL_ALIGN { def save_ids = (args2.contains('--host_removal_save_ids')) ? "Y" : "N" if (!meta.single_end){ """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + bowtie2 -p ${task.cpus} \ - -x ${index[0].getSimpleName()} \ + -x \$INDEX \ -1 "${reads[0]}" -2 "${reads[1]}" \ $args \ --un-conc-gz ${prefix}.unmapped_%.fastq.gz \ diff --git a/modules/local/bowtie2_removal_build.nf b/modules/local/bowtie2_removal_build.nf index ba152611..f3922094 100644 --- a/modules/local/bowtie2_removal_build.nf +++ b/modules/local/bowtie2_removal_build.nf @@ -10,14 +10,14 @@ process BOWTIE2_REMOVAL_BUILD { path fasta output: - path 'bt2_index_base*', emit: index - path "versions.yml" , emit: versions + path "*.bt2" , emit: index + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' """ mkdir bowtie - bowtie2-build --threads $task.cpus $fasta "bt2_index_base" + bowtie2-build --threads $task.cpus $fasta ${fasta.simpleName} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 85be4905..aa0ef836 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,6 +31,7 @@ params { phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false host_fasta = null + host_fasta_bowtie2index = null host_genome = null host_removal_verysensitive = false host_removal_save_ids = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 35e85825..f8064be5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -383,6 +383,11 @@ "description": "Fasta reference file for host contamination removal.", "help_text": "This parameter is mutually exclusive with `--host_genome`. The reference can be masked. Host read removal is done with Bowtie2." }, + "host_fasta_bowtie2index": { + "type": "string", + "description": "Bowtie2 index directory corresponding to --host_fasta reference file for host contamination removal.", + "help_text": "This parameter must be used in combination with --host_fasta, and should be a directory containing files from the output of `bowtie2-build`, i.e. files ending in `.bt2`" + }, "host_removal_verysensitive": { "type": "boolean", "description": "Use the `--very-sensitive` instead of the`--sensitive`setting for Bowtie 2 to map reads against the host genome." diff --git a/workflows/mag.nf b/workflows/mag.nf index f71d4218..6c158284 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -250,11 +250,17 @@ workflow MAG { } if (params.host_fasta){ - BOWTIE2_HOST_REMOVAL_BUILD ( - ch_host_fasta - ) - ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + if ( params.host_fasta_bowtie2index ) { + ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) + } else { + BOWTIE2_HOST_REMOVAL_BUILD ( + ch_host_fasta + ) + ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + } + } + ch_bowtie2_removal_host_multiqc = Channel.empty() if (params.host_fasta || params.host_genome){ BOWTIE2_HOST_REMOVAL_ALIGN ( From ca8ba9d08701acf55dfcbc2dec2c0e1e535d4bd9 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 11 Sep 2024 05:17:42 +0200 Subject: [PATCH 07/24] Improve condition so SPLIT_FASTQ skipped if only concoct selected as binner --- CHANGELOG.md | 2 ++ subworkflows/local/binning.nf | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2c8c05..08c3c625 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) + ### `Dependencies` | Tool | Previous version | New version | diff --git a/subworkflows/local/binning.nf b/subworkflows/local/binning.nf index a07ca416..83f0f38e 100644 --- a/subworkflows/local/binning.nf +++ b/subworkflows/local/binning.nf @@ -94,7 +94,7 @@ workflow BINNING { bams: [ meta, bams, bais ] } - FASTA_BINNING_CONCOCT ( ch_concoct_input ) + FASTA_BINNING_CONCOCT ( ch_concoct_input.bins, ch_concoct_input.bams ) ch_final_bins_for_gunzip = ch_final_bins_for_gunzip.mix( FASTA_BINNING_CONCOCT.out.bins.transpose() ) ch_binning_results_gzipped_final = ch_binning_results_gzipped_final.mix( FASTA_BINNING_CONCOCT.out.bins ) ch_versions = ch_versions.mix(FASTA_BINNING_CONCOCT.out.versions) @@ -102,10 +102,12 @@ workflow BINNING { // decide which unbinned fasta files to further filter, depending on which binners selected // NOTE: CONCOCT does not produce 'unbins' itself, therefore not included here. - if ( !params.skip_metabat2 & params.skip_maxbin2 ) { + if ( !params.skip_metabat2 && params.skip_maxbin2 ) { ch_input_splitfasta = METABAT2_METABAT2.out.unbinned - } else if ( params.skip_metabat2 & !params.skip_maxbin2 ) { + } else if ( params.skip_metabat2 && !params.skip_maxbin2 ) { ch_input_splitfasta = MAXBIN2.out.unbinned_fasta + } else if ( params.skip_metabat2 && params.skip_maxbin2 ) { + ch_input_splitfasta = Channel.empty() } else { ch_input_splitfasta = METABAT2_METABAT2.out.unbinned.mix(MAXBIN2.out.unbinned_fasta) } @@ -116,11 +118,11 @@ workflow BINNING { ch_split_fasta_results_transposed = SPLIT_FASTA.out.unbinned.transpose() ch_versions = ch_versions.mix(SPLIT_FASTA.out.versions) - GUNZIP_BINS ( ch_final_bins_for_gunzip ) + GUNZIP_BINS ( ch_final_bins_for_gunzip.dump(tag: 'final_bins') ) ch_binning_results_gunzipped = GUNZIP_BINS.out.gunzip .groupTuple(by: 0) - GUNZIP_UNBINS ( ch_split_fasta_results_transposed ) + GUNZIP_UNBINS ( ch_split_fasta_results_transposed.dump(tag: 'final_unbins') ) ch_splitfasta_results_gunzipped = GUNZIP_UNBINS.out.gunzip .groupTuple(by: 0) From 9fc4153e254b6a0a9f4b804d4d394d756754379c Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 12 Sep 2024 19:20:37 +0200 Subject: [PATCH 08/24] Update subworkflows/local/binning.nf --- subworkflows/local/binning.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/binning.nf b/subworkflows/local/binning.nf index 83f0f38e..6985f82e 100644 --- a/subworkflows/local/binning.nf +++ b/subworkflows/local/binning.nf @@ -118,7 +118,7 @@ workflow BINNING { ch_split_fasta_results_transposed = SPLIT_FASTA.out.unbinned.transpose() ch_versions = ch_versions.mix(SPLIT_FASTA.out.versions) - GUNZIP_BINS ( ch_final_bins_for_gunzip.dump(tag: 'final_bins') ) + GUNZIP_BINS ( ch_final_bins_for_gunzip ) ch_binning_results_gunzipped = GUNZIP_BINS.out.gunzip .groupTuple(by: 0) From fcdb5184013f7a1665dd0fa0e706fb637e582996 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 12 Sep 2024 19:20:43 +0200 Subject: [PATCH 09/24] Update subworkflows/local/binning.nf --- subworkflows/local/binning.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/binning.nf b/subworkflows/local/binning.nf index 6985f82e..51caaeb9 100644 --- a/subworkflows/local/binning.nf +++ b/subworkflows/local/binning.nf @@ -122,7 +122,7 @@ workflow BINNING { ch_binning_results_gunzipped = GUNZIP_BINS.out.gunzip .groupTuple(by: 0) - GUNZIP_UNBINS ( ch_split_fasta_results_transposed.dump(tag: 'final_unbins') ) + GUNZIP_UNBINS ( ch_split_fasta_results_transposed ) ch_splitfasta_results_gunzipped = GUNZIP_UNBINS.out.gunzip .groupTuple(by: 0) From 9d9e4eda432f18d1b8175fb6116ec12b8eb213d7 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 13 Sep 2024 16:08:39 +0200 Subject: [PATCH 10/24] Fix module failure when no bins --- modules.json | 2 +- modules/nf-core/gtdbtk/classifywf/main.nf | 55 +++++++++++------------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/modules.json b/modules.json index b17f7dc3..64207e26 100644 --- a/modules.json +++ b/modules.json @@ -124,7 +124,7 @@ }, "gtdbtk/classifywf": { "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "git_sha": "c44b5e99aea273adf455d8c40babeb3885083ff9", "installed_by": ["modules"] }, "gunc/downloaddb": { diff --git a/modules/nf-core/gtdbtk/classifywf/main.nf b/modules/nf-core/gtdbtk/classifywf/main.nf index 9fc7a32d..9aa13ee4 100644 --- a/modules/nf-core/gtdbtk/classifywf/main.nf +++ b/modules/nf-core/gtdbtk/classifywf/main.nf @@ -1,29 +1,25 @@ process GTDBTK_CLASSIFYWF { tag "${prefix}" label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gtdbtk:2.4.0--pyhdfd78af_1' : - 'biocontainers/gtdbtk:2.4.0--pyhdfd78af_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gtdbtk:2.4.0--pyhdfd78af_1' : 'biocontainers/gtdbtk:2.4.0--pyhdfd78af_1'}" input: - tuple val(meta), path("bins/*") + tuple val(meta) , path("bins/*") tuple val(db_name), path("database/*") - path(mash_db) + path mash_db output: - tuple val(meta), path("gtdbtk.${prefix}.*.summary.tsv") , emit: summary - tuple val(meta), path("gtdbtk.${prefix}.*.classify.tree.gz") , emit: tree, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.markers_summary.tsv") , emit: markers, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.msa.fasta.gz") , emit: msa, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.user_msa.fasta.gz") , emit: user_msa, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.filtered.tsv") , emit: filtered, optional: true - tuple val(meta), path("gtdbtk.${prefix}.failed_genomes.tsv") , emit: failed, optional: true - tuple val(meta), path("gtdbtk.${prefix}.log") , emit: log - tuple val(meta), path("gtdbtk.${prefix}.warnings.log") , emit: warnings - path("versions.yml") , emit: versions + tuple val(meta), path("gtdbtk.${prefix}.*.summary.tsv") , emit: summary + tuple val(meta), path("gtdbtk.${prefix}.*.classify.tree.gz") , emit: tree , optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.markers_summary.tsv"), emit: markers , optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.msa.fasta.gz") , emit: msa , optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.user_msa.fasta.gz") , emit: user_msa, optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.filtered.tsv") , emit: filtered, optional: true + tuple val(meta), path("gtdbtk.${prefix}.failed_genomes.tsv") , emit: failed , optional: true + tuple val(meta), path("gtdbtk.${prefix}.log") , emit: log + tuple val(meta), path("gtdbtk.${prefix}.warnings.log") , emit: warnings + path ("versions.yml"), emit: versions when: task.ext.when == null || task.ext.when @@ -31,7 +27,7 @@ process GTDBTK_CLASSIFYWF { script: def args = task.ext.args ?: '' def pplacer_scratch = params.gtdbtk_pplacer_scratch ? "--scratch_dir pplacer_tmp" : "" - def mash_mode = mash_db ? "--mash_db ${mash_db}" : "--skip_ani_screen" + def mash_mode = mash_db ? "--mash_db ${mash_db}" : "--skip_ani_screen" prefix = task.ext.prefix ?: "${meta.id}" """ @@ -41,27 +37,27 @@ process GTDBTK_CLASSIFYWF { fi gtdbtk classify_wf \\ - $args \\ + ${args} \\ --genome_dir bins \\ --prefix "gtdbtk.${prefix}" \\ --out_dir "\${PWD}" \\ - --cpus $task.cpus \\ - $mash_mode \\ - $pplacer_scratch \\ - --min_perc_aa $params.gtdbtk_min_perc_aa \\ - --min_af $params.gtdbtk_min_af + --cpus ${task.cpus} \\ + ${mash_mode} \\ + ${pplacer_scratch} \\ + --min_perc_aa ${params.gtdbtk_min_perc_aa} \\ + --min_af ${params.gtdbtk_min_af} ## If mash db given, classify/ and identify/ directories won't be created - if [[ -d classify/ ]]; then + if [[ -d classify/ && \$(ls -A classify/) ]]; then mv classify/* . fi - if [[ -d identify/ ]]; then + if [[ -d identify/ && \$(ls -A identify/) ]]; then mv identify/* . fi ## If nothing aligns, no output, so only run - if [[ -d align/ ]]; then + if [[ -d align/ && \$(ls -A align/) ]]; then mv align/* . fi @@ -78,7 +74,8 @@ process GTDBTK_CLASSIFYWF { """ stub: - def VERSION = '2.3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '2.3.2' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. prefix = task.ext.prefix ?: "${meta.id}" """ touch gtdbtk.${prefix}.stub.summary.tsv @@ -93,7 +90,7 @@ process GTDBTK_CLASSIFYWF { cat <<-END_VERSIONS > versions.yml "${task.process}": - gtdbtk: \$(echo "$VERSION") + gtdbtk: \$(echo "${VERSION}") END_VERSIONS """ } From a60f2e7a9b86dfc31b9bbfcf9154649c19ea6c52 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 16 Sep 2024 13:07:36 +0200 Subject: [PATCH 11/24] Address comments via ofifical module update --- modules.json | 2 +- modules/nf-core/gtdbtk/classifywf/main.nf | 4 +--- modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap | 8 ++++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/modules.json b/modules.json index 64207e26..be1b1dd3 100644 --- a/modules.json +++ b/modules.json @@ -124,7 +124,7 @@ }, "gtdbtk/classifywf": { "branch": "master", - "git_sha": "c44b5e99aea273adf455d8c40babeb3885083ff9", + "git_sha": "3fb6803fd1cf5a63998216f4254d6d4e487fab21", "installed_by": ["modules"] }, "gunc/downloaddb": { diff --git a/modules/nf-core/gtdbtk/classifywf/main.nf b/modules/nf-core/gtdbtk/classifywf/main.nf index 9aa13ee4..f0944fdc 100644 --- a/modules/nf-core/gtdbtk/classifywf/main.nf +++ b/modules/nf-core/gtdbtk/classifywf/main.nf @@ -74,8 +74,6 @@ process GTDBTK_CLASSIFYWF { """ stub: - def VERSION = '2.3.2' - // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. prefix = task.ext.prefix ?: "${meta.id}" """ touch gtdbtk.${prefix}.stub.summary.tsv @@ -90,7 +88,7 @@ process GTDBTK_CLASSIFYWF { cat <<-END_VERSIONS > versions.yml "${task.process}": - gtdbtk: \$(echo "${VERSION}") + gtdbtk: \$(echo \$(gtdbtk --version -v 2>&1) | sed "s/gtdbtk: version //; s/ Copyright.*//") END_VERSIONS """ } diff --git a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap index e821084c..eb0ee89a 100644 --- a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap +++ b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap @@ -93,7 +93,7 @@ ] ], "9": [ - "versions.yml:md5,a8ab755bce9f17684f235d49ab99f6d2" + "versions.yml:md5,2c94de2b8633b99e11881ab0193835d7" ], "failed": [ [ @@ -176,7 +176,7 @@ ] ], "versions": [ - "versions.yml:md5,a8ab755bce9f17684f235d49ab99f6d2" + "versions.yml:md5,2c94de2b8633b99e11881ab0193835d7" ], "warnings": [ [ @@ -192,8 +192,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.4" }, - "timestamp": "2024-03-26T09:39:21.632259941" + "timestamp": "2024-09-16T11:46:32.337929018" } } \ No newline at end of file From b19d4d3d70c33c8acb80c6b2efb8212d378f7a46 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 18 Sep 2024 14:25:37 +0200 Subject: [PATCH 12/24] Utilise missing GTDBK parameters, rename pplacer scratch due to suboptimal default --- conf/modules.config | 188 +++++++++--------- modules.json | 2 +- modules/nf-core/gtdbtk/classifywf/main.nf | 54 +++-- modules/nf-core/gtdbtk/classifywf/meta.yml | 3 + .../gtdbtk/classifywf/tests/main.nf.test | 7 +- .../gtdbtk/classifywf/tests/main.nf.test.snap | 8 +- nextflow.config | 2 +- nextflow_schema.json | 6 +- subworkflows/local/gtdbtk.nf | 1 + 9 files changed, 138 insertions(+), 133 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 81df5bc8..c5afc12b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,18 +21,18 @@ process { ] withName: FASTQC_RAW { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] ext.prefix = { "${meta.id}_run${meta.run}_raw" } - tag = { "${meta.id}_run${meta.run}_raw" } + tag = { "${meta.id}_run${meta.run}_raw" } } withName: FASTP { - ext.args = [ + ext.args = [ "-q ${params.fastp_qualified_quality}", "--cut_front", "--cut_tail", @@ -53,11 +53,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_fastp" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_PE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -77,11 +77,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_SE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -93,7 +93,7 @@ process { pattern: "*.{settings}" ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_PHIX_REMOVAL_ALIGN { @@ -111,12 +111,12 @@ process { enabled: params.save_phixremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_HOST_REMOVAL_ALIGN { - ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" - ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' + ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" + ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' ext.prefix = { "${meta.id}_run${meta.run}_host_removed" } publishDir = [ [ @@ -131,37 +131,37 @@ process { enabled: params.save_hostremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: FASTQC_TRIMMED { - ext.args = '--quiet' + ext.args = '--quiet' ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BBMAP_BBNORM { - ext.args = [ + ext.args = [ params.bbnorm_target ? "target=${params.bbnorm_target}" : '', - params.bbnorm_min ? "min=${params.bbnorm_min}" : '', + params.bbnorm_min ? "min=${params.bbnorm_min}" : '' ].join(' ').trim() publishDir = [ [ - path : { "${params.outdir}/bbmap/bbnorm/logs" }, + path: { "${params.outdir}/bbmap/bbnorm/logs" }, enabled: params.save_bbnorm_reads, - mode : params.publish_dir_mode, + mode: params.publish_dir_mode, pattern: "*.log" ], [ - path : { "${params.outdir}/bbmap/bbnorm/"}, - mode : 'copy', + path: { "${params.outdir}/bbmap/bbnorm/" }, + mode: 'copy', enabled: params.save_bbnorm_reads, - mode : params.publish_dir_mode, + mode: params.publish_dir_mode, pattern: "*.fastq.gz" ] ] @@ -169,21 +169,21 @@ process { withName: PORECHOP_PORECHOP { publishDir = [ - path: { "${params.outdir}/QC_longreads/porechop" }, - mode: params.publish_dir_mode, - pattern: "*_trimmed.fastq", - enabled: params.save_porechop_reads - ] + path: { "${params.outdir}/QC_longreads/porechop" }, + mode: params.publish_dir_mode, + pattern: "*_trimmed.fastq", + enabled: params.save_porechop_reads + ] ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } } withName: FILTLONG { publishDir = [ - path: { "${params.outdir}/QC_longreads/Filtlong" }, - mode: params.publish_dir_mode, - pattern: "*_lr_filtlong.fastq.gz", - enabled: params.save_filtlong_reads - ] + path: { "${params.outdir}/QC_longreads/Filtlong" }, + mode: params.publish_dir_mode, + pattern: "*_lr_filtlong.fastq.gz", + enabled: params.save_filtlong_reads + ] ext.prefix = { "${meta.id}_run${meta.run}_lengthfiltered" } } @@ -206,11 +206,13 @@ process { withName: NANOPLOT_RAW { ext.prefix = 'raw' - ext.args = { [ - "-p raw_", - "--title ${meta.id}_raw", - "-c darkblue", - ].join(' ').trim() } + ext.args = { + [ + "-p raw_", + "--title ${meta.id}_raw", + "-c darkblue" + ].join(' ').trim() + } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, @@ -219,11 +221,13 @@ process { } withName: NANOPLOT_FILTERED { - ext.args = { [ - "-p filtered_", - "--title ${meta.id}_filtered", - "-c darkblue", - ].join(' ').trim() } + ext.args = { + [ + "-p filtered_", + "--title ${meta.id}_filtered", + "-c darkblue" + ].join(' ').trim() + } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, @@ -249,7 +253,7 @@ process { } withName: KRAKEN2 { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, mode: params.publish_dir_mode, @@ -276,19 +280,22 @@ process { //pattern: "*.{fa.gz,log}" //'pattern' didnt work, probably because the output is in a folder, solved with 'saveAs' withName: MEGAHIT { - ext.args = params.megahit_options ?: '' + ext.args = params.megahit_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly" }, mode: params.publish_dir_mode, - saveAs: { - filename -> filename.equals('versions.yml') ? null : - filename.indexOf('.contigs.fa.gz') > 0 ? filename : - filename.indexOf('.log') > 0 ? filename : null } + saveAs: { filename -> + filename.equals('versions.yml') + ? null + : filename.indexOf('.contigs.fa.gz') > 0 + ? filename + : filename.indexOf('.log') > 0 ? filename : null + } ] } withName: SPADES { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, @@ -297,7 +304,7 @@ process { } withName: SPADESHYBRID { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, @@ -314,10 +321,10 @@ process { } withName: GENOMAD_ENDTOEND { - ext.args = [ + ext.args = [ "--cleanup", "--min-score ${params.genomad_min_score}", - "--splits ${params.genomad_splits}", + "--splits ${params.genomad_splits}" ].join(' ').trim() publishDir = [ path: { "${params.outdir}/VirusIdentification/geNomad/${meta.id}" }, @@ -327,7 +334,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' + ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' ext.prefix = { "${meta.id}.assembly" } publishDir = [ [ @@ -340,7 +347,7 @@ process { mode: params.publish_dir_mode, pattern: "*.{bam,bai}", enabled: params.save_assembly_mapped_reads - ], + ] ] } @@ -352,7 +359,7 @@ process { ] } - withName: 'BIN_SUMMARY' { + withName: BIN_SUMMARY { publishDir = [ path: { "${params.outdir}/GenomeBinning" }, mode: params.publish_dir_mode, @@ -368,10 +375,10 @@ process { ] } - withName: 'BUSCO' { - ext.args = [ - params.busco_db ? '--offline' : '' - ].join(' ').trim() + withName: BUSCO { + ext.args = [ + params.busco_db ? '--offline' : '' + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, @@ -406,8 +413,8 @@ process { ] } - withName: 'CHECKM_LINEAGEWF' { - tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } + withName: CHECKM_LINEAGEWF { + tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" } publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, @@ -416,9 +423,9 @@ process { ] } - withName: 'CHECKM_QA' { + withName: CHECKM_QA { ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_qa" } - ext.args = "-o 2 --tab_table" + ext.args = "-o 2 --tab_table" publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, @@ -426,7 +433,7 @@ process { ] } - withName: 'COMBINE_CHECKM_TSV' { + withName: COMBINE_CHECKM_TSV { ext.prefix = { "checkm_summary" } publishDir = [ path: { "${params.outdir}/GenomeBinning/QC" }, @@ -435,7 +442,7 @@ process { ] } - withName: 'GUNC_DOWNLOADDB' { + withName: GUNC_DOWNLOADDB { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC" }, mode: params.publish_dir_mode, @@ -445,7 +452,7 @@ process { } // Make sure to keep directory in sync with gunc_qc.nf - withName: 'GUNC_RUN' { + withName: GUNC_RUN { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${fasta.baseName}/" }, mode: params.publish_dir_mode, @@ -454,7 +461,7 @@ process { } // Make sure to keep directory in sync with gunc_qc.nf - withName: 'GUNC_MERGECHECKM' { + withName: GUNC_MERGECHECKM { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${checkm_file.baseName}" }, mode: params.publish_dir_mode, @@ -487,7 +494,12 @@ process { } withName: GTDBTK_CLASSIFYWF { - ext.args = "--extension fa" + ext.args = [ + "--extension fa", + "--min_perc_aa ${gtdbtk_min_perc_aa}", + "--min_af ${gtdbtk_min_af}", + "--pplacer_cpus ${gtdbtk_pplacer_cpus}" + ].join(' ') ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } publishDir = [ path: { "${params.outdir}/Taxonomy/GTDB-Tk/${meta.assembler}/${meta.binner}/${meta.id}" }, @@ -546,7 +558,7 @@ process { withName: BCFTOOLS_CONSENSUS { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/variant_calling/consensus" }, + path: { "${params.outdir}/Ancient_DNA/variant_calling/consensus" }, mode: params.publish_dir_mode, pattern: "*.fa" ] @@ -556,7 +568,7 @@ process { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t" publishDir = [ - path: {"${params.outdir}/Ancient_DNA/variant_calling/index" }, + path: { "${params.outdir}/Ancient_DNA/variant_calling/index" }, mode: params.publish_dir_mode, enabled: false ] @@ -565,7 +577,7 @@ process { withName: PYDAMAGE_ANALYZE { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, + path: { "${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode ] } @@ -574,7 +586,7 @@ process { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t ${params.pydamage_accuracy}" publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, + path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode ] } @@ -582,7 +594,7 @@ process { withName: SAMTOOLS_FAIDX { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/samtools/faidx" }, + path: { "${params.outdir}/Ancient_DNA/samtools/faidx" }, mode: params.publish_dir_mode, enabled: false ] @@ -616,7 +628,7 @@ process { ] ] ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" } - ext.args = [ + ext.args = [ params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}", "--unbinned", "--seed ${params.metabat_rng_seed}" @@ -629,11 +641,9 @@ process { path: { "${params.outdir}/GenomeBinning/MaxBin2/discarded" }, mode: params.publish_dir_mode, pattern: '*.tooshort.gz' - ], + ] ] ext.prefix = { "${meta.assembler}-MaxBin2-${meta.id}" } - // if no gene found, will crash so allow ignore so rest of pipeline - // completes but without MaxBin2 results } withName: ADJUST_MAXBIN2_EXT { @@ -642,11 +652,11 @@ process { path: { "${params.outdir}/GenomeBinning/MaxBin2/bins/" }, mode: params.publish_dir_mode, pattern: '*.fa.gz' - ], + ] ] } - withName: "CONCOCT_.*" { + withName: 'CONCOCT_.*' { publishDir = [ [ path: { "${params.outdir}/GenomeBinning/CONCOCT/stats/" }, @@ -705,10 +715,10 @@ process { path: { "${params.outdir}/GenomeBinning/DASTool" }, mode: params.publish_dir_mode, pattern: '*.{tsv,log,eval,seqlength}' - ], + ] ] ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" } - ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" + ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" } withName: RENAME_POSTDASTOOL { @@ -721,7 +731,6 @@ process { [ path: { "${params.outdir}/GenomeBinning/DASTool/bins" }, mode: params.publish_dir_mode, - // pattern needs to be updated in case of new binning methods pattern: '*-{MetaBAT2,MaxBin2,CONCOCT}Refined-*.fa' ] ] @@ -729,16 +738,16 @@ process { withName: TIARA_TIARA { publishDir = [ - path: { "${params.outdir}/Taxonomy/Tiara/" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } + path: { "${params.outdir}/Taxonomy/Tiara/" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } ext.prefix = { "${meta.assembler}-${meta.id}.tiara" } } withName: TIARA_CLASSIFY { - ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } + ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.bin}-${meta.id}" } } @@ -771,13 +780,12 @@ process { ] } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - } diff --git a/modules.json b/modules.json index b17f7dc3..5f4eb8bb 100644 --- a/modules.json +++ b/modules.json @@ -124,7 +124,7 @@ }, "gtdbtk/classifywf": { "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "git_sha": "7b9ce4b817926f17ec82cc0099d2d0ff095a2fac", "installed_by": ["modules"] }, "gunc/downloaddb": { diff --git a/modules/nf-core/gtdbtk/classifywf/main.nf b/modules/nf-core/gtdbtk/classifywf/main.nf index 9fc7a32d..23862fee 100644 --- a/modules/nf-core/gtdbtk/classifywf/main.nf +++ b/modules/nf-core/gtdbtk/classifywf/main.nf @@ -1,37 +1,34 @@ process GTDBTK_CLASSIFYWF { tag "${prefix}" label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gtdbtk:2.4.0--pyhdfd78af_1' : - 'biocontainers/gtdbtk:2.4.0--pyhdfd78af_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gtdbtk:2.4.0--pyhdfd78af_1' : 'biocontainers/gtdbtk:2.4.0--pyhdfd78af_1'}" input: - tuple val(meta), path("bins/*") + tuple val(meta) , path("bins/*") tuple val(db_name), path("database/*") - path(mash_db) + val use_pplacer_scratch_dir + path mash_db output: - tuple val(meta), path("gtdbtk.${prefix}.*.summary.tsv") , emit: summary - tuple val(meta), path("gtdbtk.${prefix}.*.classify.tree.gz") , emit: tree, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.markers_summary.tsv") , emit: markers, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.msa.fasta.gz") , emit: msa, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.user_msa.fasta.gz") , emit: user_msa, optional: true - tuple val(meta), path("gtdbtk.${prefix}.*.filtered.tsv") , emit: filtered, optional: true - tuple val(meta), path("gtdbtk.${prefix}.failed_genomes.tsv") , emit: failed, optional: true - tuple val(meta), path("gtdbtk.${prefix}.log") , emit: log - tuple val(meta), path("gtdbtk.${prefix}.warnings.log") , emit: warnings - path("versions.yml") , emit: versions + tuple val(meta), path("gtdbtk.${prefix}.*.summary.tsv") , emit: summary + tuple val(meta), path("gtdbtk.${prefix}.*.classify.tree.gz") , emit: tree , optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.markers_summary.tsv"), emit: markers , optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.msa.fasta.gz") , emit: msa , optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.user_msa.fasta.gz") , emit: user_msa, optional: true + tuple val(meta), path("gtdbtk.${prefix}.*.filtered.tsv") , emit: filtered, optional: true + tuple val(meta), path("gtdbtk.${prefix}.failed_genomes.tsv") , emit: failed , optional: true + tuple val(meta), path("gtdbtk.${prefix}.log") , emit: log + tuple val(meta), path("gtdbtk.${prefix}.warnings.log") , emit: warnings + path ("versions.yml"), emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def pplacer_scratch = params.gtdbtk_pplacer_scratch ? "--scratch_dir pplacer_tmp" : "" - def mash_mode = mash_db ? "--mash_db ${mash_db}" : "--skip_ani_screen" + def pplacer_scratch = use_pplacer_scratch_dir ? "--scratch_dir pplacer_tmp" : "" + def mash_mode = mash_db ? "--mash_db ${mash_db}" : "--skip_ani_screen" prefix = task.ext.prefix ?: "${meta.id}" """ @@ -41,27 +38,25 @@ process GTDBTK_CLASSIFYWF { fi gtdbtk classify_wf \\ - $args \\ + ${args} \\ --genome_dir bins \\ --prefix "gtdbtk.${prefix}" \\ --out_dir "\${PWD}" \\ - --cpus $task.cpus \\ - $mash_mode \\ - $pplacer_scratch \\ - --min_perc_aa $params.gtdbtk_min_perc_aa \\ - --min_af $params.gtdbtk_min_af + --cpus ${task.cpus} \\ + ${mash_mode} \\ + ${pplacer_scratch} ## If mash db given, classify/ and identify/ directories won't be created - if [[ -d classify/ ]]; then + if [[ -d classify/ && \$(ls -A classify/) ]]; then mv classify/* . fi - if [[ -d identify/ ]]; then + if [[ -d identify/ && \$(ls -A identify/) ]]; then mv identify/* . fi ## If nothing aligns, no output, so only run - if [[ -d align/ ]]; then + if [[ -d align/ && \$(ls -A align/) ]]; then mv align/* . fi @@ -78,7 +73,6 @@ process GTDBTK_CLASSIFYWF { """ stub: - def VERSION = '2.3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. prefix = task.ext.prefix ?: "${meta.id}" """ touch gtdbtk.${prefix}.stub.summary.tsv @@ -93,7 +87,7 @@ process GTDBTK_CLASSIFYWF { cat <<-END_VERSIONS > versions.yml "${task.process}": - gtdbtk: \$(echo "$VERSION") + gtdbtk: \$(echo \$(gtdbtk --version -v 2>&1) | sed "s/gtdbtk: version //; s/ Copyright.*//") END_VERSIONS """ } diff --git a/modules/nf-core/gtdbtk/classifywf/meta.yml b/modules/nf-core/gtdbtk/classifywf/meta.yml index d85f9966..0667dcd6 100644 --- a/modules/nf-core/gtdbtk/classifywf/meta.yml +++ b/modules/nf-core/gtdbtk/classifywf/meta.yml @@ -30,6 +30,9 @@ input: type: file description: The local copy of the taxonomic database used by GTDB-tk (unzipped copy) pattern: "*" + - use_pplacer_scratch_dir: + type: boolean + description: Set to true to reduce pplacer memory usage by writing to disk (slower) - mash_db: type: file description: The local copy of the Mash sketch database used by GTDB-tk if `ani_screen` mode is used (optional) diff --git a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test index 60bedb68..deca962d 100644 --- a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test +++ b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test @@ -26,7 +26,8 @@ nextflow_process { ] ] input[1] = [[], []] - input[2] = [] + input[2] = false + input[3] = [] """ } } @@ -35,9 +36,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() } - ) + ) } - } - } diff --git a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap index e821084c..eb0ee89a 100644 --- a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap +++ b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap @@ -93,7 +93,7 @@ ] ], "9": [ - "versions.yml:md5,a8ab755bce9f17684f235d49ab99f6d2" + "versions.yml:md5,2c94de2b8633b99e11881ab0193835d7" ], "failed": [ [ @@ -176,7 +176,7 @@ ] ], "versions": [ - "versions.yml:md5,a8ab755bce9f17684f235d49ab99f6d2" + "versions.yml:md5,2c94de2b8633b99e11881ab0193835d7" ], "warnings": [ [ @@ -192,8 +192,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.4" }, - "timestamp": "2024-03-26T09:39:21.632259941" + "timestamp": "2024-09-16T11:46:32.337929018" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index faab3eba..849922d9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -95,7 +95,7 @@ params { gtdbtk_min_perc_aa = 10 gtdbtk_min_af = 0.65 gtdbtk_pplacer_cpus = 1 - gtdbtk_pplacer_scratch = true + gtdbtk_pplacer_scratchoff = false // long read preprocessing options skip_adapter_trimming = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b6b4c07..78ccdabc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -584,11 +584,11 @@ "description": "Number of CPUs used for the by GTDB-Tk run tool pplacer.", "help_text": "A low number of CPUs helps to reduce the memory required/reported by GTDB-Tk. See also the [GTDB-Tk documentation](https://ecogenomics.github.io/GTDBTk/faq.html#gtdb-tk-reaches-the-memory-limit-pplacer-crashes)." }, - "gtdbtk_pplacer_scratch": { + "gtdbtk_pplacer_scratchoff": { "type": "boolean", "default": true, - "description": "Reduce GTDB-Tk memory consumption by running pplacer in a setting writing to disk.", - "help_text": "Will be slower. Set to `false` to turn this off." + "description": "Speed up pplacer step of GTDB-Tk by loading to memory.", + "help_text": "Will be faster than writing to disk (default setting), however at the expense of much larger memory (RAM) requirements for GDTBTK/CLASSIFY." }, "genomad_db": { "type": "string", diff --git a/subworkflows/local/gtdbtk.nf b/subworkflows/local/gtdbtk.nf index 21ef25b2..c5893f16 100644 --- a/subworkflows/local/gtdbtk.nf +++ b/subworkflows/local/gtdbtk.nf @@ -90,6 +90,7 @@ workflow GTDBTK { GTDBTK_CLASSIFYWF ( ch_filtered_bins.passed.groupTuple(), ch_db_for_gtdbtk, + params.gtdbtk_pplacer_scratchoff ? false : true gtdb_mash ) From 1059e3658649757b05a37856df7b18e7fba0074e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 18 Sep 2024 14:29:17 +0200 Subject: [PATCH 13/24] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08c3c625..0a310d99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_scratchoff to run GTDBTk in memory mode rather than write to disk (reported by harper357, fixed by @jfy133) ### `Changed` @@ -16,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Re-add missing GTDBTk parameters into GTDBTk module (reported by harper357, fixed by @jfy133) ### `Dependencies` @@ -25,6 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` +- [#670](https://github.com/nf-core/mag/pull/670) - Deprecated --gtdbtk_pplacer_scratch due to uninuitive usage (reported by harper357, fixed by @jfy133) + ## 3.0.3 [2024-08-27] ### `Added` From a3ce46a3ccfd620cf9eda8ae52fe425a148e660d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 18 Sep 2024 14:35:31 +0200 Subject: [PATCH 14/24] Specify they are parameters --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c5afc12b..44d8fbbc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -496,9 +496,9 @@ process { withName: GTDBTK_CLASSIFYWF { ext.args = [ "--extension fa", - "--min_perc_aa ${gtdbtk_min_perc_aa}", - "--min_af ${gtdbtk_min_af}", - "--pplacer_cpus ${gtdbtk_pplacer_cpus}" + "--min_perc_aa ${params.gtdbtk_min_perc_aa}", + "--min_af ${params.gtdbtk_min_af}", + "--pplacer_cpus ${params.gtdbtk_pplacer_cpus}" ].join(' ') ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } publishDir = [ From 459f41853d4fe7e18d9c3eb9500e557e55f79f1d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Sep 2024 14:27:04 +0200 Subject: [PATCH 15/24] Typo fixes after review from @harper357 --- CHANGELOG.md | 2 +- subworkflows/local/gtdbtk.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a310d99..abd2b0c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) -- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_scratchoff to run GTDBTk in memory mode rather than write to disk (reported by harper357, fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_scratchoff to run GTDBTk in memory mode rather than write to disk (reported by @harper357, fixed by @jfy133) ### `Changed` diff --git a/subworkflows/local/gtdbtk.nf b/subworkflows/local/gtdbtk.nf index c5893f16..0216e27a 100644 --- a/subworkflows/local/gtdbtk.nf +++ b/subworkflows/local/gtdbtk.nf @@ -90,7 +90,7 @@ workflow GTDBTK { GTDBTK_CLASSIFYWF ( ch_filtered_bins.passed.groupTuple(), ch_db_for_gtdbtk, - params.gtdbtk_pplacer_scratchoff ? false : true + params.gtdbtk_pplacer_scratchoff ? false : true, gtdb_mash ) From 6183ef013d4a2072da24afe153a3fa7b0c9dce3b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Sep 2024 14:33:37 +0200 Subject: [PATCH 16/24] REname scratchoff to more intiuitive name to match behaviour --- CHANGELOG.md | 4 ++-- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- subworkflows/local/gtdbtk.nf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abd2b0c0..99765627 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) -- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_scratchoff to run GTDBTk in memory mode rather than write to disk (reported by @harper357, fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_useram to run GTDBTk in memory mode rather than write to disk (reported by @harper357, fixed by @jfy133) ### `Changed` @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` -- [#670](https://github.com/nf-core/mag/pull/670) - Deprecated --gtdbtk_pplacer_scratch due to uninuitive usage (reported by harper357, fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Deprecated --gtdbtk_pplacer_scratch due to unintuitive usage (reported by harper357, fixed by @jfy133) ## 3.0.3 [2024-08-27] diff --git a/nextflow.config b/nextflow.config index 849922d9..3b43df33 100644 --- a/nextflow.config +++ b/nextflow.config @@ -95,7 +95,7 @@ params { gtdbtk_min_perc_aa = 10 gtdbtk_min_af = 0.65 gtdbtk_pplacer_cpus = 1 - gtdbtk_pplacer_scratchoff = false + gtdbtk_pplacer_useram = false // long read preprocessing options skip_adapter_trimming = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 78ccdabc..dab2eb50 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -584,9 +584,9 @@ "description": "Number of CPUs used for the by GTDB-Tk run tool pplacer.", "help_text": "A low number of CPUs helps to reduce the memory required/reported by GTDB-Tk. See also the [GTDB-Tk documentation](https://ecogenomics.github.io/GTDBTk/faq.html#gtdb-tk-reaches-the-memory-limit-pplacer-crashes)." }, - "gtdbtk_pplacer_scratchoff": { + "gtdbtk_pplacer_useram": { "type": "boolean", - "default": true, + "default": false, "description": "Speed up pplacer step of GTDB-Tk by loading to memory.", "help_text": "Will be faster than writing to disk (default setting), however at the expense of much larger memory (RAM) requirements for GDTBTK/CLASSIFY." }, diff --git a/subworkflows/local/gtdbtk.nf b/subworkflows/local/gtdbtk.nf index 0216e27a..6da5680d 100644 --- a/subworkflows/local/gtdbtk.nf +++ b/subworkflows/local/gtdbtk.nf @@ -90,7 +90,7 @@ workflow GTDBTK { GTDBTK_CLASSIFYWF ( ch_filtered_bins.passed.groupTuple(), ch_db_for_gtdbtk, - params.gtdbtk_pplacer_scratchoff ? false : true, + params.gtdbtk_pplacer_useram ? false : true, gtdb_mash ) From 3b199432cd4e5332138dcfa7d7c40351ca1e1486 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Sep 2024 14:48:20 +0200 Subject: [PATCH 17/24] Fix missing GTDBTk files in outdir --- CHANGELOG.md | 1 + conf/modules.config | 183 ++++++++++++++++++++++---------------------- 2 files changed, 94 insertions(+), 90 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08c3c625..31ae78bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) +- [#672](https://github.com/nf-core/mag/pull/672) - Fix GTDB-Tk per-sample TSV files not being published in output directory (reported by @jhayer, fix by @jfy133) ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index 81df5bc8..b786a072 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,18 +21,18 @@ process { ] withName: FASTQC_RAW { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] ext.prefix = { "${meta.id}_run${meta.run}_raw" } - tag = { "${meta.id}_run${meta.run}_raw" } + tag = { "${meta.id}_run${meta.run}_raw" } } withName: FASTP { - ext.args = [ + ext.args = [ "-q ${params.fastp_qualified_quality}", "--cut_front", "--cut_tail", @@ -53,11 +53,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_fastp" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_PE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -77,11 +77,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_SE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -93,7 +93,7 @@ process { pattern: "*.{settings}" ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_PHIX_REMOVAL_ALIGN { @@ -111,12 +111,12 @@ process { enabled: params.save_phixremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_HOST_REMOVAL_ALIGN { - ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" - ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' + ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" + ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' ext.prefix = { "${meta.id}_run${meta.run}_host_removed" } publishDir = [ [ @@ -131,37 +131,37 @@ process { enabled: params.save_hostremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: FASTQC_TRIMMED { - ext.args = '--quiet' + ext.args = '--quiet' ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BBMAP_BBNORM { - ext.args = [ + ext.args = [ params.bbnorm_target ? "target=${params.bbnorm_target}" : '', - params.bbnorm_min ? "min=${params.bbnorm_min}" : '', + params.bbnorm_min ? "min=${params.bbnorm_min}" : '' ].join(' ').trim() publishDir = [ [ - path : { "${params.outdir}/bbmap/bbnorm/logs" }, + path: { "${params.outdir}/bbmap/bbnorm/logs" }, enabled: params.save_bbnorm_reads, - mode : params.publish_dir_mode, + mode: params.publish_dir_mode, pattern: "*.log" ], [ - path : { "${params.outdir}/bbmap/bbnorm/"}, - mode : 'copy', + path: { "${params.outdir}/bbmap/bbnorm/" }, + mode: 'copy', enabled: params.save_bbnorm_reads, - mode : params.publish_dir_mode, + mode: params.publish_dir_mode, pattern: "*.fastq.gz" ] ] @@ -169,21 +169,21 @@ process { withName: PORECHOP_PORECHOP { publishDir = [ - path: { "${params.outdir}/QC_longreads/porechop" }, - mode: params.publish_dir_mode, - pattern: "*_trimmed.fastq", - enabled: params.save_porechop_reads - ] + path: { "${params.outdir}/QC_longreads/porechop" }, + mode: params.publish_dir_mode, + pattern: "*_trimmed.fastq", + enabled: params.save_porechop_reads + ] ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } } withName: FILTLONG { publishDir = [ - path: { "${params.outdir}/QC_longreads/Filtlong" }, - mode: params.publish_dir_mode, - pattern: "*_lr_filtlong.fastq.gz", - enabled: params.save_filtlong_reads - ] + path: { "${params.outdir}/QC_longreads/Filtlong" }, + mode: params.publish_dir_mode, + pattern: "*_lr_filtlong.fastq.gz", + enabled: params.save_filtlong_reads + ] ext.prefix = { "${meta.id}_run${meta.run}_lengthfiltered" } } @@ -206,11 +206,13 @@ process { withName: NANOPLOT_RAW { ext.prefix = 'raw' - ext.args = { [ - "-p raw_", - "--title ${meta.id}_raw", - "-c darkblue", - ].join(' ').trim() } + ext.args = { + [ + "-p raw_", + "--title ${meta.id}_raw", + "-c darkblue" + ].join(' ').trim() + } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, @@ -219,11 +221,13 @@ process { } withName: NANOPLOT_FILTERED { - ext.args = { [ - "-p filtered_", - "--title ${meta.id}_filtered", - "-c darkblue", - ].join(' ').trim() } + ext.args = { + [ + "-p filtered_", + "--title ${meta.id}_filtered", + "-c darkblue" + ].join(' ').trim() + } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, @@ -249,7 +253,7 @@ process { } withName: KRAKEN2 { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, mode: params.publish_dir_mode, @@ -276,19 +280,22 @@ process { //pattern: "*.{fa.gz,log}" //'pattern' didnt work, probably because the output is in a folder, solved with 'saveAs' withName: MEGAHIT { - ext.args = params.megahit_options ?: '' + ext.args = params.megahit_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly" }, mode: params.publish_dir_mode, - saveAs: { - filename -> filename.equals('versions.yml') ? null : - filename.indexOf('.contigs.fa.gz') > 0 ? filename : - filename.indexOf('.log') > 0 ? filename : null } + saveAs: { filename -> + filename.equals('versions.yml') + ? null + : filename.indexOf('.contigs.fa.gz') > 0 + ? filename + : filename.indexOf('.log') > 0 ? filename : null + } ] } withName: SPADES { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, @@ -297,7 +304,7 @@ process { } withName: SPADESHYBRID { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, @@ -314,10 +321,10 @@ process { } withName: GENOMAD_ENDTOEND { - ext.args = [ + ext.args = [ "--cleanup", "--min-score ${params.genomad_min_score}", - "--splits ${params.genomad_splits}", + "--splits ${params.genomad_splits}" ].join(' ').trim() publishDir = [ path: { "${params.outdir}/VirusIdentification/geNomad/${meta.id}" }, @@ -327,7 +334,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' + ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' ext.prefix = { "${meta.id}.assembly" } publishDir = [ [ @@ -340,7 +347,7 @@ process { mode: params.publish_dir_mode, pattern: "*.{bam,bai}", enabled: params.save_assembly_mapped_reads - ], + ] ] } @@ -352,7 +359,7 @@ process { ] } - withName: 'BIN_SUMMARY' { + withName: BIN_SUMMARY { publishDir = [ path: { "${params.outdir}/GenomeBinning" }, mode: params.publish_dir_mode, @@ -368,10 +375,10 @@ process { ] } - withName: 'BUSCO' { - ext.args = [ - params.busco_db ? '--offline' : '' - ].join(' ').trim() + withName: BUSCO { + ext.args = [ + params.busco_db ? '--offline' : '' + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, @@ -406,8 +413,8 @@ process { ] } - withName: 'CHECKM_LINEAGEWF' { - tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } + withName: CHECKM_LINEAGEWF { + tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" } publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, @@ -416,9 +423,9 @@ process { ] } - withName: 'CHECKM_QA' { + withName: CHECKM_QA { ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_qa" } - ext.args = "-o 2 --tab_table" + ext.args = "-o 2 --tab_table" publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, @@ -426,7 +433,7 @@ process { ] } - withName: 'COMBINE_CHECKM_TSV' { + withName: COMBINE_CHECKM_TSV { ext.prefix = { "checkm_summary" } publishDir = [ path: { "${params.outdir}/GenomeBinning/QC" }, @@ -435,7 +442,7 @@ process { ] } - withName: 'GUNC_DOWNLOADDB' { + withName: GUNC_DOWNLOADDB { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC" }, mode: params.publish_dir_mode, @@ -445,7 +452,7 @@ process { } // Make sure to keep directory in sync with gunc_qc.nf - withName: 'GUNC_RUN' { + withName: GUNC_RUN { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${fasta.baseName}/" }, mode: params.publish_dir_mode, @@ -454,7 +461,7 @@ process { } // Make sure to keep directory in sync with gunc_qc.nf - withName: 'GUNC_MERGECHECKM' { + withName: GUNC_MERGECHECKM { publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${checkm_file.baseName}" }, mode: params.publish_dir_mode, @@ -492,7 +499,7 @@ process { publishDir = [ path: { "${params.outdir}/Taxonomy/GTDB-Tk/${meta.assembler}/${meta.binner}/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{log,tasv,tree.gz,fasta,fasta.gz}" + pattern: "*.{log,tsv,tree.gz,fasta,fasta.gz}" ] } @@ -546,7 +553,7 @@ process { withName: BCFTOOLS_CONSENSUS { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/variant_calling/consensus" }, + path: { "${params.outdir}/Ancient_DNA/variant_calling/consensus" }, mode: params.publish_dir_mode, pattern: "*.fa" ] @@ -556,7 +563,7 @@ process { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t" publishDir = [ - path: {"${params.outdir}/Ancient_DNA/variant_calling/index" }, + path: { "${params.outdir}/Ancient_DNA/variant_calling/index" }, mode: params.publish_dir_mode, enabled: false ] @@ -565,7 +572,7 @@ process { withName: PYDAMAGE_ANALYZE { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, + path: { "${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode ] } @@ -574,7 +581,7 @@ process { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t ${params.pydamage_accuracy}" publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, + path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode ] } @@ -582,7 +589,7 @@ process { withName: SAMTOOLS_FAIDX { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/samtools/faidx" }, + path: { "${params.outdir}/Ancient_DNA/samtools/faidx" }, mode: params.publish_dir_mode, enabled: false ] @@ -616,7 +623,7 @@ process { ] ] ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" } - ext.args = [ + ext.args = [ params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}", "--unbinned", "--seed ${params.metabat_rng_seed}" @@ -629,11 +636,9 @@ process { path: { "${params.outdir}/GenomeBinning/MaxBin2/discarded" }, mode: params.publish_dir_mode, pattern: '*.tooshort.gz' - ], + ] ] ext.prefix = { "${meta.assembler}-MaxBin2-${meta.id}" } - // if no gene found, will crash so allow ignore so rest of pipeline - // completes but without MaxBin2 results } withName: ADJUST_MAXBIN2_EXT { @@ -642,11 +647,11 @@ process { path: { "${params.outdir}/GenomeBinning/MaxBin2/bins/" }, mode: params.publish_dir_mode, pattern: '*.fa.gz' - ], + ] ] } - withName: "CONCOCT_.*" { + withName: 'CONCOCT_.*' { publishDir = [ [ path: { "${params.outdir}/GenomeBinning/CONCOCT/stats/" }, @@ -705,10 +710,10 @@ process { path: { "${params.outdir}/GenomeBinning/DASTool" }, mode: params.publish_dir_mode, pattern: '*.{tsv,log,eval,seqlength}' - ], + ] ] ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" } - ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" + ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" } withName: RENAME_POSTDASTOOL { @@ -721,7 +726,6 @@ process { [ path: { "${params.outdir}/GenomeBinning/DASTool/bins" }, mode: params.publish_dir_mode, - // pattern needs to be updated in case of new binning methods pattern: '*-{MetaBAT2,MaxBin2,CONCOCT}Refined-*.fa' ] ] @@ -729,16 +733,16 @@ process { withName: TIARA_TIARA { publishDir = [ - path: { "${params.outdir}/Taxonomy/Tiara/" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } + path: { "${params.outdir}/Taxonomy/Tiara/" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } ext.prefix = { "${meta.assembler}-${meta.id}.tiara" } } withName: TIARA_CLASSIFY { - ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } + ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.bin}-${meta.id}" } } @@ -771,13 +775,12 @@ process { ] } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - } From bfc2c0baf63c052d0fbed7174b165b6e24613d98 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Sep 2024 14:54:46 +0200 Subject: [PATCH 18/24] Update CHANGELOG.md Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31ae78bd..cf092806 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) -- [#672](https://github.com/nf-core/mag/pull/672) - Fix GTDB-Tk per-sample TSV files not being published in output directory (reported by @jhayer, fix by @jfy133) +- [#672](https://github.com/nf-core/mag/pull/673) - Fix GTDB-Tk per-sample TSV files not being published in output directory (reported by @jhayer, fix by @jfy133) ### `Dependencies` From aa1f3d9996a47eb44b0334d2688d8bdb01064961 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 23 Sep 2024 18:09:57 +0000 Subject: [PATCH 19/24] Add exit code 12 to spades(hybrid), and standardise errorStrategy format --- CHANGELOG.md | 1 + conf/base.config | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf092806..ab0a3360 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#664](https://github.com/nf-core/mag/pull/664) - Update GTDBTk to latest version, with updated column names, update GTDB to release 220 (by @dialvarezs) +- [#675](https://github.com/nf-core/mag/pull/675) - Added exit code 12 to valid SPAdes retry codes, due to OOM errors from spades-hammer (reported by @bawee, fix by @jfy133) ### `Fixed` diff --git a/conf/base.config b/conf/base.config index 2928a99b..839612ad 100644 --- a/conf/base.config +++ b/conf/base.config @@ -123,7 +123,7 @@ process { cpus = { check_megahit_cpus (8, task.attempt ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (16.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139,250] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 250) ? 'retry' : 'finish' } } //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! //exponential increase of memory and time with attempts, keep number of threads to enable reproducibility @@ -131,14 +131,14 @@ process { cpus = { check_spades_cpus (10, task.attempt) } memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in [143,137,21,1] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 12 + 21 + 12 + 1) ? 'retry' : 'finish' } maxRetries = 5 } withName: SPADESHYBRID { cpus = { check_spadeshybrid_cpus (10, task.attempt) } memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in [143,137,21,1] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 12 + 21 + 12 + 1) ? 'retry' : 'finish' } maxRetries = 5 } //returns exit code 247 when running out of memory @@ -146,7 +146,7 @@ process { cpus = { check_max (2 * task.attempt, 'cpus' ) } memory = { check_max (8.GB * task.attempt, 'memory' ) } time = { check_max (8.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 247) ? 'retry' : 'finish' } } withName: METABAT2_METABAT2 { cpus = { check_max (8 * task.attempt, 'cpus' ) } @@ -168,6 +168,6 @@ process { withName: DASTOOL_DASTOOL { // if SCGs not found, bins cannot be assigned and DAS_tool will die with exit status 1 - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : task.exitStatus == 1 ? 'ignore' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : task.exitStatus == 1 ? 'ignore' : 'finish' } } } From 868cec1a830eb7f7000bb6f14581255214bf2149 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 24 Sep 2024 11:06:40 +0200 Subject: [PATCH 20/24] Apply suggestions from code review Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- conf/base.config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab0a3360..d8bebe99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#664](https://github.com/nf-core/mag/pull/664) - Update GTDBTk to latest version, with updated column names, update GTDB to release 220 (by @dialvarezs) -- [#675](https://github.com/nf-core/mag/pull/675) - Added exit code 12 to valid SPAdes retry codes, due to OOM errors from spades-hammer (reported by @bawee, fix by @jfy133) +- [#676](https://github.com/nf-core/mag/pull/676) - Added exit code 12 to valid SPAdes retry codes, due to OOM errors from spades-hammer (reported by @bawee, fix by @jfy133) ### `Fixed` diff --git a/conf/base.config b/conf/base.config index 839612ad..1e2540f3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -131,14 +131,14 @@ process { cpus = { check_spades_cpus (10, task.attempt) } memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in ((130..145) + 104 + 12 + 21 + 12 + 1) ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 21 + 12 + 1) ? 'retry' : 'finish' } maxRetries = 5 } withName: SPADESHYBRID { cpus = { check_spadeshybrid_cpus (10, task.attempt) } memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in ((130..145) + 104 + 12 + 21 + 12 + 1) ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 21 + 12 + 1) ? 'retry' : 'finish' } maxRetries = 5 } //returns exit code 247 when running out of memory From ce9e541cd7d7b6ddf80783707535105349334cd0 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 2 Oct 2024 13:42:41 +0200 Subject: [PATCH 21/24] Bump version for release --- CHANGELOG.md | 4 ++-- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76ac357c..d9c81bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## dev [unreleased] +## 3.1.0 [2024-10-02] ### `Added` - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) -- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_useram to run GTDBTk in memory mode rather than write to disk (reported by @harper357, fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_useram to run GTDBTk in memory mode rather than write to disk (requested by @harper357, fixed by @jfy133) ### `Changed` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 9f4e5e7e..8dcc79aa 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/mag + This report has been generated by the nf-core/mag analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-mag-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 3b43df33..312908fc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -391,7 +391,7 @@ manifest { description = """Assembly, binning and annotation of metagenomes""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '3.0.4dev' + version = '3.1.0' doi = '10.1093/nargab/lqac007' } From 63ce1cb2238ddcfea0998e4668e50f78851281d3 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 2 Oct 2024 13:44:04 +0200 Subject: [PATCH 22/24] Harshil align --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 312908fc..2ea9d994 100644 --- a/nextflow.config +++ b/nextflow.config @@ -95,7 +95,7 @@ params { gtdbtk_min_perc_aa = 10 gtdbtk_min_af = 0.65 gtdbtk_pplacer_cpus = 1 - gtdbtk_pplacer_useram = false + gtdbtk_pplacer_useram = false // long read preprocessing options skip_adapter_trimming = false From 375c018a396fc485bcd2cb501b89306f7f71bf46 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Oct 2024 10:07:13 +0200 Subject: [PATCH 23/24] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- CHANGELOG.md | 6 +++--- nextflow_schema.json | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9c81bd1..e08d2a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 3.1.0 [2024-10-02] +## 3.1.0 [2024-10-03] ### `Added` - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) -- [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_useram to run GTDBTk in memory mode rather than write to disk (requested by @harper357, fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Added `--gtdbtk_pplacer_useram` to run GTDBTk in memory mode rather than write to disk (requested by @harper357, fixed by @jfy133) ### `Changed` @@ -29,7 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` -- [#670](https://github.com/nf-core/mag/pull/670) - Deprecated --gtdbtk_pplacer_scratch due to unintuitive usage (reported by harper357, fixed by @jfy133) +- [#670](https://github.com/nf-core/mag/pull/670) - Deprecated `--gtdbtk_pplacer_scratch` due to unintuitive usage (reported by harper357, fixed by @jfy133) ## 3.0.3 [2024-08-27] diff --git a/nextflow_schema.json b/nextflow_schema.json index dab2eb50..aaff9835 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -385,8 +385,8 @@ }, "host_fasta_bowtie2index": { "type": "string", - "description": "Bowtie2 index directory corresponding to --host_fasta reference file for host contamination removal.", - "help_text": "This parameter must be used in combination with --host_fasta, and should be a directory containing files from the output of `bowtie2-build`, i.e. files ending in `.bt2`" + "description": "Bowtie2 index directory corresponding to `--host_fasta` reference file for host contamination removal.", + "help_text": "This parameter must be used in combination with `--host_fasta`, and should be a directory containing files from the output of `bowtie2-build`, i.e. files ending in `.bt2`" }, "host_removal_verysensitive": { "type": "boolean", From a4d80ac704f7b1163b836863fd57a6f345866e6a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Oct 2024 16:08:27 +0200 Subject: [PATCH 24/24] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e08d2a9f..a80183c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 3.1.0 [2024-10-03] +## 3.1.0 [2024-10-04] ### `Added`