Skip to content

Commit

Permalink
Update nf-hello-gatk to match format in hello-gatk training
Browse files Browse the repository at this point in the history
Changes:
 - Uses samplesheet input (with raw GitHub URLs)
 - Uses file() on reference data objects instead of channels
 - Uses reads_bam as channel name
 - calling_intervals variable becomes intervals
  • Loading branch information
adamrtalbot committed Sep 27, 2024
1 parent f9e14b1 commit b7e8e6e
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 24 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: test-pipeline  # CI smoke test: runs the pipeline end-to-end with its default parameters

on: [push, pull_request]

# Cancel an in-progress run when a newer one starts in the same group (same workflow + same PR number, or same ref when there is no PR)
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  nextflow:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4  # v2 targets a retired Node runtime; v4 is the maintained major

      - name: setup-nextflow
        uses: nf-core/setup-nextflow@v1

      - name: run-nextflow
        run: nextflow run .  # no arguments: uses the params defaults declared in main.nf
Binary file removed data/ref.tar.gz
Binary file not shown.
3 changes: 3 additions & 0 deletions data/sample_bams.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://github.com/seqeralabs/nf-hello-gatk/raw/refs/heads/main/data/bam/reads_mother.bam
https://github.com/seqeralabs/nf-hello-gatk/raw/refs/heads/main/data/bam/reads_father.bam
https://github.com/seqeralabs/nf-hello-gatk/raw/refs/heads/main/data/bam/reads_son.bam
40 changes: 18 additions & 22 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -137,43 +137,39 @@ workflow {
*/

// Primary input
params.reads_bam = "${workflow.projectDir}/data/bam/*.bam"
params.reads_bam = "${workflow.projectDir}/data/sample_bams.txt"

// Accessory files
params.reference = "${workflow.projectDir}/data/ref/ref.fasta"
params.reference_index = "${workflow.projectDir}/data/ref/ref.fasta.fai"
params.reference_dict = "${workflow.projectDir}/data/ref/ref.dict"
params.calling_intervals = "${workflow.projectDir}/data/ref/intervals.bed"
params.intervals = "${workflow.projectDir}/data/ref/intervals.bed"

// Base name for final output file
params.cohort_name = "family_trio"

// Output directory
params.outdir = "results"

// Create input channel from BAM files
// We convert it to a tuple with the file name and the file path
// See https://www.nextflow.io/docs/latest/script.html#getting-file-attributes
bam_ch = Channel.fromPath(params.reads_bam, checkIfExists: true)
// Create input channel from list of input files in plain text
reads_ch = Channel.fromPath(params.reads_bam).splitText()

// Create reference channels using the fromPath channel factory
// The collect converts from a queue channel to a value channel
// See https://www.nextflow.io/docs/latest/channel.html#channel-types for details
ref_ch = Channel.fromPath(params.reference, checkIfExists: true).collect()
ref_index_ch = Channel.fromPath(params.reference_index, checkIfExists: true).collect()
ref_dict_ch = Channel.fromPath(params.reference_dict, checkIfExists: true).collect()
calling_intervals_ch = Channel.fromPath(params.calling_intervals, checkIfExists: true).collect()
// Load the accessory files (reference and intervals) as file objects rather than channels
ref_file = file(params.reference)
ref_index_file = file(params.reference_index)
ref_dict_file = file(params.reference_dict)
intervals_file = file(params.intervals)

// Create index file for input BAM file
SAMTOOLS_INDEX(bam_ch)
SAMTOOLS_INDEX(reads_ch)

// Call variants from the indexed BAM file
GATK_HAPLOTYPECALLER(
SAMTOOLS_INDEX.out,
ref_ch,
ref_index_ch,
ref_dict_ch,
calling_intervals_ch
ref_file,
ref_index_file,
ref_dict_file,
intervals_file
)

all_vcfs = GATK_HAPLOTYPECALLER.out[0].collect()
Expand All @@ -184,10 +180,10 @@ workflow {
all_vcfs,
all_tbis,
params.cohort_name,
ref_ch,
ref_index_ch,
ref_dict_ch,
calling_intervals_ch
ref_file,
ref_index_file,
ref_dict_file,
intervals_file
)

BCFTOOLS_STATS(
Expand Down
5 changes: 3 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
"outdir": {
"type": "string",
"description": "Output directory to write results to.",
"default": "results"
"default": "results",
"format": "path"
},
"reference": {
"type": "string",
Expand All @@ -36,7 +37,7 @@
"description": "Path to genome GATK dictionary file (.dict)",
"format": "file-path"
},
"calling_intervals": {
"intervals": {
"type": "string",
"description": "Path to intervals for variant calling in BED format (.bed)",
"format": "file-path"
Expand Down

0 comments on commit b7e8e6e

Please sign in to comment.