Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Submitting PR for bwa for dragen duplex #66

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,9 @@ workflows:
subclass: WDL
primaryDescriptorPath: /CNV_Array_Prober/cnvArrayProber.wdl
testParameterFiles:
- /CNV_Array_Prober/cnvArrayProber.inputs.json
- /CNV_Array_Prober/cnvArrayProber.inputs.json
- name: RevertBamAndBwaAln
subclass: WDL
primaryDescriptorPath: /Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.wdl
testParameterFiles:
- /Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.inputs.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Import BaitSetNameCheck task
import "../../checkBaitSetName/checkBaitSetName.dev.wdl" as checkBaitSetName
import "../RevertBamAndBwaAln/subworkflows/CopyUmiFromReadName.wdl" as copyUmi

workflow GenerateDuplexConsensusBams {

Expand Down Expand Up @@ -43,6 +44,8 @@ workflow GenerateDuplexConsensusBams {
Int? num_clip_bases_three_prime
Boolean? run_bwa_mem_on_raw
Boolean run_bwa_mem_on_raw_or_default = select_first([run_bwa_mem_on_raw, false])
Boolean? copy_umi_from_readname
Boolean copy_umi_or_default = select_first([copy_umi_from_readname, false])
Int compression_level

# scripts
Expand All @@ -65,6 +68,14 @@ workflow GenerateDuplexConsensusBams {
target_intervals = target_intervals,
fail_task = fail_on_intervals_mismatch
}
if(copy_umi_or_default){
call copyUmi.CopyUmiTask as CopyUmiTask {
input:
bam_file = bam_file,
bam_index = bam_index,
base_name = base_name
}
}
# Get the version of BWA that we are using.
call GetBwaVersion {
input:
Expand All @@ -77,8 +88,8 @@ workflow GenerateDuplexConsensusBams {
call DownsampleSam {
input:
bloodbiopsydocker = bloodbiopsydocker,
bam_file = bam_file,
bam_index = bam_index,
bam_file = select_first([CopyUmiTask.umi_extracted_bam, bam_file]),
bam_index = select_first([CopyUmiTask.umi_extracted_bam_index, bam_index]),
downsample_probability = downsample_probability,
base_name = base_name,
preemptible_attempts = preemptible_attempts,
Expand All @@ -91,7 +102,7 @@ workflow GenerateDuplexConsensusBams {
call QuerySortSam {
input:
bloodbiopsydocker = bloodbiopsydocker,
input_bam = select_first([DownsampleSam.output_bam, bam_file]),
input_bam = select_first([DownsampleSam.output_bam, CopyUmiTask.umi_extracted_bam, bam_file]),
base_name = base_name,
preemptible_attempts = preemptible_attempts,
disk_pad = disk_pad
Expand All @@ -117,8 +128,8 @@ workflow GenerateDuplexConsensusBams {
}
}

File preprocessed_raw_bam = select_first([AlignRawBamWithBwaMem.output_bam, DownsampleSam.output_bam, bam_file])
File preprocessed_raw_bam_index = select_first([AlignRawBamWithBwaMem.output_bam_index, DownsampleSam.output_bam_index, bam_index])
File preprocessed_raw_bam = select_first([AlignRawBamWithBwaMem.output_bam, DownsampleSam.output_bam, CopyUmiTask.umi_extracted_bam, bam_file])
File preprocessed_raw_bam_index = select_first([AlignRawBamWithBwaMem.output_bam_index, DownsampleSam.output_bam_index, CopyUmiTask.umi_extracted_bam_index, bam_index])

# Collect HS or Targeted PCR metrics after deduplication by start and stop
# position (but not including UMIs).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"AlignRawReadsBwaAln.CopyUmiTask.bloodbiopsydocker":"${}","AlignRawReadsBwaAln.GetBwaVersion.bwa_path":"/usr/gitc/bwa","AlignRawReadsBwaAln.GetBwaVersion.preemptible_attempts":"${}","AlignRawReadsBwaAln.MBATask.bwa_tool":"bwa","AlignRawReadsBwaAln.MBATask.bwa_version":"0.7.15-r1140","AlignRawReadsBwaAln.MBATask.compression_level":"${workspace.compression_level}","AlignRawReadsBwaAln.MBATask.cpu":"${}","AlignRawReadsBwaAln.MBATask.disk_size":"${250}","AlignRawReadsBwaAln.MBATask.extra_mem":"${}","AlignRawReadsBwaAln.MBATask.gatk_docker":"${}","AlignRawReadsBwaAln.MBATask.mba_extra_args":"${}","AlignRawReadsBwaAln.MBATask.preemptible_tries":"${}","AlignRawReadsBwaAln.MBATask.sort_order":"${}","AlignRawReadsBwaAln.bwa_alignment.cpu":"${8}","AlignRawReadsBwaAln.bwa_alignment.diskSpaceGb":"${500}","AlignRawReadsBwaAln.bwa_alignment.memoryGb":"${32}","AlignRawReadsBwaAln.extract_umis":"${true}","AlignRawReadsBwaAln.gitc_docker":"us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135","AlignRawReadsBwaAln.input_bam":"${this.bam_file}","AlignRawReadsBwaAln.input_bam_index":"${this.bai_file}","AlignRawReadsBwaAln.ref_alt":"${workspace.reference_alt}","AlignRawReadsBwaAln.ref_amb":"${workspace.reference_amb}","AlignRawReadsBwaAln.ref_ann":"${workspace.reference_ann}","AlignRawReadsBwaAln.ref_bwt":"${workspace.reference_bwt}","AlignRawReadsBwaAln.ref_dict":"${workspace.reference_dict}","AlignRawReadsBwaAln.ref_fai":"${workspace.reference_index}","AlignRawReadsBwaAln.ref_fasta":"${workspace.reference}","AlignRawReadsBwaAln.ref_pac":"${workspace.reference_pac}","AlignRawReadsBwaAln.ref_sa":"${workspace.reference_sa}","AlignRawReadsBwaAln.revertsam_task.additional_args":"-RHC 
false","AlignRawReadsBwaAln.revertsam_task.disk_buffer":"${}","AlignRawReadsBwaAln.revertsam_task.docker_override":"${}","AlignRawReadsBwaAln.revertsam_task.gatk_path":"${}","AlignRawReadsBwaAln.revertsam_task.maxRetries":"${}","AlignRawReadsBwaAln.revertsam_task.mem":"${}","AlignRawReadsBwaAln.revertsam_task.preemptible_count":"${}","AlignRawReadsBwaAln.revertsam_task.sort_order":"${}","AlignRawReadsBwaAln.revertsam_task.threads":"${}","AlignRawReadsBwaAln.sample_name":"${this.sample_id}","AlignRawReadsBwaAln.samtofastq_task.disk_space":"${}","AlignRawReadsBwaAln.samtofastq_task.docker_override":"${}","AlignRawReadsBwaAln.samtofastq_task.gatk_override":"${}","AlignRawReadsBwaAln.samtofastq_task.memory":"${}","AlignRawReadsBwaAln.samtofastq_task.num_preempt":"${0}","AlignRawReadsBwaAln.samtofastq_task.num_threads":"${}","AlignRawReadsBwaAln.sortbam.diskgb_buffer":"${200}"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import "./subworkflows/CopyUmiFromReadName.wdl" as CopyUmiFromReadName
import "./subworkflows/RevertSam.wdl" as RevertSam
import "./subworkflows/BwaAlignment.wdl" as bwa_aln
import "./subworkflows/MergeBamAlignment.wdl" as MergeBamAlignment
import "./subworkflows/SamToFastq.wdl" as samtofastq

# Re-aligns a BAM with the bwa aln/sampe pipeline.
#
# Call order (as wired below):
#   1. GetBwaVersion   - runs in the gitc image and reports the bwa version.
#   2. CopyUmiTask     - only when `extract_umis` is true.
#   3. revertsam_task  - consumes the CopyUmiTask BAM if it ran, else input_bam.
#   4. samtofastq_task - consumes the RevertSam output BAM.
#   5. bwa_alignment   - one shard per entry of samtofastq_task.firstEndFastqs.
#   6. MBATask         - receives the gathered shard outputs plus the RevertSam BAM.
#   7. sortbam         - consumes the MBATask BAM.
workflow AlignRawReadsBwaAln {
  # Sample-level inputs.
  String sample_name
  File input_bam
  File input_bam_index

  # Gate for the optional CopyUmiTask call.
  Boolean extract_umis

  # Image used by GetBwaVersion and by every bwa_alignment shard; falls back to
  # the pinned genomes-in-the-cloud tag when the caller supplies nothing.
  String? gitc_docker
  String gitc_docker_or_default = select_first([gitc_docker, "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135"])

  # Reference FASTA with its index and dictionary.
  File ref_fasta
  File ref_fai
  File ref_dict

  # Additional reference files handed to each bwa_alignment shard.
  File ref_alt
  File ref_amb
  File ref_ann
  File ref_bwt
  File ref_pac
  File ref_sa

  # NOTE(review): GetBwaVersion.version is not consumed by any call in this
  # workflow; it is only exposed as a call output.
  call GetBwaVersion {
    input:
      gitc_docker = gitc_docker_or_default
  }

  if (extract_umis) {
    call CopyUmiFromReadName.CopyUmiTask as CopyUmiTask {
      input:
        bam_file = input_bam,
        bam_index = input_bam_index,
        base_name = sample_name
    }
  }

  # CopyUmiTask.umi_extracted_bam is optional here (the call sits in an `if`),
  # so select_first falls through to input_bam when the call was skipped.
  call RevertSam.RevertSam as revertsam_task {
    input:
      input_bam = select_first([CopyUmiTask.umi_extracted_bam, input_bam]),
      base_name = sample_name,
      ref_fasta = ref_fasta,
      ref_fasta_index = ref_fai,
      ref_fasta_dict = ref_dict
  }

  call samtofastq.samtofastq as samtofastq_task {
    input:
      input_bam = revertsam_task.output_bam
  }

  # firstEndFastqs and secondEndFastqs are indexed in lock-step, one pair per shard.
  scatter (shard in range(length(samtofastq_task.firstEndFastqs))) {
    call bwa_aln.BwaAlignment as bwa_alignment {
      input:
        refFasta = ref_fasta,
        refFastaIndex = ref_fai,
        refFastaDict = ref_dict,
        ref_alt = ref_alt,
        ref_amb = ref_amb,
        ref_ann = ref_ann,
        ref_bwt = ref_bwt,
        ref_pac = ref_pac,
        ref_sa = ref_sa,
        firstEndFastq = samtofastq_task.firstEndFastqs[shard],
        secondEndFastq = samtofastq_task.secondEndFastqs[shard],
        sampleName = sample_name,
        gitc_docker = gitc_docker_or_default
    }
  }

  # Referenced outside the scatter, the bwa_alignment outputs are the gathered
  # per-shard arrays.
  call MergeBamAlignment.MergeBamAlignmentTask as MBATask {
    input:
      mapped_bam = bwa_alignment.raw_aligned_bam,
      unmapped_bam = revertsam_task.output_bam,
      bwa_commandline = bwa_alignment.bwa_command,
      ref_fasta = ref_fasta,
      ref_fasta_index = ref_fai,
      ref_dict = ref_dict,
      output_bam_basename = sample_name
  }

  call sortbam {
    input:
      input_bam = MBATask.output_bam,
      output_bam_basename = sample_name
  }
}

# Captures the version reported by the bwa executable at `bwa_path`.
#
# Inputs:
#   bwa_path             - path to the bwa executable inside the container.
#   gitc_docker          - image the command runs in.
#   preemptible_attempts - optional preemptible count; 2 when unset.
# Output:
#   version - stdout of the pipeline: lines beginning with "Version" with the
#             "Version: " prefix removed.
#
# The executable's stderr is folded into stdout before filtering. The task's
# exit status is that of the last pipeline stage (sed), because no `pipefail`
# is set.
task GetBwaVersion {
  String bwa_path
  String gitc_docker
  Int? preemptible_attempts

  command {
    ${bwa_path} 2>&1 | grep -e '^Version' | sed 's/Version: //'
  }

  output {
    String version = read_string(stdout())
  }

  runtime {
    docker: gitc_docker
    preemptible: select_first([preemptible_attempts, 2])
    maxRetries: 3
    memory: "1 GB"
  }
}

# Coordinate-sorts a BAM with Picard SortSam, also writing an index and an MD5.
#
# Inputs:
#   input_bam           - BAM to sort.
#   output_bam_basename - stem for the three output files.
#   compression_level   - passed as -Dsamjdk.compression_level (default 2).
#   preemptible_tries   - preemptible count (default 1).
#   diskgb_buffer       - extra GB added to the 50 GB base disk.
#   extra_mem           - extra GB added to the 10 GB base memory.
# Outputs:
#   output_bam, output_bam_index (.bai), output_bam_md5 (.bam.md5)
task sortbam {
  File input_bam
  String output_bam_basename

  Int? compression_level = 2
  Int? preemptible_tries = 1

  # Disk: fixed 50 GB plus an optional caller-supplied buffer.
  Int? diskgb_buffer
  Int diskSpaceGb = 50 + select_first([diskgb_buffer, 0])

  # Memory: fixed 10 GB plus an optional caller-supplied increment.
  Float? extra_mem
  Float memory = 10 + select_first([extra_mem, 0])

  command <<<
    set -euxo pipefail

    java -Dsamjdk.compression_level=${compression_level} -Xms4000m -jar /usr/gitc/picard.jar \
      SortSam \
      INPUT=${input_bam} \
      OUTPUT=${output_bam_basename}.bam \
      SORT_ORDER="coordinate" \
      CREATE_INDEX=true \
      CREATE_MD5_FILE=true \
      MAX_RECORDS_IN_RAM=300000
  >>>

  output {
    File output_bam = "${output_bam_basename}.bam"
    File output_bam_index = "${output_bam_basename}.bai"
    File output_bam_md5 = "${output_bam_basename}.bam.md5"
  }

  runtime {
    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
    memory: memory + " GB"
    disks: "local-disk ${diskSpaceGb} HDD"
    bootDiskSizeGb: 12
    preemptible: select_first([preemptible_tries])
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Wrapper workflow whose only statement is a call to the BwaAlignment task.
# The call wires no inputs, so every required task input has to be supplied
# from outside the workflow (e.g. an inputs file).
# NOTE(review): presumably exists so the task file can be run on its own - confirm.
workflow BwaAlignmentTest {
call BwaAlignment
}

# Aligns one pair of FASTQ files with `bwa aln` + `bwa sampe` and emits a
# queryname-sorted BAM together with the bwa command lines that were run.
#
# Inputs:
#   refFasta, refFastaIndex, refFastaDict - reference FASTA and companions.
#   ref_alt/amb/ann/bwt/pac/sa            - additional reference files; they
#                                           are not named in the command.
#                                           NOTE(review): presumably localized
#                                           next to refFasta so bwa can find
#                                           its index - confirm.
#   firstEndFastq, secondEndFastq         - the two FASTQ files of the pair.
#   sampleName                            - stem for the SAM/BAM outputs.
#   gitc_docker                           - image the command runs in.
#   memoryGb, diskSpaceGb, cpu            - runtime sizing; cpu is also the
#                                           bwa thread count (-t).
# Outputs:
#   raw_aligned_bam - `${sampleName}.aligned.bam` (samtools sort -n).
#   bwa_command     - contents of bwa_cmd.txt: the two `bwa aln` invocations
#                     and the `bwa sampe` invocation joined into one string.
task BwaAlignment {
  File refFasta
  File refFastaIndex
  File refFastaDict
  File ref_alt
  File ref_amb
  File ref_ann
  File ref_bwt
  File ref_pac
  File ref_sa
  File firstEndFastq
  # Bare file name, and file name with a trailing ".fastq.gz" removed, for each
  # end; used for the working copies and the .sai outputs.
  String fq1 = basename(firstEndFastq)
  String basename1 = basename(firstEndFastq, ".fastq.gz")
  File secondEndFastq
  String fq2 = basename(secondEndFastq)
  String basename2 = basename(secondEndFastq, ".fastq.gz")
  String sampleName
  String gitc_docker
  Int memoryGb
  Int diskSpaceGb
  Int cpu

  # `set -euo pipefail` makes the script stop at the first failing step.
  # Without it only the exit status of the final `samtools sort` decided
  # whether the task succeeded, so a failed `bwa aln`/`bwa sampe` could go
  # unnoticed.
  #
  # The bwa_cmd string is assembled alongside the real invocations. `\;` inside
  # double quotes is kept verbatim by the shell, so the recorded string carries
  # a literal backslash before each semicolon.
  # NOTE(review): confirm that is what the consumer of bwa_command expects.
  command <<<
    set -euo pipefail

    mv ${firstEndFastq} ./${fq1}
    mv ${secondEndFastq} ./${fq2}

    /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t ${cpu} -o 1 ${refFasta} ./${fq1} -f ./${basename1}.sai
    export bwa_cmd="/usr/gitc/bwa aln -q 5 -l 32 -k 2 -t "${cpu}" -o 1 "${refFasta}" ./"${fq1}" -f ./"${basename1}".sai\;"

    /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t ${cpu} -o 1 ${refFasta} ./${fq2} -f ./${basename2}.sai
    export bwa_cmd=$bwa_cmd" /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t "${cpu}" -o 1 "${refFasta}" ./"${fq2}" -f ./"${basename2}".sai\;"

    /usr/gitc/bwa sampe -P ${refFasta} ./${basename1}.sai ./${basename2}.sai ./${fq1} ./${fq2} -f ./${sampleName}.aligned.sam
    export bwa_cmd=$bwa_cmd" /usr/gitc/bwa sampe -P "${refFasta}" ./"${basename1}".sai ./"${basename2}".sai ./"${fq1}" ./"${fq2}" -f ./"${sampleName}".aligned.sam"
    echo "$bwa_cmd" > bwa_cmd.txt

    samtools sort -n ${sampleName}.aligned.sam -o ${sampleName}.aligned.bam
  >>>

  output {
    File raw_aligned_bam = "${sampleName}.aligned.bam"
    String bwa_command = read_string("bwa_cmd.txt")
  }

  runtime {
    docker: gitc_docker
    memory: "${memoryGb} GB"
    cpu: "${cpu}"
    disks: "local-disk ${diskSpaceGb} HDD"
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Wrapper workflow whose only statement is a call to the CopyUmiTask task.
# The call wires no inputs, so the task's required inputs (base_name, bam_file,
# bam_index) have to be supplied from outside the workflow.
workflow CopyUmiFromReadName {
call CopyUmiTask
}

# Runs the fgbio `CopyUmiFromReadName` tool on a BAM.
#
# Inputs:
#   bam_file, bam_index       - input BAM and its index; both are symlinked
#                               into the working directory under
#                               `${base_name}_input.*` before the tool runs.
#   base_name                 - stem for the symlinks and for the output BAM.
#   bloodbiopsydocker         - image to run in (defaults to the pinned tag).
#   fgbio_override            - alternative fgbio jar; /usr/fgbio-2.0.2.jar
#                               when unset.
#   remove_umi_from_read_name - value passed to --remove-umi (default true).
#   cpu, preemptible, maxRetries, disk_pad, extra_mem - runtime sizing knobs.
# Outputs:
#   umi_extracted_bam       - `${base_name}.bam`
#   umi_extracted_bam_index - `${base_name}.bai`
#     NOTE(review): the command never names the .bai explicitly; this assumes
#     fgbio writes it next to the output BAM - confirm.
task CopyUmiTask {
  File bam_file
  File bam_index
  String base_name

  String? bloodbiopsydocker = "us.gcr.io/tag-team-160914/liquidbiopsy:0.0.4.5"
  String? fgbio_override
  Boolean? remove_umi_from_read_name = true

  Int? cpu = 4
  Int? preemptible = 2
  Int? maxRetries = 1

  # Disk: five times the input BAM size (rounded up, in GB) plus optional padding.
  Int? disk_pad
  Int disk_size = ceil(size(bam_file, "GB") * 5) + select_first([disk_pad,0])

  # Memory: 25 GB plus an optional increment. The JVM heap (in MB) is sized at
  # 1000 per GB of task memory minus 500, leaving headroom below the task limit.
  Float? extra_mem
  Float mem = 25 + select_first([extra_mem, 0])
  Int compute_mem = ceil(mem) * 1000 - 500

  command {
    export FGBIO_LOCAL_JAR=${default="/usr/fgbio-2.0.2.jar" fgbio_override}

    ln -vs ${bam_file} ${base_name}_input.bam
    ln -vs ${bam_index} ${base_name}_input.bai

    java -Xmx${compute_mem}m -jar $FGBIO_LOCAL_JAR \
      CopyUmiFromReadName \
      -i ${base_name}_input.bam \
      -o ${base_name}.bam \
      --remove-umi ${remove_umi_from_read_name}
  }

  runtime {
    docker: select_first([bloodbiopsydocker])
    cpu: select_first([cpu])
    memory: mem + " GB"
    disks: "local-disk " + disk_size + " HDD, /cromwell_root/tmp 500 HDD"
    preemptible: select_first([preemptible])
    maxRetries: select_first([maxRetries])
  }

  output {
    File umi_extracted_bam = "${base_name}.bam"
    File umi_extracted_bam_index = "${base_name}.bai"
  }
}
Loading
Loading