diff --git a/.dockstore.yml b/.dockstore.yml index 60df846..69cf6e5 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -133,4 +133,9 @@ workflows: subclass: WDL primaryDescriptorPath: /CNV_Array_Prober/cnvArrayProber.wdl testParameterFiles: - - /CNV_Array_Prober/cnvArrayProber.inputs.json \ No newline at end of file + - /CNV_Array_Prober/cnvArrayProber.inputs.json + - name: RevertBamAndBwaAln + subclass: WDL + primaryDescriptorPath: /Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.wdl + testParameterFiles: + - /Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.inputs.json \ No newline at end of file diff --git a/Liquid_Biopsy_Duplex_Analysis/GenerateDuplexConsensusBams/GenerateDuplexConsensusBams.wdl b/Liquid_Biopsy_Duplex_Analysis/GenerateDuplexConsensusBams/GenerateDuplexConsensusBams.wdl index db9444c..b506717 100644 --- a/Liquid_Biopsy_Duplex_Analysis/GenerateDuplexConsensusBams/GenerateDuplexConsensusBams.wdl +++ b/Liquid_Biopsy_Duplex_Analysis/GenerateDuplexConsensusBams/GenerateDuplexConsensusBams.wdl @@ -1,5 +1,6 @@ # Import BaitSetNameCheck task import "../../checkBaitSetName/checkBaitSetName.dev.wdl" as checkBaitSetName +import "../RevertBamAndBwaAln/subworkflows/CopyUmiFromReadName.wdl" as copyUmi workflow GenerateDuplexConsensusBams { @@ -43,6 +44,8 @@ workflow GenerateDuplexConsensusBams { Int? num_clip_bases_three_prime Boolean? run_bwa_mem_on_raw Boolean run_bwa_mem_on_raw_or_default = select_first([run_bwa_mem_on_raw, false]) + Boolean? copy_umi_from_readname + Boolean copy_umi_or_default = select_first([copy_umi_from_readname, false]) Int compression_level # scripts @@ -65,6 +68,14 @@ workflow GenerateDuplexConsensusBams { target_intervals = target_intervals, fail_task = fail_on_intervals_mismatch } + if(copy_umi_or_default){ + call copyUmi.CopyUmiTask as CopyUmiTask { + input: + bam_file = bam_file, + bam_index = bam_index, + base_name = base_name + } + } # Get the version of BWA that we are using. 
call GetBwaVersion { input: @@ -77,8 +88,8 @@ workflow GenerateDuplexConsensusBams { call DownsampleSam { input: bloodbiopsydocker = bloodbiopsydocker, - bam_file = bam_file, - bam_index = bam_index, + bam_file = select_first([CopyUmiTask.umi_extracted_bam, bam_file]), + bam_index = select_first([CopyUmiTask.umi_extracted_bam_index, bam_index]), downsample_probability = downsample_probability, base_name = base_name, preemptible_attempts = preemptible_attempts, @@ -91,7 +102,7 @@ workflow GenerateDuplexConsensusBams { call QuerySortSam { input: bloodbiopsydocker = bloodbiopsydocker, - input_bam = select_first([DownsampleSam.output_bam, bam_file]), + input_bam = select_first([DownsampleSam.output_bam, CopyUmiTask.umi_extracted_bam, bam_file]), base_name = base_name, preemptible_attempts = preemptible_attempts, disk_pad = disk_pad @@ -117,8 +128,8 @@ workflow GenerateDuplexConsensusBams { } } - File preprocessed_raw_bam = select_first([AlignRawBamWithBwaMem.output_bam, DownsampleSam.output_bam, bam_file]) - File preprocessed_raw_bam_index = select_first([AlignRawBamWithBwaMem.output_bam_index, DownsampleSam.output_bam_index, bam_index]) + File preprocessed_raw_bam = select_first([AlignRawBamWithBwaMem.output_bam, DownsampleSam.output_bam, CopyUmiTask.umi_extracted_bam, bam_file]) + File preprocessed_raw_bam_index = select_first([AlignRawBamWithBwaMem.output_bam_index, DownsampleSam.output_bam_index, CopyUmiTask.umi_extracted_bam_index, bam_index]) # Collect HS or Targeted PCR metrics after deduplication by start and stop # position (but not incluing UMIs). 
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.inputs.json b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.inputs.json
new file mode 100644
index 0000000..5c05930
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.inputs.json
@@ -0,0 +1 @@
+{"AlignRawReadsBwaAln.CopyUmiTask.bloodbiopsydocker":"${}","AlignRawReadsBwaAln.GetBwaVersion.bwa_path":"/usr/gitc/bwa","AlignRawReadsBwaAln.GetBwaVersion.preemptible_attempts":"${}","AlignRawReadsBwaAln.MBATask.bwa_tool":"bwa","AlignRawReadsBwaAln.MBATask.bwa_version":"0.7.15-r1140","AlignRawReadsBwaAln.MBATask.compression_level":"${workspace.compression_level}","AlignRawReadsBwaAln.MBATask.cpu":"${}","AlignRawReadsBwaAln.MBATask.disk_size":"${250}","AlignRawReadsBwaAln.MBATask.extra_mem":"${}","AlignRawReadsBwaAln.MBATask.gatk_docker":"${}","AlignRawReadsBwaAln.MBATask.mba_extra_args":"${}","AlignRawReadsBwaAln.MBATask.preemptible_tries":"${}","AlignRawReadsBwaAln.MBATask.sort_order":"${}","AlignRawReadsBwaAln.bwa_alignment.cpu":"${8}","AlignRawReadsBwaAln.bwa_alignment.diskSpaceGb":"${500}","AlignRawReadsBwaAln.bwa_alignment.memoryGb":"${32}","AlignRawReadsBwaAln.extract_umis":"${true}","AlignRawReadsBwaAln.gitc_docker":"us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135","AlignRawReadsBwaAln.input_bam":"${this.bam_file}","AlignRawReadsBwaAln.input_bam_index":"${this.bai_file}","AlignRawReadsBwaAln.ref_alt":"${workspace.reference_alt}","AlignRawReadsBwaAln.ref_amb":"${workspace.reference_amb}","AlignRawReadsBwaAln.ref_ann":"${workspace.reference_ann}","AlignRawReadsBwaAln.ref_bwt":"${workspace.reference_bwt}","AlignRawReadsBwaAln.ref_dict":"${workspace.reference_dict}","AlignRawReadsBwaAln.ref_fai":"${workspace.reference_index}","AlignRawReadsBwaAln.ref_fasta":"${workspace.reference}","AlignRawReadsBwaAln.ref_pac":"${workspace.reference_pac}","AlignRawReadsBwaAln.ref_sa":"${workspace.reference_sa}","AlignRawReadsBwaAln.revertsam_task.additional_args":"-RHC false","AlignRawReadsBwaAln.revertsam_task.disk_buffer":"${}","AlignRawReadsBwaAln.revertsam_task.docker_override":"${}","AlignRawReadsBwaAln.revertsam_task.gatk_path":"${}","AlignRawReadsBwaAln.revertsam_task.maxRetries":"${}","AlignRawReadsBwaAln.revertsam_task.mem":"${}","AlignRawReadsBwaAln.revertsam_task.preemptible_count":"${}","AlignRawReadsBwaAln.revertsam_task.sort_order":"${}","AlignRawReadsBwaAln.revertsam_task.threads":"${}","AlignRawReadsBwaAln.sample_name":"${this.sample_id}","AlignRawReadsBwaAln.samtofastq_task.disk_space":"${}","AlignRawReadsBwaAln.samtofastq_task.docker_override":"${}","AlignRawReadsBwaAln.samtofastq_task.gatk_override":"${}","AlignRawReadsBwaAln.samtofastq_task.memory":"${}","AlignRawReadsBwaAln.samtofastq_task.num_preempt":"${0}","AlignRawReadsBwaAln.samtofastq_task.num_threads":"${}","AlignRawReadsBwaAln.sortbam.diskgb_buffer":"${200}"}
\ No newline at end of file
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.wdl b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.wdl
new file mode 100644
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/RevertBamAndBwaAln.wdl
@@ -0,0 +1,164 @@
+# Reverts an aligned BAM to unmapped reads, realigns every read group with
+# bwa aln / bwa sampe, merges the alignments back onto the unmapped BAM and
+# coordinate-sorts the result. When extract_umis is true the UMI is first
+# copied out of the read names by CopyUmiTask.
+import "./subworkflows/CopyUmiFromReadName.wdl" as CopyUmiFromReadName
+import "./subworkflows/RevertSam.wdl" as RevertSam
+import "./subworkflows/BwaAlignment.wdl" as bwa_aln
+import "./subworkflows/MergeBamAlignment.wdl" as MergeBamAlignment
+import "./subworkflows/SamToFastq.wdl" as samtofastq
+
+workflow AlignRawReadsBwaAln {
+  File input_bam
+  File input_bam_index
+  Boolean extract_umis
+  String sample_name
+  String? gitc_docker
+  String gitc_docker_or_default = select_first([gitc_docker, "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135"])
+  File ref_fasta
+  File ref_fai
+  File ref_dict
+  File ref_alt
+  File ref_amb
+  File ref_ann
+  File ref_bwt
+  File ref_pac
+  File ref_sa
+
+  # NOTE(review): GetBwaVersion.version is not consumed by any call in this
+  # workflow; MBATask.bwa_version has to be supplied through the inputs.
+  call GetBwaVersion {
+    input:
+      gitc_docker = gitc_docker_or_default
+  }
+
+  # Optional step: only runs when extract_umis is true.
+  if (extract_umis) {
+    call CopyUmiFromReadName.CopyUmiTask as CopyUmiTask {
+      input:
+        bam_file = input_bam,
+        bam_index = input_bam_index,
+        base_name = sample_name
+    }
+  }
+
+  # Uses the UMI-extracted BAM when it exists, otherwise the original input.
+  call RevertSam.RevertSam as revertsam_task {
+    input:
+      input_bam = select_first([CopyUmiTask.umi_extracted_bam, input_bam]),
+      base_name = sample_name,
+      ref_fasta = ref_fasta,
+      ref_fasta_index = ref_fai,
+      ref_fasta_dict = ref_dict
+  }
+
+  call samtofastq.samtofastq as samtofastq_task {
+    input:
+      input_bam = revertsam_task.output_bam
+  }
+
+  # One bwa_alignment shard per first-end FASTQ; the second-end FASTQ is
+  # taken from the same index of secondEndFastqs.
+  scatter (i in range(length(samtofastq_task.firstEndFastqs))) {
+    call bwa_aln.BwaAlignment as bwa_alignment {
+      input:
+        refFasta = ref_fasta,
+        refFastaIndex = ref_fai,
+        refFastaDict = ref_dict,
+        ref_alt = ref_alt,
+        ref_amb = ref_amb,
+        ref_ann = ref_ann,
+        ref_bwt = ref_bwt,
+        ref_pac = ref_pac,
+        ref_sa = ref_sa,
+        firstEndFastq = samtofastq_task.firstEndFastqs[i],
+        secondEndFastq = samtofastq_task.secondEndFastqs[i],
+        sampleName = sample_name,
+        gitc_docker = gitc_docker_or_default
+    }
+  }
+
+  # Merge every shard's aligned BAM with the reverted (unmapped) BAM.
+  call MergeBamAlignment.MergeBamAlignmentTask as MBATask {
+    input:
+      mapped_bam = bwa_alignment.raw_aligned_bam,
+      unmapped_bam = revertsam_task.output_bam,
+      bwa_commandline = bwa_alignment.bwa_command,
+      ref_fasta = ref_fasta,
+      ref_fasta_index = ref_fai,
+      ref_dict = ref_dict,
+      output_bam_basename = sample_name
+  }
+
+  call sortbam {
+    input:
+      input_bam = MBATask.output_bam,
+      output_bam_basename = sample_name
+  }
+}
+
+# Runs bwa_path with no arguments and extracts the text that follows
+# "Version: " from its combined stdout/stderr.
+task GetBwaVersion {
+  String gitc_docker
+  String bwa_path
+  Int? preemptible_attempts
+
+  command {
+    ${bwa_path} 2>&1 | \
+    grep -e '^Version' | \
+    sed 's/Version: //'
+  }
+
+  runtime {
+    docker: gitc_docker
+    memory: "1 GB"
+    maxRetries: 3
+    preemptible: select_first([preemptible_attempts, 2])
+  }
+
+  output {
+    String version = read_string(stdout())
+  }
+}
+
+# Coordinate-sorts input_bam with Picard SortSam and also writes a BAM index
+# and an MD5 file next to the sorted BAM.
+task sortbam {
+  File input_bam
+  String output_bam_basename
+  Int? preemptible_tries = 1
+  Int? compression_level = 2
+  Int? diskgb_buffer
+  # Fixed 50 GB base plus the optional buffer.
+  Int diskSpaceGb = 50 + select_first([diskgb_buffer, 0])
+  Float? extra_mem
+  Float memory = 10 + select_first([extra_mem, 0])
+
+  command <<<
+    set -euxo pipefail
+
+    java -Dsamjdk.compression_level=${compression_level} -Xms4000m -jar /usr/gitc/picard.jar \
+      SortSam \
+      INPUT=${input_bam} \
+      OUTPUT=${output_bam_basename}.bam \
+      SORT_ORDER="coordinate" \
+      CREATE_INDEX=true \
+      CREATE_MD5_FILE=true \
+      MAX_RECORDS_IN_RAM=300000
+  >>>
+
+  runtime {
+    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
+    disks: "local-disk ${diskSpaceGb} HDD"
+    bootDiskSizeGb: 12
+    memory: memory + " GB"
+    preemptible: select_first([preemptible_tries])
+  }
+
+  output {
+    File output_bam = "${output_bam_basename}.bam"
+    File output_bam_index = "${output_bam_basename}.bai"
+    File output_bam_md5 = "${output_bam_basename}.bam.md5"
+  }
+}
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/BwaAlignment.wdl b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/BwaAlignment.wdl
new file mode 100644
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/BwaAlignment.wdl
@@ -0,0 +1,66 @@
+# Test harness: calls BwaAlignment with every input supplied externally.
+workflow BwaAlignmentTest {
+  call BwaAlignment
+}
+
+# Aligns one FASTQ pair with bwa aln (once per end) followed by bwa sampe,
+# name-sorts the SAM into a BAM, and records the three bwa command lines
+# joined into a single string.
+task BwaAlignment {
+  File refFasta
+  File refFastaIndex
+  File refFastaDict
+  File ref_alt
+  File ref_amb
+  File ref_ann
+  File ref_bwt
+  File ref_pac
+  File ref_sa
+  File firstEndFastq
+  String fq1 = basename(firstEndFastq)
+  String basename1 = basename(firstEndFastq, ".fastq.gz")
+  File secondEndFastq
+  String fq2 = basename(secondEndFastq)
+  String basename2 = basename(secondEndFastq, ".fastq.gz")
+  String sampleName
+  String gitc_docker
+  Int memoryGb
+  Int diskSpaceGb
+  Int cpu
+
+  command <<<
+    # Abort on the first failing step so a failed bwa run cannot leave a
+    # partial SAM that still sorts into a plausible-looking BAM.
+    set -euo pipefail
+
+    # Work on the FASTQs from the execution directory.
+    mv ${firstEndFastq} ./${fq1}
+    mv ${secondEndFastq} ./${fq2}
+
+    /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t ${cpu} -o 1 ${refFasta} ./${fq1} -f ./${basename1}.sai
+    export bwa_cmd="/usr/gitc/bwa aln -q 5 -l 32 -k 2 -t "${cpu}" -o 1 "${refFasta}" ./"${fq1}" -f ./"${basename1}".sai\;"
+
+    /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t ${cpu} -o 1 ${refFasta} ./${fq2} -f ./${basename2}.sai
+    export bwa_cmd=$bwa_cmd" /usr/gitc/bwa aln -q 5 -l 32 -k 2 -t "${cpu}" -o 1 "${refFasta}" ./"${fq2}" -f ./"${basename2}".sai\;"
+
+    /usr/gitc/bwa sampe -P ${refFasta} ./${basename1}.sai ./${basename2}.sai ./${fq1} ./${fq2} -f ./${sampleName}.aligned.sam
+    export bwa_cmd=$bwa_cmd" /usr/gitc/bwa sampe -P "${refFasta}" ./"${basename1}".sai ./"${basename2}".sai ./"${fq1}" ./"${fq2}" -f ./"${sampleName}".aligned.sam"
+    echo $bwa_cmd > bwa_cmd.txt
+
+    # NOTE(review): samtools name-sort order may differ from the queryname
+    # order MergeBamAlignment expects of its aligned input - confirm.
+    samtools sort -n ${sampleName}.aligned.sam -o ${sampleName}.aligned.bam
+  >>>
+
+  output {
+    File raw_aligned_bam = "${sampleName}.aligned.bam"
+    String bwa_command = read_string("bwa_cmd.txt")
+  }
+
+  runtime {
+    docker: gitc_docker
+    memory: "${memoryGb} GB"
+    cpu: "${cpu}"
+    disks: "local-disk ${diskSpaceGb} HDD"
+  }
+}
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/CopyUmiFromReadName.wdl b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/CopyUmiFromReadName.wdl
new file mode 100644
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/CopyUmiFromReadName.wdl
@@ -0,0 +1,59 @@
+# Test harness: calls CopyUmiTask with every input supplied externally.
+workflow CopyUmiFromReadName {
+  call CopyUmiTask
+}
+
+# Runs fgbio CopyUmiFromReadName on bam_file and emits <base_name>.bam plus
+# <base_name>.bai.
+task CopyUmiTask {
+  String? bloodbiopsydocker = "us.gcr.io/tag-team-160914/liquidbiopsy:0.0.4.5"
+  String base_name
+  String? fgbio_override
+  File bam_file
+  File bam_index
+  Boolean? remove_umi_from_read_name = true
+
+  Int? preemptible = 2
+  Int? maxRetries = 1
+  Int? disk_pad
+  Int disk_size = ceil(size(bam_file, "GB") * 5) + select_first([disk_pad,0])
+  Float? extra_mem
+  Float mem = 25 + select_first([extra_mem, 0])
+  Int? cpu = 4
+  # JVM heap in MB: the task memory (GB, rounded up) minus 500 MB of headroom.
+  Int compute_mem = ceil(mem) * 1000 - 500
+
+  command {
+    # Fail the task as soon as any step fails instead of relying on the exit
+    # status of the last command only.
+    set -euo pipefail
+
+    export FGBIO_LOCAL_JAR=${default="/usr/fgbio-2.0.2.jar" fgbio_override}
+
+    # Link the inputs under predictable names in the working directory.
+    ln -vs ${bam_file} ${base_name}_input.bam
+    ln -vs ${bam_index} ${base_name}_input.bai
+
+    java -Xmx${compute_mem}m -jar $FGBIO_LOCAL_JAR \
+      CopyUmiFromReadName \
+      -i ${base_name}_input.bam \
+      -o ${base_name}.bam \
+      --remove-umi ${remove_umi_from_read_name}
+  }
+
+  output {
+    File umi_extracted_bam = "${base_name}.bam"
+    # NOTE(review): the command never names this file explicitly; presumably
+    # fgbio writes the index alongside the output BAM - confirm.
+    File umi_extracted_bam_index = "${base_name}.bai"
+  }
+
+  runtime {
+    docker: select_first([bloodbiopsydocker])
+    disks: "local-disk " + disk_size + " HDD, /cromwell_root/tmp 500 HDD"
+    memory: mem + " GB"
+    maxRetries: select_first([maxRetries])
+    preemptible: select_first([preemptible])
+    cpu: select_first([cpu])
+  }
+}
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/MergeBamAlignment.wdl b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/MergeBamAlignment.wdl
new file mode 100644
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/MergeBamAlignment.wdl
@@ -0,0 +1,136 @@
+# Test harness: merges alignments and coordinate-sorts the merged BAM.
+workflow RunMBA {
+  # Used only as an output basename, so it is a String rather than a File.
+  String sample_name
+
+  call MergeBamAlignmentTask {
+    input:
+      output_bam_basename = sample_name
+  }
+
+  call sortbam {
+    input:
+      input_bam = MergeBamAlignmentTask.output_bam,
+      output_bam_basename = sample_name
+  }
+}
+
+# Runs GATK MergeBamAlignment over one or more aligned BAMs and the unmapped
+# BAM, writing <output_bam_basename>.bam and its size as reported by du.
+task MergeBamAlignmentTask {
+  Array[File] mapped_bam
+  File unmapped_bam
+  Array[String] bwa_commandline
+  String bwa_version
+  String bwa_tool
+  String output_bam_basename
+  File ref_fasta
+  File ref_fasta_index
+  File ref_dict
+  Int? extra_mem
+  String? mba_extra_args
+  Int memGb = 64 + select_first([extra_mem,0])
+  String? sort_order = "coordinate"
+
+  Int? diskgb_buffer
+  # Sized from the first aligned BAM times the shard count, so it assumes the
+  # shards are of similar size.
+  Float disk_size = 50 + size(mapped_bam[0], "GB")*length(mapped_bam)*3 + size(unmapped_bam, "GB")*3 + select_first([diskgb_buffer, 0])
+  # Forwarded to GATK as --COMPRESSION_LEVEL.
+  Int compression_level
+  Int? preemptible_tries = 1
+  String? gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+  Int? cpu = 16
+
+  command <<<
+    set -o pipefail
+    set -e
+
+    /gatk/gatk \
+      MergeBamAlignment \
+      --VALIDATION_STRINGENCY SILENT \
+      --EXPECTED_ORIENTATIONS FR \
+      --ATTRIBUTES_TO_RETAIN X0 \
+      --ATTRIBUTES_TO_REMOVE NM \
+      --ATTRIBUTES_TO_REMOVE MD \
+      --ALIGNED_BAM ${sep=" --ALIGNED_BAM " mapped_bam} \
+      --UNMAPPED_BAM ${unmapped_bam} \
+      --OUTPUT ${output_bam_basename}.bam \
+      --REFERENCE_SEQUENCE ${ref_fasta} \
+      --PAIRED_RUN true \
+      --SORT_ORDER ${sort_order} \
+      --IS_BISULFITE_SEQUENCE false \
+      --ALIGNED_READS_ONLY false \
+      --CLIP_ADAPTERS false \
+      --MAX_RECORDS_IN_RAM 2000000 \
+      --ADD_MATE_CIGAR true \
+      --MAX_INSERTIONS_OR_DELETIONS -1 \
+      --PRIMARY_ALIGNMENT_STRATEGY MostDistant \
+      --PROGRAM_RECORD_ID "${bwa_tool}" \
+      --PROGRAM_GROUP_VERSION "${bwa_version}" \
+      --PROGRAM_GROUP_COMMAND_LINE "${sep=' / ' bwa_commandline}" \
+      --PROGRAM_GROUP_NAME "${bwa_tool}" \
+      --ADD_PG_TAG_TO_READS false \
+      --COMPRESSION_LEVEL ${compression_level} \
+      ${mba_extra_args}
+
+    # du reports kB; dividing by 1000000 yields GB.
+    du --block-size=kB ${output_bam_basename}.bam | \
+      awk -F "kB" '{print $1/1000000}' > output_bam_size.txt
+  >>>
+
+  runtime {
+    preemptible: select_first([preemptible_tries])
+    memory: memGb + " GB"
+    bootDiskSizeGb: 12
+    docker: select_first([gatk_docker])
+    cpu: select_first([cpu])
+    disks: "local-disk " + ceil(disk_size) + " HDD"
+  }
+
+  output {
+    File output_bam = "${output_bam_basename}.bam"
+    Float output_bam_size = read_float("output_bam_size.txt")
+  }
+}
+
+# Coordinate-sorts input_bam with Picard SortSam and also writes a BAM index
+# and an MD5 file next to the sorted BAM.
+task sortbam {
+  File input_bam
+  String output_bam_basename
+  Int? preemptible_tries = 1
+  Int? compression_level = 2
+  Int? diskgb_buffer
+  # Fixed 50 GB base plus the optional buffer.
+  Int diskSpaceGb = 50 + select_first([diskgb_buffer, 0])
+  Float? extra_mem
+  Float memory = 10 + select_first([extra_mem, 0])
+
+  command <<<
+    set -euxo pipefail
+
+    java -Dsamjdk.compression_level=${compression_level} -Xms4000m -jar /usr/gitc/picard.jar \
+      SortSam \
+      INPUT=${input_bam} \
+      OUTPUT=${output_bam_basename}.bam \
+      SORT_ORDER="coordinate" \
+      CREATE_INDEX=true \
+      CREATE_MD5_FILE=true \
+      MAX_RECORDS_IN_RAM=300000
+  >>>
+
+  runtime {
+    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
+    disks: "local-disk ${diskSpaceGb} HDD"
+    bootDiskSizeGb: 12
+    memory: memory + " GB"
+    preemptible: select_first([preemptible_tries])
+  }
+
+  output {
+    File output_bam = "${output_bam_basename}.bam"
+    File output_bam_index = "${output_bam_basename}.bai"
+    File output_bam_md5 = "${output_bam_basename}.bam.md5"
+  }
+}
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/RevertSam.wdl b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/RevertSam.wdl
new file mode 100644
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/RevertSam.wdl
@@ -0,0 +1,103 @@
+# Test harness: reverts a BAM and optionally splits the result by read group.
+workflow CallRevertSam {
+  Boolean split_by_rg
+  call RevertSam
+
+  if (split_by_rg) {
+    call SplitSamByRG {
+      input:
+        input_bam = RevertSam.output_bam
+    }
+  }
+}
+
+# Runs GATK SplitReads with -RG true and collects every BAM written into the
+# <base_name> directory.
+task SplitSamByRG {
+  File input_bam
+  String base_name
+  String? additional_args
+
+  String? gatk_path = "/gatk/gatk"
+
+  String? docker_override
+  String docker = select_first([docker_override, "us.gcr.io/broad-gatk/gatk:4.5.0.0"])
+  Int? preemptible_count = 2
+  Int? maxRetries = 1
+  Int? threads = 4
+  Float? mem = 15.0
+  Int? disk_buffer = 50
+
+  command <<<
+    mkdir -p ${base_name}
+
+    ${gatk_path} \
+      SplitReads \
+      -I ${input_bam} \
+      -O ${base_name} \
+      -RG true \
+      ${additional_args}
+  >>>
+
+  output {
+    Array[File] output_rg_bams = glob("${base_name}/*.bam")
+  }
+
+  runtime {
+    docker: docker
+    # (input size in GB + 1) * 4 + disk_buffer, with the fractional part
+    # stripped by the regex so the disk size is a whole number.
+    disks: "local-disk " + sub(((size(input_bam,"GB")+1)*4+disk_buffer),"\\..*","") + " HDD"
+    memory: mem + " GB"
+    cpu: threads
+    maxRetries: maxRetries
+    preemptible: preemptible_count
+  }
+}
+
+# Runs GATK RevertSam on input_bam and writes <base_name>.reverted.bam. The
+# reference and sort-order arguments are only emitted when supplied.
+task RevertSam {
+  File input_bam
+  File? input_bam_index
+  String base_name
+  String? sort_order
+  String? additional_args
+
+  String? gatk_path = "/gatk/gatk"
+  File? ref_fasta
+  File? ref_fasta_index
+  File? ref_fasta_dict
+
+  String? docker_override
+  String docker = select_first([docker_override, "us.gcr.io/broad-gatk/gatk:4.5.0.0"])
+  Int? preemptible_count = 2
+  Int? maxRetries = 1
+  Int? threads = 4
+  Float? mem = 15.0
+  Int? disk_buffer = 50
+
+  command <<<
+    ${gatk_path} \
+      RevertSam \
+      --INPUT ${input_bam} \
+      --OUTPUT ${base_name}.reverted.bam \
+      ${'--REFERENCE_SEQUENCE ' + ref_fasta} \
+      --VALIDATION_STRINGENCY SILENT \
+      ${additional_args} \
+      ${"--SORT_ORDER "+ sort_order}
+  >>>
+
+  output {
+    File output_bam = "${base_name}.reverted.bam"
+  }
+
+  runtime {
+    docker: docker
+    disks: "local-disk " + sub(((size(input_bam,"GB")+1)*4+disk_buffer),"\\..*","") + " HDD"
+    memory: mem + " GB"
+    cpu: threads
+    maxRetries: maxRetries
+    preemptible: preemptible_count
+  }
+}
diff --git a/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/SamToFastq.wdl b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/SamToFastq.wdl
new file mode 100644
--- /dev/null
+++ b/Liquid_Biopsy_Duplex_Analysis/RevertBamAndBwaAln/subworkflows/SamToFastq.wdl
@@ -0,0 +1,54 @@
+# Test harness: calls samtofastq with every input supplied externally.
+workflow samToFastqTest {
+  call samtofastq
+}
+
+# Converts input_bam to gzipped FASTQs, one set per read group, and records
+# the @RG header lines of the input.
+task samtofastq {
+  File input_bam
+
+  Float? memory = 15
+  Int? disk_space = 200
+  Int? num_threads = 4
+  Int num_preempt
+
+  String? docker_override
+  String gatk_docker = select_first([docker_override,"us.gcr.io/broad-gatk/gatk:4.5.0.0"])
+  String? gatk_override
+
+  command {
+    set -euo pipefail
+
+    export GATK_PATH=${default="/gatk/gatk" gatk_override}
+
+    mkdir -p samtofastq # workaround for named pipes
+
+    samtools view -H ${input_bam} | grep ^@RG > read_groups.txt
+
+    $GATK_PATH SamToFastq \
+      -I ${input_bam} \
+      --OUTPUT_PER_RG true \
+      --COMPRESS_OUTPUTS_PER_RG true \
+      --VALIDATION_STRINGENCY SILENT \
+      --OUTPUT_DIR samtofastq
+
+    mv samtofastq/*.fastq.gz .
+  }
+
+  output {
+    # NOTE(review): callers pair these two arrays by index; that presumably
+    # relies on every read group yielding both ends - confirm.
+    Array[File] firstEndFastqs = glob("*1.fastq.gz")
+    Array[File] secondEndFastqs = glob("*2.fastq.gz")
+    Array[String] read_groups = read_lines("read_groups.txt")
+  }
+
+  runtime {
+    docker: gatk_docker
+    memory: select_first([memory])+"GB"
+    disks: "local-disk "+ select_first([disk_space])+" HDD"
+    cpu: select_first([num_threads])
+    preemptible: "${num_preempt}"
+  }
+}