Skip to content

Commit e2e76a8

Browse files
kirtanav98Kirtana Veeraraghavan
and
Kirtana Veeraraghavan
authored
Add PlotSVCountsPerSample subworkflow to the end of ClusterBatch and FilterBatchSites (#567)
* Update README to link to SV callers used. * Imported PlotSVCountsPerSample.wdl into ClusterBatch.wdl and FilterBatchSites.wdl. Added the N_IQR_cutoff input to the workflow with a default value of 6 to both WDLs. Call PlotSVCountsPerSample as a subworkflow at the end of each workflow, passing the final VCFs as the input and the batch as the prefix. Added the outputs of PlotSVCountsPerSample to the workflows' outputs with unique names. Updated the JSON templates for ClusterBatch and FilterBatchSites in test and terra to include the N_IQR_cutoff input with a value of 6. Then validated the ClusterBatch and FilterBatchSites workflows with womtool and the Terra validation script, and ran the updated workflows on the ref_panel_1kg test data. Both workflows completed successfully and produced reasonable outputs. * integrate PlotSVCountsPerSample into ClusterBatch and FilterBatchSites directly * integrate PlotSVCountsPerSample into ClusterBatch and FilterBatchSites directly * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * fixed issues from miniwdl * make edits to location of parameters called * make edits to location of parameters called --------- Co-authored-by: Kirtana Veeraraghavan <[email protected]>
1 parent fc9e992 commit e2e76a8

File tree

10 files changed

+96
-20
lines changed

10 files changed

+96
-20
lines changed

inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,6 @@
2828
"ClusterBatch.manta_vcf_tar": "${this.std_manta_vcf_tar}",
2929
"ClusterBatch.melt_vcf_tar": "${this.std_melt_vcf_tar}",
3030
"ClusterBatch.scramble_vcf_tar": "${this.std_scramble_vcf_tar}",
31-
"ClusterBatch.ped_file": "${workspace.cohort_ped_file}"
31+
"ClusterBatch.ped_file": "${workspace.cohort_ped_file}",
32+
"ClusterBatch.N_IQR_cutoff_plotting": "6"
3233
}

inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatchSites.json.tmpl

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
"FilterBatchSites.melt_vcf" : "${this.clustered_melt_vcf}",
99
"FilterBatchSites.scramble_vcf" : "${this.clustered_scramble_vcf}",
1010
"FilterBatchSites.evidence_metrics": "${this.metrics}",
11-
"FilterBatchSites.evidence_metrics_common": "${this.metrics_common}"
11+
"FilterBatchSites.evidence_metrics_common": "${this.metrics_common}",
12+
"FilterBatchSites.N_IQR_cutoff_plotting": "6"
1213
}

inputs/templates/test/ClusterBatch/ClusterBatch.json.tmpl

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,6 @@
2929
"ClusterBatch.wham_vcf_tar": {{ test_batch.std_wham_vcf_tar | tojson }},
3030
"ClusterBatch.manta_vcf_tar": {{ test_batch.std_manta_vcf_tar | tojson }},
3131
"ClusterBatch.melt_vcf_tar": {{ test_batch.std_melt_vcf_tar | tojson }},
32-
"ClusterBatch.ped_file": {{ test_batch.ped_file | tojson }}
32+
"ClusterBatch.ped_file": {{ test_batch.ped_file | tojson }},
33+
"ClusterBatch.N_IQR_cutoff_plotting": "6"
3334
}

inputs/templates/test/FilterBatch/FilterBatchSites.json.tmpl

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77
"FilterBatchSites.wham_vcf" : {{ test_batch.merged_wham_vcf | tojson }},
88
"FilterBatchSites.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }},
99
"FilterBatchSites.evidence_metrics": {{ test_batch.evidence_metrics | tojson }},
10-
"FilterBatchSites.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }}
10+
"FilterBatchSites.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }},
11+
"FilterBatchSites.N_IQR_cutoff_plotting": "6"
1112
}

inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl

+1-2
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
"GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_exclude_overlap_fraction": "0.5",
9595
"GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_interval_overlap": "0.8",
9696
"GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_clustering_algorithm": "SINGLE_LINKAGE",
97-
97+
"GATKSVPipelineBatch.N_IQR_cutoff_plotting": "6",
9898
"GATKSVPipelineBatch.GATKSVPipelinePhase1.BAF_split_size": "10000",
9999
"GATKSVPipelineBatch.GATKSVPipelinePhase1.RD_split_size": "10000",
100100
"GATKSVPipelineBatch.GATKSVPipelinePhase1.PE_split_size": "10000",
@@ -105,7 +105,6 @@
105105

106106
"GATKSVPipelineBatch.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }},
107107
"GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_nIQR": "999999",
108-
109108
"GATKSVPipelineBatch.GenotypeBatch.n_RD_genotype_bins": "100000",
110109
"GATKSVPipelineBatch.GenotypeBatch.n_per_split": "5000",
111110
"GATKSVPipelineBatch.GenotypeBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }},

inputs/templates/test/GATKSVPipelinePhase1/GATKSVPipelinePhase1.json.tmpl

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545

4646
"GATKSVPipelinePhase1.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }},
4747
"GATKSVPipelinePhase1.outlier_cutoff_nIQR": "6",
48+
"GATKSVPipelinePhase1.N_IQR_cutoff_plotting": "6",
4849

4950
"GATKSVPipelinePhase1.ploidy_sample_psi_scale": "0.001",
5051
"GATKSVPipelinePhase1.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }},

wdl/ClusterBatch.wdl

+28-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import "DepthClustering.wdl" as depth
55
import "ClusterBatchMetrics.wdl" as metrics
66
import "TasksClusterBatch.wdl" as tasks
77
import "Utils.wdl" as util
8+
import "PlotSVCountsPerSample.wdl" as sv_counts
89

910
workflow ClusterBatch {
1011
input {
@@ -48,6 +49,9 @@ workflow ClusterBatch {
4849
Int pesr_breakend_window
4950
String? pesr_clustering_algorithm
5051

52+
# PlotSVCountsPerSample
53+
Int? N_IQR_cutoff_plotting
54+
5155
# Module metrics parameters
5256
# Run module metrics workflow at the end - on by default
5357
Boolean? run_module_metrics
@@ -81,6 +85,9 @@ workflow ClusterBatch {
8185
RuntimeAttr? runtime_attr_gatk_to_svtk_vcf_depth
8286
RuntimeAttr? runtime_override_concat_vcfs_depth
8387
RuntimeAttr? runtime_attr_exclude_intervals_pesr
88+
RuntimeAttr? runtime_attr_count_svs
89+
RuntimeAttr? runtime_attr_plot_svcounts
90+
RuntimeAttr? runtime_attr_cat_outliers_preview
8491
}
8592

8693
call util.GetSampleIdsFromVcfTar {
@@ -282,6 +289,19 @@ workflow ClusterBatch {
282289
}
283290
}
284291

292+
if (defined(N_IQR_cutoff_plotting)){
293+
call sv_counts.PlotSVCountsPerSample {
294+
input:
295+
prefix = batch,
296+
vcfs = [ClusterDepth.clustered_vcf, ClusterPESR_manta.clustered_vcf, ClusterPESR_wham.clustered_vcf, ClusterPESR_melt.clustered_vcf, ClusterPESR_scramble.clustered_vcf],
297+
N_IQR_cutoff = select_first([N_IQR_cutoff_plotting]),
298+
sv_pipeline_docker = sv_pipeline_docker,
299+
runtime_attr_count_svs = runtime_attr_count_svs,
300+
runtime_attr_plot_svcounts = runtime_attr_plot_svcounts,
301+
runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview
302+
}
303+
}
304+
285305
output {
286306
File clustered_depth_vcf = ClusterDepth.clustered_vcf
287307
File clustered_depth_vcf_index = ClusterDepth.clustered_vcf_index
@@ -293,7 +313,13 @@ workflow ClusterBatch {
293313
File? clustered_melt_vcf_index = ClusterPESR_melt.clustered_vcf_index
294314
File? clustered_scramble_vcf = ClusterPESR_scramble.clustered_vcf
295315
File? clustered_scramble_vcf_index = ClusterPESR_scramble.clustered_vcf_index
296-
316+
Array[File]? clustered_sv_counts = PlotSVCountsPerSample.sv_counts
317+
Array[File]? clustered_sv_count_plots = PlotSVCountsPerSample.sv_count_plots
318+
File? clustered_outlier_samples_preview = PlotSVCountsPerSample.outlier_samples_preview
319+
File? clustered_outlier_samples_with_reason = PlotSVCountsPerSample.outlier_samples_with_reason
320+
Int? clustered_num_outlier_samples = PlotSVCountsPerSample.num_outlier_samples
297321
File? metrics_file_clusterbatch = ClusterBatchMetrics.metrics_file
298322
}
299-
}
323+
324+
325+
}

wdl/FilterBatchSites.wdl

+33-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
version 1.0
22

33
import "Structs.wdl"
4+
import "PlotSVCountsPerSample.wdl" as sv_counts
45

56
workflow FilterBatchSites {
67
input {
@@ -12,13 +13,19 @@ workflow FilterBatchSites {
1213
File? depth_vcf
1314
File evidence_metrics
1415
File evidence_metrics_common
15-
1616
String sv_pipeline_docker
17+
18+
# PlotSVCountsPerSample parameters
19+
Int N_IQR_cutoff_plotting = 6
20+
1721
RuntimeAttr? runtime_attr_adjudicate
1822
RuntimeAttr? runtime_attr_rewrite_scores
1923
RuntimeAttr? runtime_attr_filter_annotate_vcf
2024
RuntimeAttr? runtime_attr_merge_pesr_vcfs
21-
25+
RuntimeAttr? runtime_attr_count_svs
26+
RuntimeAttr? runtime_attr_plot_svcounts
27+
RuntimeAttr? runtime_attr_cat_outliers_preview
28+
2229
}
2330

2431
Array[String] algorithms = ["manta", "wham", "melt", "scramble", "depth"]
@@ -58,6 +65,17 @@ workflow FilterBatchSites {
5865
}
5966
}
6067

68+
call sv_counts.PlotSVCountsPerSample {
69+
input:
70+
prefix = batch,
71+
vcfs=[FilterAnnotateVcf.annotated_vcf[0], FilterAnnotateVcf.annotated_vcf[1], FilterAnnotateVcf.annotated_vcf[2], FilterAnnotateVcf.annotated_vcf[3], FilterAnnotateVcf.annotated_vcf[4]],
72+
N_IQR_cutoff = N_IQR_cutoff_plotting,
73+
sv_pipeline_docker = sv_pipeline_docker,
74+
runtime_attr_count_svs = runtime_attr_count_svs,
75+
runtime_attr_plot_svcounts = runtime_attr_plot_svcounts,
76+
runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview
77+
}
78+
6179
output {
6280
File? sites_filtered_manta_vcf = FilterAnnotateVcf.annotated_vcf[0]
6381
File? sites_filtered_wham_vcf = FilterAnnotateVcf.annotated_vcf[1]
@@ -67,7 +85,13 @@ workflow FilterBatchSites {
6785
File cutoffs = AdjudicateSV.cutoffs
6886
File scores = RewriteScores.updated_scores
6987
File RF_intermediate_files = AdjudicateSV.RF_intermediate_files
88+
Array[File] sites_filtered_sv_counts = PlotSVCountsPerSample.sv_counts
89+
Array[File] sites_filtered_sv_count_plots = PlotSVCountsPerSample.sv_count_plots
90+
File sites_filtered_outlier_samples_preview = PlotSVCountsPerSample.outlier_samples_preview
91+
File sites_filtered_outlier_samples_with_reason = PlotSVCountsPerSample.outlier_samples_with_reason
92+
Int sites_filtered_num_outlier_samples = PlotSVCountsPerSample.num_outlier_samples
7093
}
94+
7195
}
7296

7397
task AdjudicateSV {
@@ -79,7 +103,7 @@ task AdjudicateSV {
79103
}
80104

81105
RuntimeAttr default_attr = object {
82-
cpu_cores: 1,
106+
cpu_cores: 1,
83107
mem_gb: 3.75,
84108
disk_gb: 10,
85109
boot_disk_gb: 10,
@@ -101,7 +125,7 @@ task AdjudicateSV {
101125
mv *_trainable.txt ~{batch}.RF_intermediate_files/
102126
mv *_testable.txt ~{batch}.RF_intermediate_files/
103127
tar -czvf ~{batch}.RF_intermediate_files.tar.gz ~{batch}.RF_intermediate_files
104-
128+
105129
>>>
106130
runtime {
107131
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
@@ -125,7 +149,7 @@ task RewriteScores {
125149
}
126150

127151
RuntimeAttr default_attr = object {
128-
cpu_cores: 1,
152+
cpu_cores: 1,
129153
mem_gb: 3.75,
130154
disk_gb: 10,
131155
boot_disk_gb: 10,
@@ -145,7 +169,7 @@ task RewriteScores {
145169
-m ~{metrics} \
146170
-s ~{scores} \
147171
-o ~{batch}.updated_scores
148-
172+
149173
>>>
150174
runtime {
151175
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
@@ -170,7 +194,7 @@ task FilterAnnotateVcf {
170194
}
171195

172196
RuntimeAttr default_attr = object {
173-
cpu_cores: 1,
197+
cpu_cores: 1,
174198
mem_gb: 3.75,
175199
disk_gb: 10,
176200
boot_disk_gb: 10,
@@ -200,7 +224,7 @@ task FilterAnnotateVcf {
200224

201225
/opt/sv-pipeline/03_variant_filtering/scripts/annotate_RF_evidence.py filtered.corrected_coords.vcf.gz ~{scores} ~{prefix}.with_evidence.vcf
202226
bgzip ~{prefix}.with_evidence.vcf
203-
227+
204228
>>>
205229
runtime {
206230
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
@@ -212,5 +236,4 @@ task FilterAnnotateVcf {
212236
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
213237
}
214238

215-
}
216-
239+
}

wdl/GATKSVPipelineBatch.wdl

+10-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ workflow GATKSVPipelineBatch {
6363
File contig_ploidy_model_tar
6464
Array[File] gcnv_model_tars
6565

66+
# PlotSVCountsPerSample cutoff, passed through GATKSVPipelinePhase1 to ClusterBatch
67+
Int? N_IQR_cutoff_plotting
68+
6669
File? outlier_cutoff_table
6770
File qc_definitions
6871

@@ -204,13 +207,15 @@ workflow GATKSVPipelineBatch {
204207
counts=counts_files_,
205208
bincov_matrix=EvidenceQC.bincov_matrix,
206209
bincov_matrix_index=EvidenceQC.bincov_matrix_index,
210+
N_IQR_cutoff_plotting = N_IQR_cutoff_plotting,
207211
PE_files=pe_files_,
208212
SR_files=sr_files_,
209213
SD_files=sd_files_,
210214
manta_vcfs=manta_vcfs_,
211215
melt_vcfs=melt_vcfs_,
212216
scramble_vcfs=scramble_vcfs_,
213217
wham_vcfs=wham_vcfs_,
218+
214219
cnmops_chrom_file=autosome_file,
215220
cnmops_allo_file=allosome_file,
216221
allosome_contigs=allosome_file,
@@ -420,6 +425,11 @@ workflow GATKSVPipelineBatch {
420425
File? merged_melt_vcf_index = GATKSVPipelinePhase1.melt_vcf_index
421426
File? merged_wham_vcf = GATKSVPipelinePhase1.wham_vcf
422427
File? merged_wham_vcf_index = GATKSVPipelinePhase1.wham_vcf_index
428+
Array[File] ?clustered_sv_counts = GATKSVPipelinePhase1.clustered_sv_counts
429+
Array[File]? clustered_sv_count_plots = GATKSVPipelinePhase1.clustered_sv_count_plots
430+
File? clustered_outlier_samples_preview = GATKSVPipelinePhase1.clustered_outlier_samples_preview
431+
File? clustered_outlier_samples_with_reason = GATKSVPipelinePhase1.clustered_outlier_samples_with_reason
432+
Int? clustered_num_outlier_samples = GATKSVPipelinePhase1.clustered_num_outlier_samples
423433

424434
File evidence_metrics = GATKSVPipelinePhase1.evidence_metrics
425435
File evidence_metrics_common = GATKSVPipelinePhase1.evidence_metrics_common
@@ -432,7 +442,6 @@ workflow GATKSVPipelineBatch {
432442
File? sites_filtered_wham_vcf = GATKSVPipelinePhase1.sites_filtered_wham_vcf
433443
File? sites_filtered_melt_vcf = GATKSVPipelinePhase1.sites_filtered_melt_vcf
434444
File? sites_filtered_depth_vcf = GATKSVPipelinePhase1.sites_filtered_depth_vcf
435-
436445
File cutoffs = GATKSVPipelinePhase1.cutoffs
437446
File genotyped_pesr_vcf = GenotypeBatch.genotyped_pesr_vcf
438447
File genotyped_depth_vcf = GenotypeBatch.genotyped_depth_vcf

wdl/GATKSVPipelinePhase1.wdl

+15-1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ workflow GATKSVPipelinePhase1 {
160160
Int pesr_breakend_window
161161
String? pesr_clustering_algorithm
162162

163+
Int? N_IQR_cutoff_plotting
164+
163165
File? baseline_depth_vcf_cluster_batch
164166
File? baseline_manta_vcf_cluster_batch
165167
File? baseline_wham_vcf_cluster_batch
@@ -183,6 +185,9 @@ workflow GATKSVPipelinePhase1 {
183185
RuntimeAttr? runtime_attr_gatk_to_svtk_vcf_depth_cluster_batch
184186
RuntimeAttr? runtime_override_concat_vcfs_depth_cluster_batch
185187
RuntimeAttr? runtime_attr_exclude_intervals_pesr_cluster_batch
188+
RuntimeAttr? runtime_attr_count_svs
189+
RuntimeAttr? runtime_attr_plot_svcounts
190+
RuntimeAttr? runtime_attr_cat_outliers_preview
186191

187192
############################################################
188193
## GenerateBatchMetrics
@@ -358,6 +363,7 @@ workflow GATKSVPipelinePhase1 {
358363
pesr_interval_overlap=pesr_interval_overlap,
359364
pesr_breakend_window=pesr_breakend_window,
360365
pesr_clustering_algorithm=pesr_clustering_algorithm,
366+
N_IQR_cutoff_plotting = N_IQR_cutoff_plotting,
361367
run_module_metrics=run_clusterbatch_metrics,
362368
linux_docker=linux_docker,
363369
sv_pipeline_base_docker=sv_pipeline_base_docker,
@@ -384,7 +390,10 @@ workflow GATKSVPipelinePhase1 {
384390
runtime_attr_svcluster_depth=runtime_attr_svcluster_depth_cluster_batch,
385391
runtime_attr_gatk_to_svtk_vcf_depth=runtime_attr_gatk_to_svtk_vcf_depth_cluster_batch,
386392
runtime_override_concat_vcfs_depth=runtime_override_concat_vcfs_depth_cluster_batch,
387-
runtime_attr_exclude_intervals_pesr=runtime_attr_exclude_intervals_pesr_cluster_batch
393+
runtime_attr_exclude_intervals_pesr=runtime_attr_exclude_intervals_pesr_cluster_batch,
394+
runtime_attr_count_svs = runtime_attr_count_svs,
395+
runtime_attr_plot_svcounts = runtime_attr_plot_svcounts,
396+
runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview
388397
}
389398

390399
call batchmetrics.GenerateBatchMetrics as GenerateBatchMetrics {
@@ -500,6 +509,11 @@ workflow GATKSVPipelinePhase1 {
500509
File? melt_vcf_index = ClusterBatch.clustered_melt_vcf_index
501510
File? scramble_vcf = ClusterBatch.clustered_scramble_vcf
502511
File? scramble_vcf_index = ClusterBatch.clustered_scramble_vcf_index
512+
Array[File]? clustered_sv_counts = ClusterBatch.clustered_sv_counts
513+
Array[File]? clustered_sv_count_plots = ClusterBatch.clustered_sv_count_plots
514+
File? clustered_outlier_samples_preview = ClusterBatch.clustered_outlier_samples_preview
515+
File? clustered_outlier_samples_with_reason = ClusterBatch.clustered_outlier_samples_with_reason
516+
Int? clustered_num_outlier_samples = ClusterBatch.clustered_num_outlier_samples
503517

504518
File? metrics_file_clusterbatch = ClusterBatch.metrics_file_clusterbatch
505519

0 commit comments

Comments
 (0)