Skip to content

Commit aef6ac9

Browse files
Filter wham-only DELs and scramble-only SVAs in CleanVcf & docs updates (#740)
1 parent 6ea99cf commit aef6ac9

24 files changed

+357
-390
lines changed

.github/.dockstore.yml

+9
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,15 @@ workflows:
198198
tags:
199199
- /.*/
200200

201+
- subclass: WDL
202+
name: VisualizeCnvs
203+
primaryDescriptorPath: /wdl/VisualizeCnvs.wdl
204+
filters:
205+
branches:
206+
- main
207+
tags:
208+
- /.*/
209+
201210
- subclass: WDL
202211
name: SingleSamplePipeline
203212
primaryDescriptorPath: /wdl/GATKSVPipelineSingleSample.wdl

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
A structural variation discovery pipeline for Illumina short-read whole-genome sequencing (WGS) data.
44

5-
For technical documentation on GATK-SV, including how to run the pipeline, please refer to our website.
5+
For technical documentation on GATK-SV, including how to run the pipeline, please refer to our [website](https://broadinstitute.github.io/gatk-sv/).
66

77
## Repository structure
88
* `/carrot`: [Carrot](https://github.com/broadinstitute/carrot) tests

inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl

+4-214
Large diffs are not rendered by default.

inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl

+157
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"VisualizeCnvs.vcf_or_bed": "${this.filtered_vcf}",
3+
"VisualizeCnvs.prefix": "${this.sample_set_set_id}",
4+
"VisualizeCnvs.median_files": "${this.sample_sets.median_cov}",
5+
"VisualizeCnvs.rd_files": "${this.sample_sets.merged_bincov}",
6+
"VisualizeCnvs.ped_file": "${workspace.cohort_ped_file}",
7+
"VisualizeCnvs.min_size": 50000,
8+
"VisualizeCnvs.flags": "-s 999999999",
9+
"VisualizeCnvs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}"
10+
}

inputs/templates/test/VisualizeCnvs/VisualizeCnvs.json.tmpl

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
"VisualizeCnvs.rd_files": [{{ test_batch.merged_coverage_file | tojson }}],
66
"VisualizeCnvs.ped_file": {{ test_batch.ped_file | tojson }},
77
"VisualizeCnvs.min_size": 50000,
8-
"VisualizeCnvs.flags": "",
8+
"VisualizeCnvs.flags": "-s 999999999",
99
"VisualizeCnvs.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
1010
}

scripts/test/terra_validation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def main():
113113
parser.add_argument("-j", "--womtool-jar", help="Path to womtool jar", required=True)
114114
parser.add_argument("-n", "--num-input-jsons",
115115
help="Number of Terra input JSONs expected",
116-
required=False, default=25, type=int)
116+
required=False, default=26, type=int)
117117
parser.add_argument("--log-level",
118118
help="Specify level of logging information, ie. info, warning, error (not case-sensitive)",
119119
required=False, default="INFO")

wdl/CleanVcfChromosome.wdl

+62-1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ workflow CleanVcfChromosome {
5353
RuntimeAttr? runtime_override_stitch_fragmented_cnvs
5454
RuntimeAttr? runtime_override_final_cleanup
5555
RuntimeAttr? runtime_override_rescue_me_dels
56+
RuntimeAttr? runtime_attr_add_high_fp_rate_filters
5657

5758
# Clean vcf 1b
5859
RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b
@@ -299,9 +300,17 @@ workflow CleanVcfChromosome {
299300
runtime_attr_override = runtime_override_rescue_me_dels
300301
}
301302

302-
call FinalCleanup {
303+
call AddHighFDRFilters {
303304
input:
304305
vcf=RescueMobileElementDeletions.out,
306+
prefix="~{prefix}.high_fdr_filtered",
307+
sv_pipeline_docker=sv_pipeline_docker,
308+
runtime_attr_override=runtime_attr_add_high_fp_rate_filters
309+
}
310+
311+
call FinalCleanup {
312+
input:
313+
vcf=AddHighFDRFilters.out,
305314
contig=contig,
306315
prefix="~{prefix}.final_cleanup",
307316
sv_pipeline_docker=sv_pipeline_docker,
@@ -799,6 +808,58 @@ task StitchFragmentedCnvs {
799808
}
800809
}
801810

811+
# Add FILTER status for pockets of variants with high FP rate: wham-only DELs and Scramble-only SVAs with HIGH_SR_BACKGROUND
812+
task AddHighFDRFilters {
# Reads `vcf` with pysam, registers a HIGH_ALGORITHM_FDR FILTER header line, adds that
# filter to records matching either condition in the Python snippet below, and writes
# the result to "<prefix>.vcf.gz".
813+
input {
814+
# VCF to annotate; opened read-only by the Python snippet.
File vcf
815+
# Basename of the output file; ".vcf.gz" is appended.
String prefix
816+
# Image the command runs in; it must provide `python` with `pysam` importable.
String sv_pipeline_docker
817+
# Optional per-attribute overrides for the default resources defined below.
RuntimeAttr? runtime_attr_override
818+
}
819+
820+
# Input size in GiB; used only to scale the default disk request.
Float input_size = size(vcf, "GiB")
821+
# Default resources; each one applies whenever the override leaves that attribute unset.
RuntimeAttr runtime_default = object {
822+
mem_gb: 3.75,
823+
disk_gb: ceil(10.0 + input_size * 3.0),
824+
cpu_cores: 1,
825+
preemptible_tries: 3,
826+
max_retries: 1,
827+
boot_disk_gb: 10
828+
}
829+
# Use the caller's override object when supplied, otherwise the defaults as a whole.
RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default])
830+
runtime {
# Every attribute falls back individually to its default via select_first.
831+
memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB"
832+
disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD"
833+
cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores])
834+
preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries])
835+
maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries])
836+
docker: sv_pipeline_docker
837+
bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb])
838+
}
839+
840+
command <<<
841+
set -euo pipefail
842+
843+
python <<CODE
844+
import pysam
845+
with pysam.VariantFile("~{vcf}", 'r') as fin:
846+
# The input file's own header object is extended and then reused as the output header.
# NOTE(review): presumably modified in place (not copied) so that records read from
# fin can carry the new FILTER -- confirm before refactoring.
header = fin.header
847+
header.add_line("##FILTER=<ID=HIGH_ALGORITHM_FDR,Description=\"Categories of variants with low precision including Wham-only deletions and certain Scramble SVAs\">")
848+
with pysam.VariantFile("~{prefix}.vcf.gz", 'w', header=header) as fo:
849+
for record in fin:
# A record is flagged when either of the following holds:
#   1. ALGORITHMS is exactly ('wham',) and SVTYPE is 'DEL'
#   2. ALGORITHMS is exactly ('scramble',), HIGH_SR_BACKGROUND is truthy, and the
#      ALT alleles are exactly ('<INS:ME:SVA>',)
# NOTE(review): INFO fields are indexed directly; this assumes every record carries
# ALGORITHMS and SVTYPE -- confirm against upstream steps.
850+
if (record.info['ALGORITHMS'] == ('wham',) and record.info['SVTYPE'] == 'DEL') or \
851+
(record.info['ALGORITHMS'] == ('scramble',) and record.info['HIGH_SR_BACKGROUND'] and record.alts == ('<INS:ME:SVA>',)):
852+
record.filter.add('HIGH_ALGORITHM_FDR')
853+
# NOTE(review): indentation is lost in this rendering; the write appears intended to
# run for every record (flagged or not) rather than only inside the `if` -- confirm.
fo.write(record)
854+
CODE
855+
>>>
856+
857+
output {
858+
# The VCF written by the Python snippet; this task declares no index file as an output.
File out = "~{prefix}.vcf.gz"
859+
}
860+
}
861+
862+
802863

803864
# Final VCF cleanup
804865
task FinalCleanup {

website/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# Generated files
88
.docusaurus
99
.cache-loader
10+
package-lock.json
1011

1112
# Misc
1213
.DS_Store

website/docs/advanced/cromwell/overview.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Google Cloud Platform (GCP).
2929

3030
# Cromwell Server
3131

32-
There are two option to communicate with a running Cromwell server:
32+
There are two options to communicate with a running Cromwell server:
3333
[REST API](https://cromwell.readthedocs.io/en/stable/tutorials/ServerMode/), and
3434
[Cromshell](https://github.com/broadinstitute/cromshell) which is a command line tool
3535
to interface with a Cromwell server. We recommend using Cromshell due to its simplicity

website/docs/best_practices.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ description: Guide for using GATK-SV
44
sidebar_position: 4
55
---
66

7-
A comprehensive guide for the single-sample calling mode is available in [GATK Best Practices for Structural Variation
8-
Discovery on Single Samples](https://gatk.broadinstitute.org/hc/en-us/articles/9022653744283-GATK-Best-Practices-for-Structural-Variation-Discovery-on-Single-Samples).
7+
A comprehensive guide for the single-sample [calling mode](/docs/gs/calling_modes) is available in
8+
[GATK Best Practices for Structural Variation Discovery on Single Samples](https://gatk.broadinstitute.org/hc/en-us/articles/9022653744283-GATK-Best-Practices-for-Structural-Variation-Discovery-on-Single-Samples).
99
This material covers basic concepts of structural variant calling, specifics of SV VCF formatting, and
1010
advanced troubleshooting that also apply to the joint calling mode. This guide is intended to supplement
1111
documentation found here.

0 commit comments

Comments
 (0)