
Commit

Merge remote-tracking branch 'origin/ah_var_store' into gg_VS-1558_ReleaseTheMemory

gbggrant committed Feb 4, 2025
2 parents da697ff + ff20fce commit 007241a
Showing 4 changed files with 275 additions and 38 deletions.
@@ -85,7 +85,7 @@ workflow GvsCreateVATfromVDS {

# If the vat version is undefined or v1 then the vat tables would be named like filter_vat, otherwise filter_vat_v2.
String effective_vat_version = if (defined(vat_version) && select_first([vat_version]) != "v1") then "_" + select_first([vat_version]) else ""
String vat_table_name = filter_set_name + "_vat" + effective_vat_version
String effective_vat_table_name = filter_set_name + "_vat" + effective_vat_version

String output_path_without_a_trailing_slash = sub(output_path, "/$", "")
String effective_output_path = if (output_path == output_path_without_a_trailing_slash) then output_path + "/" else output_path
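
A side note on the rename in the hunk above: the naming expression itself is unchanged, only the declaration is now called effective_vat_table_name, apparently so that the identifier vat_table_name is free to be re-exposed as a workflow output (added in the last hunk of this file). A minimal sketch of how the expression evaluates, using a hypothetical filter_set_name of "my_filter" (not a value from this commit):

    version 1.0

    workflow VatTableNameExample {
        input {
            String filter_set_name = "my_filter"  # hypothetical value, for illustration only
            String? vat_version                   # undefined, "v1", or e.g. "v2"
        }

        # Same naming logic as in GvsCreateVATfromVDS:
        #   vat_version undefined or "v1"  ->  "my_filter_vat"
        #   vat_version "v2"               ->  "my_filter_vat_v2"
        String effective_vat_version = if (defined(vat_version) && select_first([vat_version]) != "v1") then "_" + select_first([vat_version]) else ""
        String effective_vat_table_name = filter_set_name + "_vat" + effective_vat_version

        output {
            String vat_table_name = effective_vat_table_name
        }
    }
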
@@ -262,7 +262,7 @@ workflow GvsCreateVATfromVDS {
project_id = project_id,
dataset_name = dataset_name,
output_path = effective_output_path,
base_vat_table_name = vat_table_name,
base_vat_table_name = effective_vat_table_name,
prep_vt_json_done = PrepVtAnnotationJson.done,
prep_genes_json_done = PrepGenesAnnotationJson.done,
cloud_sdk_docker = effective_cloud_sdk_docker,
@@ -271,7 +271,7 @@ workflow GvsCreateVATfromVDS {
call DeduplicateVatInBigQuery {
input:
input_vat_table_name = BigQueryLoadJson.vat_table,
output_vat_table_name = vat_table_name,
output_vat_table_name = effective_vat_table_name,
nirvana_schema = MakeSubpopulationFilesAndReadSchemaFiles.vat_schema_json_file,
project_id = project_id,
dataset_name = dataset_name,
@@ -294,6 +294,7 @@ workflow GvsCreateVATfromVDS {
}

output {
String vat_table_name = effective_vat_table_name
String? cluster_name = GenerateSitesOnlyVcf.cluster_name
File? dropped_sites_file = MergeTsvs.output_file
File? final_tsv_file = GvsCreateVATFilesFromBigQuery.final_tsv_file
35 changes: 20 additions & 15 deletions scripts/variantstore/variant-annotations-table/GvsValidateVAT.wdl
@@ -8,6 +8,7 @@ workflow GvsValidateVat {
String project_id
String dataset_name
String vat_table_name
Boolean? is_small_callset
String? cloud_sdk_docker
String? variants_docker
}
@@ -25,20 +26,23 @@ workflow GvsValidateVat {
String effective_cloud_sdk_docker = select_first([cloud_sdk_docker, GetToolVersions.cloud_sdk_docker])
String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker])

call Utils.GetBQTableLastModifiedDatetime as SampleDateTime {
input:
project_id = project_id,
fq_table = fq_vat_table,
cloud_sdk_docker = effective_cloud_sdk_docker,
}
# Defining is_small_callset allows us to run this WDL on a dataset that has not had samples loaded (for testing)
if (!defined(is_small_callset)) {
call Utils.GetBQTableLastModifiedDatetime as SampleDateTime {
input:
project_id = project_id,
fq_table = fq_sample_table,
cloud_sdk_docker = effective_cloud_sdk_docker,
}

call Utils.GetNumSamplesLoaded {
input:
fq_sample_table = fq_sample_table,
project_id = project_id,
sample_table_timestamp = SampleDateTime.last_modified_timestamp,
control_samples = false,
cloud_sdk_docker = effective_cloud_sdk_docker,
call Utils.GetNumSamplesLoaded {
input:
fq_sample_table = fq_sample_table,
project_id = project_id,
sample_table_timestamp = SampleDateTime.last_modified_timestamp,
control_samples = false,
cloud_sdk_docker = effective_cloud_sdk_docker,
}
}

call Utils.GetBQTableLastModifiedDatetime as VatDateTime {
@@ -152,8 +156,9 @@ workflow GvsValidateVat {
cloud_sdk_docker = effective_cloud_sdk_docker,
}

# only check certain things if the callset is larger than 10,000 samples (a guess)
Boolean callset_is_small = GetNumSamplesLoaded.num_samples < 10000
# If the input boolean `is_small_callset` is defined, use it directly;
# otherwise fall back to the count from the `GetNumSamplesLoaded` task and treat the callset as small if it has fewer than 10,000 samples.
Boolean callset_is_small = select_first([is_small_callset, select_first([GetNumSamplesLoaded.num_samples, 1]) < 10000])
if (!callset_is_small) {
call ClinvarSignificance {
input:
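
A minimal sketch (not part of this commit) of how the nested select_first for callset_is_small resolves; num_samples_loaded here is a hypothetical stand-in for GetNumSamplesLoaded.num_samples, which in the workflow is only computed when is_small_callset is left undefined:

    version 1.0

    workflow CallsetIsSmallExample {
        input {
            Boolean? is_small_callset   # optional override, as in GvsValidateVat
            Int? num_samples_loaded     # stand-in for GetNumSamplesLoaded.num_samples
        }

        # Resolution order:
        #   1. an explicit is_small_callset input wins;
        #   2. otherwise the loaded-sample count is compared against 10,000;
        #   3. the inner fallback of 1 only matters if the count is also undefined
        #      (i.e. the sample-counting task was skipped), which yields "small".
        Boolean callset_is_small = select_first([is_small_callset, select_first([num_samples_loaded, 1]) < 10000])

        output {
            Boolean is_small = callset_is_small
        }
    }
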
70 changes: 50 additions & 20 deletions scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
@@ -2,6 +2,7 @@ version 1.0

import "GvsQuickstartVcfIntegration.wdl" as QuickstartVcfIntegration
import "GvsQuickstartHailIntegration.wdl" as QuickstartHailIntegration
import "GvsQuickstartVATIntegration.wdl" as QuickstartVATIntegration
import "../GvsJointVariantCalling.wdl" as JointVariantCalling
import "../GvsUtils.wdl" as Utils

@@ -14,6 +15,8 @@ workflow GvsQuickstartIntegration {
Boolean run_exome_integration = true
Boolean run_beta_integration = true
Boolean run_bge_integration = true
Boolean run_vat_integration = true
Boolean run_vat_integration_test_from_vds = true # If false, the VAT integration test will use a sites-only VCF as input instead of a VDS
String sample_id_column_name = "sample_id"
String vcf_files_column_name = "hg38_reblocked_gvcf"
String vcf_index_files_column_name = "hg38_reblocked_gvcf_index"
@@ -25,6 +28,7 @@ workflow GvsQuickstartIntegration {
String? cloud_sdk_docker
String? cloud_sdk_slim_docker
String? variants_docker
String? variants_nirvana_docker
String? gatk_docker
String? hail_version
Boolean chr20_X_Y_only = true
@@ -36,6 +40,7 @@ workflow GvsQuickstartIntegration {
File full_exome_interval_list = "gs://gcp-public-data--broad-references/hg38/v0/bge_exome_calling_regions.v1.1.interval_list"
String expected_subdir = if (!chr20_X_Y_only) then "all_chrs/" else ""
File expected_output_prefix = "gs://gvs-internal-quickstart/integration/2024-10-29/" + expected_subdir
File truth_data_prefix = "gs://gvs-internal-quickstart/integration/test_data/2025-01-17/"

# WDL 1.0 trick to set a variable ('none') to be undefined.
if (false) {
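
The if (false) { line above opens the WDL 1.0 "none" idiom mentioned in the comment; the body of that block is collapsed in this view. A minimal sketch of the general pattern (illustrative only, not the repository's exact code):

    version 1.0

    workflow NoneTrickExample {
        # WDL 1.0 has no literal for an undefined optional, so declare one inside
        # a block that can never execute; outside the block it evaluates to undefined.
        if (false) {
            File? none = ""
        }

        output {
            # `none` can be forwarded wherever an explicitly undefined optional is wanted.
            File? undefined_file = none
        }
    }
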
@@ -53,6 +58,7 @@ workflow GvsQuickstartIntegration {
String effective_cloud_sdk_docker = select_first([cloud_sdk_docker, GetToolVersions.cloud_sdk_docker])
String effective_cloud_sdk_slim_docker = select_first([cloud_sdk_slim_docker, GetToolVersions.cloud_sdk_slim_docker])
String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker])
String effective_variants_nirvana_docker = select_first([variants_nirvana_docker, GetToolVersions.variants_nirvana_docker])
String effective_gatk_docker = select_first([gatk_docker, GetToolVersions.gatk_docker])
String effective_hail_version = select_first([hail_version, GetToolVersions.hail_version])

@@ -73,6 +79,10 @@ workflow GvsQuickstartIntegration {
}
}

String workspace_bucket = GetToolVersions.workspace_bucket
String workspace_id = GetToolVersions.workspace_id
String submission_id = GetToolVersions.submission_id

# Note for `GvsQuickstartIntegration` we use the git_branch_or_tag *input* and its corresponding git hash. This is not
# necessarily the same as the branch name selected in Terra for the integration `GvsQuickstartIntegration` workflow,
# though in practice likely they are the same.
@@ -99,9 +109,9 @@ workflow GvsQuickstartIntegration {
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
variants_docker = effective_variants_docker,
gatk_docker = effective_gatk_docker,
workspace_bucket = GetToolVersions.workspace_bucket,
workspace_id = GetToolVersions.workspace_id,
submission_id = GetToolVersions.submission_id,
workspace_bucket = workspace_bucket,
workspace_id = workspace_id,
submission_id = submission_id,
hail_version = effective_hail_version,
maximum_alternate_alleles = maximum_alternate_alleles,
ploidy_table_name = ploidy_table_name,
@@ -139,9 +149,9 @@ workflow GvsQuickstartIntegration {
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
variants_docker = effective_variants_docker,
gatk_docker = effective_gatk_docker,
workspace_bucket = GetToolVersions.workspace_bucket,
workspace_id = GetToolVersions.workspace_id,
submission_id = GetToolVersions.submission_id,
workspace_bucket = workspace_bucket,
workspace_id = workspace_id,
submission_id = submission_id,
maximum_alternate_alleles = maximum_alternate_alleles,
}
call QuickstartVcfIntegration.GvsQuickstartVcfIntegration as QuickstartVcfVQSRIntegration {
@@ -166,9 +176,9 @@ workflow GvsQuickstartIntegration {
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
variants_docker = effective_variants_docker,
gatk_docker = effective_gatk_docker,
workspace_bucket = GetToolVersions.workspace_bucket,
workspace_id = GetToolVersions.workspace_id,
submission_id = GetToolVersions.submission_id,
workspace_bucket = workspace_bucket,
workspace_id = workspace_id,
submission_id = submission_id,
maximum_alternate_alleles = maximum_alternate_alleles,
}

@@ -212,9 +222,9 @@ workflow GvsQuickstartIntegration {
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
variants_docker = effective_variants_docker,
gatk_docker = effective_gatk_docker,
workspace_bucket = GetToolVersions.workspace_bucket,
workspace_id = GetToolVersions.workspace_id,
submission_id = GetToolVersions.submission_id,
workspace_bucket = workspace_bucket,
workspace_id = workspace_id,
submission_id = submission_id,
maximum_alternate_alleles = maximum_alternate_alleles,
target_interval_list = target_interval_list,
}
@@ -251,9 +261,9 @@ workflow GvsQuickstartIntegration {
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
variants_docker = effective_variants_docker,
gatk_docker = effective_gatk_docker,
workspace_bucket = GetToolVersions.workspace_bucket,
workspace_id = GetToolVersions.workspace_id,
submission_id = GetToolVersions.submission_id,
workspace_bucket = workspace_bucket,
workspace_id = workspace_id,
submission_id = submission_id,
maximum_alternate_alleles = maximum_alternate_alleles,
target_interval_list = target_interval_list,
}
@@ -270,8 +280,6 @@ workflow GvsQuickstartIntegration {
if (run_beta_integration) {
String project_id = "gvs-internal"

String workspace_bucket = GetToolVersions.workspace_bucket
String submission_id = GetToolVersions.submission_id
String extract_output_gcs_dir = "~{workspace_bucket}/output_vcfs/by_submission_id/~{submission_id}/beta"
Boolean collect_variant_calling_metrics = true

@@ -298,9 +306,9 @@ workflow GvsQuickstartIntegration {
cloud_sdk_docker = effective_cloud_sdk_docker,
variants_docker = effective_variants_docker,
gatk_docker = effective_gatk_docker,
workspace_bucket = GetToolVersions.workspace_bucket,
workspace_id = GetToolVersions.workspace_id,
submission_id = GetToolVersions.submission_id,
workspace_bucket = workspace_bucket,
workspace_id = workspace_id,
submission_id = submission_id,
maximum_alternate_alleles = maximum_alternate_alleles,
git_branch_or_tag = git_branch_or_tag,
sample_id_column_name = sample_id_column_name,
@@ -319,6 +327,28 @@ workflow GvsQuickstartIntegration {
}
}

if (run_vat_integration) {
String extract_vat_output_gcs_dir = "~{workspace_bucket}/output_vat/by_submission_id/~{submission_id}/vat"

call QuickstartVATIntegration.GvsQuickstartVATIntegration {
input:
git_branch_or_tag = git_branch_or_tag,
git_hash = GetToolVersions.git_hash,
use_default_dockers = use_default_dockers,
truth_data_prefix = truth_data_prefix,
expected_output_prefix = expected_output_prefix,
dataset_suffix = "vat",
output_path = extract_vat_output_gcs_dir,
use_vds_as_input = run_vat_integration_test_from_vds,
basic_docker = effective_basic_docker,
cloud_sdk_docker = effective_cloud_sdk_docker,
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
variants_docker = effective_variants_docker,
variants_nirvana_docker = effective_variants_nirvana_docker,
gatk_docker = effective_gatk_docker,
}
}

output {
String recorded_git_hash = GetToolVersions.git_hash
}
