From 9347fc65c76a3a0b67ea153304fab7a2bcf77b61 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 14 Mar 2022 08:08:27 -0400 Subject: [PATCH 1/2] parameterize the sequencing lab prefix for the genome id (that comes after the US state abbreviation and before the UID) --- pipes/WDL/tasks/tasks_sarscov2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_sarscov2.wdl b/pipes/WDL/tasks/tasks_sarscov2.wdl index d3b623f0d..3bded4b05 100644 --- a/pipes/WDL/tasks/tasks_sarscov2.wdl +++ b/pipes/WDL/tasks/tasks_sarscov2.wdl @@ -379,6 +379,7 @@ task crsp_meta_etl { String prefix_map = '{"Broad Institute Clinical Research Sequencing Platform": "CRSP_", "Massachusetts General Hospital": "MGH_", "Rhode Island Department of Health": "RIDOH_", "Biobot Analytics": "Biobot_", "Flow Health":"FlowHealth_", "Colorado Mesa University":"CMU_", "Capture Diagnostics Hawaii":"Capture_", "Boston Medical Center":"BMC_", "University of Central Florida":"UCF_"}' String org_name_map = '{"Broad Institute Clinical Research Sequencing Platform": "Broad Institute Clinical Research Sequencing Platform", "Massachusetts General Hospital": "Massachusetts General Hospital", "RIDOH": "Rhode Island Department of Health", "BIOBOT": "Biobot Analytics", "FLOW":"Flow Health", "MESA":"Colorado Mesa University", "CAPTURE":"Capture Diagnostics Hawaii", "BUBMC":"Boston Medical Center", "UCF":"University of Central Florida"}' String allowed_purposes = '["Baseline surveillance (random sampling)", "Targeted surveillance (non-random sampling)", "Screening for Variants of Concern (VOC)", "Longitudinal surveillance (repeat sampling of individuals)", "Vaccine escape surveillance", "Cluster/Outbreak investigation"]' + String sequencing_lab_prefix = 'CDCBI' String docker = "quay.io/broadinstitute/py3-bio:0.1.2" } @@ -476,7 +477,7 @@ task crsp_meta_etl { for id in hash_input_ids ] sample_meta['host_subject_id'] = sample_meta.apply(lambda row: - 'CDCBI-' + 
prefix_map[row['collected_by']] + row['hl7_hashed'] + '~{sequencing_lab_prefix}' + '-' + prefix_map[row['collected_by']] + row['hl7_hashed'] , axis=1) sample_meta['sample_name'] = [ f'{country}/{state}-{id}/{year}' From fe521324131b75cc9a848e904732f6772fa7d406 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 14 Mar 2022 08:08:57 -0400 Subject: [PATCH 2/2] CDCBI contract ended, change genome names --- pipes/WDL/tasks/tasks_sarscov2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_sarscov2.wdl b/pipes/WDL/tasks/tasks_sarscov2.wdl index 3bded4b05..7c2d02245 100644 --- a/pipes/WDL/tasks/tasks_sarscov2.wdl +++ b/pipes/WDL/tasks/tasks_sarscov2.wdl @@ -379,7 +379,7 @@ task crsp_meta_etl { String prefix_map = '{"Broad Institute Clinical Research Sequencing Platform": "CRSP_", "Massachusetts General Hospital": "MGH_", "Rhode Island Department of Health": "RIDOH_", "Biobot Analytics": "Biobot_", "Flow Health":"FlowHealth_", "Colorado Mesa University":"CMU_", "Capture Diagnostics Hawaii":"Capture_", "Boston Medical Center":"BMC_", "University of Central Florida":"UCF_"}' String org_name_map = '{"Broad Institute Clinical Research Sequencing Platform": "Broad Institute Clinical Research Sequencing Platform", "Massachusetts General Hospital": "Massachusetts General Hospital", "RIDOH": "Rhode Island Department of Health", "BIOBOT": "Biobot Analytics", "FLOW":"Flow Health", "MESA":"Colorado Mesa University", "CAPTURE":"Capture Diagnostics Hawaii", "BUBMC":"Boston Medical Center", "UCF":"University of Central Florida"}' String allowed_purposes = '["Baseline surveillance (random sampling)", "Targeted surveillance (non-random sampling)", "Screening for Variants of Concern (VOC)", "Longitudinal surveillance (repeat sampling of individuals)", "Vaccine escape surveillance", "Cluster/Outbreak investigation"]' - String sequencing_lab_prefix = 'CDCBI' + String sequencing_lab_prefix = 'Broad' String docker = 
"quay.io/broadinstitute/py3-bio:0.1.2" }