kids-first · dmiller15 · Jan 23, 2025 · Jan 21, 2025 · Jan 21, 2025 · Jan 22, 2025
diff --git a/docs/GATK_GERMLINE_README.md b/docs/GATK_GERMLINE_README.md
@@ -4,7 +4,14 @@ Kids First Data Resource Center Single Sample Genotyping Workflow. This workflow
 While the Joint Genotyping Workflow is meant to be used with whole genome
 sequenced trios, this workflow is meant for processing single samples from any
 sequencing experiment. The key difference between the different approaches is
-the filtering process.
+the filtering process. Whole Genome Sequencing (WGS) samples are filtered with
+GATK's Variant Quality Score Recalibration (VQSR). Whole Exome (WXS) and
+Targeted Sequencing samples are filtered with GATK's recommended Hard Filtering.
+See below for more information on both filtering processes. Note: it should be
+possible to run a whole exome cohort of 30 or more samples through this
+workflow; in that case, the workflow will use VQSR to filter the cohort.
+This approach has not been tested internally, so be prepared to make
+adjustments to the workflow if you attempt to run a cohort.
 
 While non-germline samples can be run through this workflow, be wary that the
 filtering process (VQSR/Hard Filtering) is specifically tuned for germline

diff --git a/subworkflows/gatk_plot_genotyping_annotations.cwl b/subworkflows/gatk_plot_genotyping_annotations.cwl
@@ -57,7 +57,7 @@ steps:
     in:
       output_filename:
         source: output_basename
-        valueFrom: '$(self).genotyping_annotation_plots.tar.gz'
+        valueFrom: '$(self).single.gatk.genotyped.annotation_plots.tar.gz'
       input_files:
         source: [gatk_plot_annotations_snps/plots, gatk_plot_annotations_indels/plots]
         valueFrom: '$(self[0].concat(self[1]))'

diff --git a/workflows/kfdrc-germline-snv-wf.cwl b/workflows/kfdrc-germline-snv-wf.cwl
@@ -164,7 +164,7 @@ inputs:
           path: 60639019357c3a53540ca7e7, name: Homo_sapiens_assembly38.dict}, {class: File, path: 60639016357c3a53540ca7af, name: Homo_sapiens_assembly38.fasta.fai}]}
   output_basename: {type: 'string', doc: "String to use as the base for output filenames"}
   biospecimen_name: {type: 'string', doc: "String name of biospcimen"}
-  input_reads: {type: 'File', secondaryFiles: [{pattern: '.bai', required: false}, {pattern: '^.bai', required: false}, {pattern: '.crai',
+  input_reads: {type: 'File?', secondaryFiles: [{pattern: '.bai', required: false}, {pattern: '^.bai', required: false}, {pattern: '.crai',
         required: false}, {pattern: '^.crai', required: false}], doc: "Aligned reads files to be analyzed", "sbg:fileTypes": "BAM,CRAM"}
   input_gvcf: {type: 'File?', secondaryFiles: [{pattern: '.tbi', required: true}], doc: "gVCF associated with input_reads. Providing
       this value will skip gVCF creation for the GATK pipeline.", "sbg:fileTypes": "VCF.GZ"}
@@ -297,7 +297,7 @@ steps:
     out: [out_file_array]
   samtools_view:
     run: ../tools/samtools_view.cwl
-    when: $(inputs.input_reads.nameext == '.cram' && inputs.run_gatk)
+    when: $(inputs.input_reads != null && inputs.input_reads.nameext == '.cram' && inputs.run_gatk)
     in:
       run_gatk: run_gatk
       input_reads: input_reads
@@ -308,7 +308,7 @@ steps:
         valueFrom: $(1 == 1)
       output_filename:
         valueFrom: |
-          $(inputs.input_reads.nameroot).bam##idx##$(inputs.input_reads.nameroot).bam.bai
+          $(inputs.input_reads ? inputs.input_reads.nameroot : 'ph').bam##idx##$(inputs.input_reads ? inputs.input_reads.nameroot : 'ph').bam.bai
       cpu:
         valueFrom: $(8)
       ram:
@@ -423,7 +423,7 @@ steps:
       run_gatk: boolean_to_boolean_gvcf/out_bool
       input_bam:
         source: [samtools_view/output, input_reads]
-        pickValue: first_non_null
+        valueFrom: '$(self[0] ? self[0] : self[1])'
       indexed_reference_fasta: indexed_reference_fasta
       scattered_calling_interval_lists: scatter_regions/scattered_intervallists
       biospecimen_name: biospecimen_name
@@ -466,7 +466,7 @@ steps:
       genomicsdbimport_extra_args: genomicsdbimport_extra_args
       output_basename: output_basename
       tool_name:
-        valueFrom: "single.vqsr.filtered.vep_105"
+        valueFrom: "single.gatk.genotyped.filtered.vep_105"
       bcftools_annot_clinvar_columns: bcftools_annot_clinvar_columns
       clinvar_annotation_vcf: clinvar_annotation_vcf
       echtvar_anno_zips: echtvar_anno_zips
@@ -500,5 +500,5 @@ $namespaces:
 - VCF
 - VEP
 "sbg:links":
-- id: 'https://github.com/kids-first/kf-germline-workflow/releases/tag/v1.1.1'
+- id: 'https://github.com/kids-first/kf-germline-workflow/releases/tag/v1.2.0'
   label: github-release
diff --git a/workflows/kfdrc-germline-variant-wf.cwl b/workflows/kfdrc-germline-variant-wf.cwl
@@ -215,7 +215,7 @@ inputs:
           name: Homo_sapiens_assembly38.fasta.64.amb}, {class: File, path: 6063901f357c3a53540ca849, name: Homo_sapiens_assembly38.fasta.64.ann},
         {class: File, path: 6063901d357c3a53540ca81e, name: Homo_sapiens_assembly38.fasta.64.bwt}, {class: File, path: 6063901c357c3a53540ca801,
           name: Homo_sapiens_assembly38.fasta.64.pac}, {class: File, path: 60639015357c3a53540ca7a9, name: Homo_sapiens_assembly38.fasta.64.sa}]}
-  aligned_reads: {type: 'File', secondaryFiles: [{pattern: '.bai', required: false}, {pattern: '^.bai', required: false}, {pattern: '.crai',
+  aligned_reads: {type: 'File?', secondaryFiles: [{pattern: '.bai', required: false}, {pattern: '^.bai', required: false}, {pattern: '.crai',
         required: false}, {pattern: '^.crai', required: false}], doc: "Aligned Reads file(s) from which Germline Variants will be
       discovered", "sbg:fileTypes": "BAM, CRAM"}
   input_gvcf: {type: 'File?', secondaryFiles: [{pattern: '.tbi', required: true}], doc: "gVCF associated with aligned_reads. Providing
@@ -227,11 +227,12 @@ inputs:
         name: experiment_type
         symbols: ["WGS", "WXS", "Targeted Sequencing"]
     doc: "Experimental strategy used to sequence the data of the aligned_reads"
+    default: "WGS"
   output_basename: {type: 'string', doc: "String value to use for the basename of all outputs"}
   cnv_intervals_padding: {type: 'int?', doc: "Length (in bp) of the padding regions on each side of the intervals. This must be the
       same value used for all case samples."}
   cnv_intervals_bin_length: {type: 'int?', doc: "Length (in bp) of the bins. If zero, no binning will be performed."}
-  cnv_intervals: {type: 'File', doc: "Picard or GATK-style interval list of regions to process. For WGS, this should typically only
+  cnv_intervals: {type: 'File?', doc: "Picard or GATK-style interval list of regions to process. For WGS, this should typically only
       include the chromosomes of interest.", "sbg:fileTypes": "INTERVALS, INTERVAL_LIST, LIST"}
   cnv_blacklist_intervals: {type: 'File?', doc: "Picard or GATK-style interval list of regions to ignore.", "sbg:fileTypes": "INTERVALS,
       INTERVAL_LIST, LIST"}
@@ -286,7 +287,7 @@ inputs:
   cnvnator_disable_gc_correction: {type: 'boolean?', doc: "Do not to use GC corrected RD signal"}
   contig_ploidy_model_tar: {type: 'File?', doc: "The contig-ploidy model directory generated by the DetermineGermlineContigPloidyCohortMode
       task in the Cohort workflow.", "sbg:fileTypes": "TAR.GZ"}
-  gcnv_model_tars: {type: 'File[]', doc: "Array of tars of the contig-ploidy model directories generated by the GermlineCNVCallerCohortMode
+  gcnv_model_tars: {type: 'File[]?', doc: "Array of tars of the contig-ploidy model directories generated by the GermlineCNVCallerCohortMode
       tasks in the Cohort workflow.", "sbg:fileTypes": "TAR.GZ"}
   disabled_read_filters_for_collect_counts: {type: 'string[]?', doc: "Read filters to be disabled before analysis by GATK CollectReadCounts."}
   ploidy_mapping_error_rate: {type: 'float?', doc: "Typical mapping error rate."}
@@ -567,6 +568,7 @@ steps:
       indexed_reference_fasta: indexed_reference_fasta
       input_reads: aligned_reads
       input_gvcf: input_gvcf
+      experiment_type: experiment_type
       output_basename: output_basename
       biospecimen_name: biospecimen_name
       calling_regions: snv_calling_regions

diff --git a/workflows/kfdrc-single-sample-genotyping-wf.cwl b/workflows/kfdrc-single-sample-genotyping-wf.cwl
@@ -106,10 +106,12 @@ inputs:
   input_vcfs: {type: 'File[]', doc: 'Input array of individual sample gVCF files'}
   experiment_type:
     type:
+    - 'null'
     - type: enum
       name: experiment_type
       symbols: ["WGS", "WXS", "Targeted Sequencing"]
     doc: "Experimental strategy used to sequence the data in the input_vcfs"
+    default: "WGS"
   axiomPoly_resource_vcf: {type: File, secondaryFiles: [{pattern: '.tbi', required: true}], doc: 'Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz',
     "sbg:suggestedValue": {class: File, path: 60639016357c3a53540ca7c7, name: Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz,
       secondaryFiles: [{class: File, path: 6063901d357c3a53540ca81b, name: Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz.tbi}]}}
@@ -140,7 +142,7 @@ inputs:
   genomicsdbimport_extra_args: {type: 'string?', doc: "Any extra arguments to give to GenomicsDBImport"}
   genotypegvcfs_extra_args: {type: 'string?', doc: "Any extra arguments to give to GenotypeGVCFs"}
   output_basename: string
-  tool_name: {type: 'string?', default: "single.vqsr.filtered.vep_105", doc: "File name string suffx to use for output files"}
+  tool_name: {type: 'string?', default: "single.gatk.genotyped.filtered.vep_105", doc: "File name string suffix to use for output files"}
 
   # VQSR Options
   vqsr_snp_max_gaussians: {type: 'int?', doc: "Interger value for max gaussians in SNP VariantRecalibration. If a dataset gives fewer
@@ -335,7 +337,7 @@ steps:
           $(self.secondaryFiles.filter(function(e) {return e.nameext == '.dict'})[0])
       output_basename:
         source: output_basename
-        valueFrom: $(self).gatk.germline.hardfiltered
+        valueFrom: $(self).single.gatk.genotyped.filtered
       dbsnp_vcf: dbsnp_vcf
       wgs_evaluation_interval_list: wgs_evaluation_interval_list
     out: [output]
@@ -378,5 +380,5 @@ hints:
 - VCF
 - VEP
 "sbg:links":
-- id: 'https://github.com/kids-first/kf-germline-workflow/releases/tag/v1.1.1'
+- id: 'https://github.com/kids-first/kf-germline-workflow/releases/tag/v1.2.0'
   label: github-release