|
# Workflow identity and the CWL features it relies on.
cwlVersion: v1.2
class: Workflow
id: kfdrc-gatk-vqsr
doc: |-
  GATK workflow for Variant Quality Score Recalibration (VQSR)
requirements:
  # Several steps below use `scatter`.
  - class: ScatterFeatureRequirement
  - class: InlineJavascriptRequirement
  # Step inputs below set constant values through `valueFrom`.
  - class: StepInputExpressionRequirement
| 10 | + |
inputs:
  # Primary data: the VCF shards to recalibrate (each with a .tbi index)
  # and the basename handed to the final gather step.
  genotyped_vcfs:
    type: 'File[]'
    secondaryFiles:
      - pattern: '.tbi'
        required: true
    doc: "Input VCF that has been jointly genotyped"
  output_basename:
    type: 'string'
    doc: "String value to use as the base for the filename of the output"
| 14 | + |
| 15 | + axiomPoly_resource_vcf: {type: File, secondaryFiles: [{pattern: '.tbi', required: true}], doc: 'Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz', |
| 16 | + "sbg:suggestedValue": {class: File, path: 60639016357c3a53540ca7c7, name: Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz, |
| 17 | + secondaryFiles: [{class: File, path: 6063901d357c3a53540ca81b, name: Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz.tbi}]}} |
| 18 | + dbsnp_vcf: {type: File, secondaryFiles: [{pattern: '.idx', required: true}], doc: 'Homo_sapiens_assembly38.dbsnp138.vcf', "sbg:suggestedValue": { |
| 19 | + class: File, path: 6063901f357c3a53540ca84b, name: Homo_sapiens_assembly38.dbsnp138.vcf, secondaryFiles: [{class: File, path: 6063901e357c3a53540ca834, |
| 20 | + name: Homo_sapiens_assembly38.dbsnp138.vcf.idx}]}} |
| 21 | + hapmap_resource_vcf: {type: File, secondaryFiles: [{pattern: '.tbi', required: true}], doc: 'Hapmap genotype SNP input vcf', "sbg:suggestedValue": { |
| 22 | + class: File, path: 60639016357c3a53540ca7be, name: hapmap_3.3.hg38.vcf.gz, secondaryFiles: [{class: File, path: 60639016357c3a53540ca7c5, |
| 23 | + name: hapmap_3.3.hg38.vcf.gz.tbi}]}} |
| 24 | + mills_resource_vcf: {type: File, secondaryFiles: [{pattern: '.tbi', required: true}], doc: 'Mills_and_1000G_gold_standard.indels.hg38.vcf.gz', |
| 25 | + "sbg:suggestedValue": {class: File, path: 6063901a357c3a53540ca7f3, name: Mills_and_1000G_gold_standard.indels.hg38.vcf.gz, secondaryFiles: [ |
| 26 | + {class: File, path: 6063901c357c3a53540ca806, name: Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi}]}} |
| 27 | + omni_resource_vcf: {type: File, secondaryFiles: [{pattern: '.tbi', required: true}], doc: '1000G_omni2.5.hg38.vcf.gz', "sbg:suggestedValue": { |
| 28 | + class: File, path: 6063901e357c3a53540ca835, name: 1000G_omni2.5.hg38.vcf.gz, secondaryFiles: [{class: File, path: 60639016357c3a53540ca7b1, |
| 29 | + name: 1000G_omni2.5.hg38.vcf.gz.tbi}]}} |
| 30 | + one_thousand_genomes_resource_vcf: {type: File, secondaryFiles: [{pattern: '.tbi', required: true}], doc: '1000G_phase1.snps.high_confidence.hg38.vcf.gz, |
| 31 | + high confidence snps', "sbg:suggestedValue": {class: File, path: 6063901c357c3a53540ca80f, name: 1000G_phase1.snps.high_confidence.hg38.vcf.gz, |
| 32 | + secondaryFiles: [{class: File, path: 6063901e357c3a53540ca845, name: 1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi}]}} |
| 33 | + snp_max_gaussians: {type: 'int?', doc: "Interger value for max gaussians in SNP VariantRecalibration. If a dataset gives fewer variants |
| 34 | + than the expected scale, the number of Gaussians for training should be turned down. Lowering the max-Gaussians forces the program |
| 35 | + to group variants into a smaller number of clusters, which results in more variants per cluster."} |
| 36 | + indel_max_gaussians: {type: 'int?', doc: "Interger value for max gaussians in INDEL VariantRecalibration. If a dataset gives fewer |
| 37 | + variants than the expected scale, the number of Gaussians for training should be turned down. Lowering the max-Gaussians forces |
| 38 | + the program to group variants into a smaller number of clusters, which results in more variants per cluster."} |
| 39 | + snp_tranches: { type: 'string[]', doc: "The levels of truth sensitivity at which to slice the SNP recalibration data, in percent." } |
| 40 | + snp_annotations: { type: 'string[]', doc: "The names of the annotations which should used for SNP recalibration calculations." } |
| 41 | + indel_tranches: { type: 'string[]', doc: "The levels of truth sensitivity at which to slice the INDEL recalibration data, in percent." } |
| 42 | + indel_annotations: { type: 'string[]', doc: "The names of the annotations which should used for INDEL recalibration calculations." } |
| 43 | + snp_ts_filter_level: { type: 'float', doc: "The truth sensitivity level at which to start filtering SNP data" } |
| 44 | + indel_ts_filter_level: { type: 'float', doc: "The truth sensitivity level at which to start filtering INDEL data" } |
| 45 | + |
| 46 | + # Resource Control |
| 47 | + snp_model_cpu: { type: 'int?', doc: "CPUs to allocate to VariantRecalibrator for SNP model creation." } |
| 48 | + snp_model_ram: { type: 'int?', doc: "GB of RAM to allocate to VariantRecalibrator for SNP model creation." } |
| 49 | + indel_recal_cpu: { type: 'int?', doc: "CPUs to allocate to VariantRecalibrator for INDEL recalibration." } |
| 50 | + indel_recal_ram: { type: 'int?', doc: "GB of RAM to allocate to VariantRecalibrator for INDEL recalibration." } |
| 51 | + snp_recal_cpu: { type: 'int?', doc: "CPUs to allocate to VariantRecalibrator for scattered SNP recalibration." } |
| 52 | + snp_recal_ram: { type: 'int?', doc: "GB of RAM to allocate to VariantRecalibrator for scattered SNP recalibration." } |
| 53 | + gathertranche_cpu: { type: 'int?', doc: "CPUs to allocate to GatherTranches." } |
| 54 | + gathertranche_ram: { type: 'int?', doc: "GB of RAM to allocate to GatherTranches." } |
| 55 | + apply_cpu: { type: 'int?', doc: "CPUs to allocate to ApplyVQSR for INDELs and SNPs." } |
| 56 | + apply_ram: { type: 'int?', doc: "GB of RAM to allocate to ApplyVQSR for INDELs and SNPs." } |
| 57 | + gathervcf_cpu: { type: 'int?', doc: "CPUs to allocate to GatherVcfsCloud." } |
| 58 | + gathervcf_ram: { type: 'int?', doc: "GB of RAM to allocate to GatherVcfsCloud." } |
| 59 | + |
outputs:
  # Single workflow output: the result of the final gather step, with a .tbi
  # secondary file.
  recalibrated_vcf:
    type: 'File'
    secondaryFiles:
      - .tbi
    outputSource: gatk_gatherfinalvcf/output
| 62 | + |
steps:
  # Runs the variant-filtration tool once per element of `genotyped_vcfs`,
  # applying the filter named ExcessHet with the expression
  # `ExcessHet > 54.69`.
  gatk_filter_excesshet:
    run: ../tools/gatk_variantfiltration.cwl
    hints:
      - class: 'sbg:AWSInstanceType'
        value: m5.4xlarge
    scatter:
      - input_vcf
    in:
      input_vcf: genotyped_vcfs
      # Constant values; every scattered job receives the same basename.
      output_basename:
        valueFrom: 'excesshet_filtered'
      variant_filters:
        valueFrom: '--filter-expression "ExcessHet > 54.69" --filter-name ExcessHet'
    out:
      - output
| 77 | + gatk_makesitesonlyvcf: |
| 78 | + run: ../tools/gatk_makesitesonlyvcf.cwl |
| 79 | + scatter: [input_vcf] |
| 80 | + hints: |
| 81 | + - class: 'sbg:AWSInstanceType' |
| 82 | + value: m5.4xlarge |
| 83 | + in: |
| 84 | + input_vcf: gatk_filter_excesshet/output |
| 85 | + output_filename: |
| 86 | + valueFrom: 'sites_only.variant_filtered.vcf.gz' |
| 87 | + out: [sites_vcf] |
| 88 | + gatk_gathervcfs: |
| 89 | + run: ../tools/gatk_gathervcfs.cwl |
| 90 | + in: |
| 91 | + input_vcfs: gatk_makesitesonlyvcf/sites_vcf |
| 92 | + out: [output] |
| 93 | + gatk_snpsvariantrecalibratorcreatemodel: |
| 94 | + run: ../tools/gatk_snpsvariantrecalibratorcreatemodel.cwl |
| 95 | + in: |
| 96 | + dbsnp_resource_vcf: dbsnp_vcf |
| 97 | + hapmap_resource_vcf: hapmap_resource_vcf |
| 98 | + omni_resource_vcf: omni_resource_vcf |
| 99 | + one_thousand_genomes_resource_vcf: one_thousand_genomes_resource_vcf |
| 100 | + sites_only_variant_filtered_vcf: gatk_gathervcfs/output |
| 101 | + max_gaussians: snp_max_gaussians |
| 102 | + tranche: snp_tranches |
| 103 | + annotations: snp_annotations |
| 104 | + cpu: snp_model_cpu |
| 105 | + ram: snp_model_ram |
| 106 | + out: [model_report] |
| 107 | + gatk_indelsvariantrecalibrator: |
| 108 | + run: ../tools/gatk_indelsvariantrecalibrator.cwl |
| 109 | + in: |
| 110 | + axiomPoly_resource_vcf: axiomPoly_resource_vcf |
| 111 | + dbsnp_resource_vcf: dbsnp_vcf |
| 112 | + mills_resource_vcf: mills_resource_vcf |
| 113 | + sites_only_variant_filtered_vcf: gatk_gathervcfs/output |
| 114 | + max_gaussians: indel_max_gaussians |
| 115 | + tranche: indel_tranches |
| 116 | + annotations: indel_annotations |
| 117 | + cpu: indel_recal_cpu |
| 118 | + ram: indel_recal_ram |
| 119 | + out: [recalibration, tranches] |
| 120 | + gatk_snpsvariantrecalibratorscattered: |
| 121 | + run: ../tools/gatk_snpsvariantrecalibratorscattered.cwl |
| 122 | + scatter: [sites_only_variant_filtered_vcf] |
| 123 | + hints: |
| 124 | + - class: 'sbg:AWSInstanceType' |
| 125 | + value: r5.2xlarge |
| 126 | + in: |
| 127 | + sites_only_variant_filtered_vcf: gatk_filter_excesshet/output |
| 128 | + model_report: gatk_snpsvariantrecalibratorcreatemodel/model_report |
| 129 | + hapmap_resource_vcf: hapmap_resource_vcf |
| 130 | + omni_resource_vcf: omni_resource_vcf |
| 131 | + one_thousand_genomes_resource_vcf: one_thousand_genomes_resource_vcf |
| 132 | + dbsnp_resource_vcf: dbsnp_vcf |
| 133 | + max_gaussians: snp_max_gaussians |
| 134 | + tranche: snp_tranches |
| 135 | + annotations: snp_annotations |
| 136 | + cpu: snp_recal_cpu |
| 137 | + ram: snp_recal_ram |
| 138 | + out: [recalibration, tranches] |
| 139 | + gatk_gathertranches: |
| 140 | + run: ../tools/gatk_gathertranches.cwl |
| 141 | + hints: |
| 142 | + - class: 'sbg:AWSInstanceType' |
| 143 | + value: r5.2xlarge |
| 144 | + in: |
| 145 | + tranches: gatk_snpsvariantrecalibratorscattered/tranches |
| 146 | + cpu: gathertranche_cpu |
| 147 | + ram: gathertranche_ram |
| 148 | + out: [output] |
| 149 | + gatk_applyrecalibration: |
| 150 | + run: ../tools/gatk_applyrecalibration.cwl |
| 151 | + scatter: [input_vcf, snps_recalibration] |
| 152 | + scatterMethod: dotproduct |
| 153 | + hints: |
| 154 | + - class: 'sbg:AWSInstanceType' |
| 155 | + value: r5.2xlarge |
| 156 | + in: |
| 157 | + indels_recalibration: gatk_indelsvariantrecalibrator/recalibration |
| 158 | + indels_tranches: gatk_indelsvariantrecalibrator/tranches |
| 159 | + input_vcf: gatk_filter_excesshet/output |
| 160 | + snps_recalibration: gatk_snpsvariantrecalibratorscattered/recalibration |
| 161 | + snps_tranches: gatk_gathertranches/output |
| 162 | + snp_ts_filter_level: snp_ts_filter_level |
| 163 | + indel_ts_filter_level: indel_ts_filter_level |
| 164 | + cpu: apply_cpu |
| 165 | + ram: apply_ram |
| 166 | + out: [recalibrated_vcf] |
| 167 | + gatk_gatherfinalvcf: |
| 168 | + run: ../tools/gatk_gatherfinalvcf.cwl |
| 169 | + in: |
| 170 | + input_vcfs: gatk_applyrecalibration/recalibrated_vcf |
| 171 | + output_basename: output_basename |
| 172 | + cpu: gathervcf_cpu |
| 173 | + ram: gathervcf_ram |
| 174 | + out: [output] |
| 175 | + |
# Declares the `sbg:` prefix used by the `sbg:suggestedValue` fields and the
# `sbg:AWSInstanceType` hints in this document.
$namespaces:
  sbg: https://sevenbridges.com
0 commit comments