diff --git a/src/main/java/picard/analysis/CollectRawWgsMetrics.java b/src/main/java/picard/analysis/CollectRawWgsMetrics.java index 175ce20ebf..2f8defc666 100644 --- a/src/main/java/picard/analysis/CollectRawWgsMetrics.java +++ b/src/main/java/picard/analysis/CollectRawWgsMetrics.java @@ -123,6 +123,7 @@ public RawWgsMetrics() { public RawWgsMetrics(final IntervalList intervals, final Histogram highQualityDepthHistogram, + final Histogram highQualityDepthHistogramNonZero final Histogram unfilteredDepthHistogram, final double pctExcludedByAdapter, final double pctExcludedByMapq, @@ -135,7 +136,7 @@ public RawWgsMetrics(final IntervalList intervals, final int coverageCap, final Histogram unfilteredBaseQHistogram, final int sampleSize) { - super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, + super(intervals, highQualityDepthHistogram, highQualityDepthHistogramNonZero,unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize); } } diff --git a/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java b/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java index ac8f2003c5..657bdcde92 100644 --- a/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java +++ b/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java @@ -104,6 +104,7 @@ public WgsMetricsWithNonZeroCoverage() { public WgsMetricsWithNonZeroCoverage(final IntervalList intervals, final Histogram highQualityDepthHistogram, + final Histogram highQualitDepthHistogramNonZero, final Histogram unfilteredDepthHistogram, final double pctExcludedByAdapter, final double pctExcludedByMapq, @@ -116,7 +117,7 @@ public WgsMetricsWithNonZeroCoverage(final IntervalList intervals, final int coverageCap, final Histogram unfilteredBaseQHistogram, final int sampleSize) { - super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, + super(intervals, highQualityDepthHistogram, highQualitDepthHistogramNonZero, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq, pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize); } } @@ -170,6 +171,7 @@ protected int doWork() { @Override protected WgsMetrics generateWgsMetrics(final IntervalList intervals, final Histogram highQualityDepthHistogram, + final Histogram highQualityDepthHistogramNonZero, final Histogram unfilteredDepthHistogram, final double pctExcludedByAdapter, final double pctExcludedByMapq, @@ -185,6 +187,7 @@ protected WgsMetrics generateWgsMetrics(final IntervalList intervals, return new WgsMetricsWithNonZeroCoverage( intervals, highQualityDepthHistogram, + highQualityDepthHistogramNonZeroFinal, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, @@ -207,7 +210,7 @@ protected WgsMetricsCollector getCollector(final int coverageCap, final Interval protected class WgsMetricsWithNonZeroCoverageCollector extends WgsMetricsCollector { Histogram highQualityDepthHistogram; - Histogram highQualityDepthHistogramNonZero; + Histogram highQualityDepthHistogramNonZeroFinal; public WgsMetricsWithNonZeroCoverageCollector(final CollectWgsMetricsWithNonZeroCoverage metrics, final int coverageCap, final IntervalList intervals) { @@ -222,7 +225,7 @@ public void addToMetricsFile(final MetricsFile file, final CountingFilter mapqFilter, final CountingPairedFilter pairFilter) { highQualityDepthHistogram = getDepthHistogram(); - highQualityDepthHistogramNonZero = getDepthHistogramNonZero(); + highQualityDepthHistogramNonZeroFinal = getDepthHistogramNonZero(); // calculate metrics the same way as in CollectWgsMetrics final WgsMetricsWithNonZeroCoverage metrics = (WgsMetricsWithNonZeroCoverage) getMetrics(dupeFilter, adapterFilter, mapqFilter, pairFilter); @@ -239,7 +242,7 @@ public void addToMetricsFile(final MetricsFile file, file.addMetric(metrics); file.addMetric(metricsNonZero); file.addHistogram(highQualityDepthHistogram); - file.addHistogram(highQualityDepthHistogramNonZero); + file.addHistogram(highQualityDepthHistogramNonZeroFinal); if (includeBQHistogram) { addBaseQHistogram(file); @@ -260,7 +263,7 @@ private Histogram getDepthHistogramNonZero() { } public boolean areHistogramsEmpty() { - return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZero.isEmpty()); + return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZeroFinal.isEmpty()); } } } diff --git a/src/main/java/picard/analysis/WgsMetrics.java b/src/main/java/picard/analysis/WgsMetrics.java index 92c1b8f923..807f4da2a4 100644 --- a/src/main/java/picard/analysis/WgsMetrics.java +++ b/src/main/java/picard/analysis/WgsMetrics.java @@ -31,6 +31,7 @@ import picard.PicardException; import picard.util.MathUtil; import picard.util.help.HelpConstants; +import java.util.stream.LongStream; /** Metrics for evaluating the performance of whole genome sequencing experiments. */ @DocumentedFeature(groupName = HelpConstants.DOC_CAT_METRICS, summary = HelpConstants.DOC_CAT_METRICS_SUMMARY) @@ -46,6 +47,9 @@ public class WgsMetrics extends MergeableMetricBase { @MergingIsManual protected final Histogram highQualityDepthHistogram; + /** Count of sites with a given depth of coverage. Excludes bases with quality below MINIMUM_BASE_QUALITY and 0 coverage.*/ + @MergingIsManual + protected final Histogram highQualityDepthHistogramNonZero; /** Count of sites with a given depth of coverage. Includes all but quality 2 bases */ @MergingIsManual protected final Histogram unfilteredDepthHistogram; @@ -68,6 +72,7 @@ public class WgsMetrics extends MergeableMetricBase { public WgsMetrics() { intervals = null; highQualityDepthHistogram = null; + highQualityDepthHistogramNonZero = null; unfilteredDepthHistogram = null; unfilteredBaseQHistogram = null; theoreticalHetSensitivitySampleSize = -1; @@ -77,6 +82,7 @@ public WgsMetrics() { /** * Create an instance of this metric that is mergeable. * + * @param highQualityDepthHistogramNonZero the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY or with 0 coverage. * @param highQualityDepthHistogram the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY. * @param unfilteredDepthHistogram the depth histogram that includes all but quality 2 bases. * @param pctExcludedByAdapter the fraction of aligned bases that were filtered out because they were in reads with 0 mapping quality that looked like adapter sequence. @@ -92,6 +98,7 @@ public WgsMetrics() { * @param theoreticalHetSensitivitySampleSize the sample size used for theoretical het sensitivity sampling. */ public WgsMetrics(final IntervalList intervals, + final Histogram highQualityDepthHistogramNonZero, final Histogram highQualityDepthHistogram, final Histogram unfilteredDepthHistogram, final double pctExcludedByAdapter, @@ -107,6 +114,7 @@ public WgsMetrics(final IntervalList intervals, final int theoreticalHetSensitivitySampleSize) { this.intervals = intervals.uniqued(); this.highQualityDepthHistogram = highQualityDepthHistogram; + this.highQualityDepthHistogramNonZero = new Histogram<>("coverage_or_base_quality", "high_quality_non_zero_coverage_count"); this.unfilteredDepthHistogram = unfilteredDepthHistogram; this.unfilteredBaseQHistogram = unfilteredBaseQHistogram; this.coverageCap = coverageCap; @@ -320,12 +328,21 @@ public void calculateDerivedFields() { PCT_90X = MathUtil.sum(depthHistogramArray, 90, depthHistogramArray.length) / (double) GENOME_TERRITORY; PCT_100X = MathUtil.sum(depthHistogramArray, 100, depthHistogramArray.length) / (double) GENOME_TERRITORY; - + + long maxDepth = 0; + for (final Histogram.Bin bin : highQualityDepthHistogram.values()) { + maxDepth = Math.max((int) bin.getIdValue(), maxDepth); + final int depth = (int) bin.getIdValue(); + if (depth > 0) { + highQualityDepthHistogramNonZero.increment(depth,bin.getValue()); + } + } + LongStream.range(1, maxDepth).forEach(i -> highQualityDepthHistogramNonZero.increment((int) i, 0)); // This roughly measures by how much we must over-sequence so that xx% of bases have coverage at least as deep as the current mean coverage: - if (highQualityDepthHistogram.getCount() > 0) { - FOLD_80_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.2); - FOLD_90_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.1); - FOLD_95_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.05); + if (highQualityDepthHistogramNonZero.getCount() > 0) { + FOLD_80_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.2); + FOLD_90_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.1); + FOLD_95_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.05); } else { FOLD_80_BASE_PENALTY = 0; FOLD_90_BASE_PENALTY = 0; diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 39341fad0c..e5a98501ed 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -126,6 +126,10 @@ public abstract class TargetMetricsCollector highQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count"); + // histogram of depths. does not include bases with quality less than MINIMUM_BASE_QUALITY (default 20) or bases with zero coverage. + // we use this for the calculation of the FOLD_80_BASE_PENALTY metric + private final Histogram highQualitDepthHistogramNonZero = new Histogram<>("coverage_or_base_quality", "high_quality_non_zero_coverage_count"); + // histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity. private final Histogram unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); @@ -708,6 +712,7 @@ public void finish() { private void calculateTargetCoverageMetrics() { LongStream.range(0, hqMaxDepth).forEach(i -> highQualityDepthHistogram.increment((int) i, 0)); + LongStream.range(1, hqMaxDepth).forEach(i -> highQualityDepthHistogramNonZero.increment((int) i, 0)); int zeroCoverageTargets = 0; @@ -738,7 +743,9 @@ private void calculateTargetCoverageMetrics() { totalCoverage += depth; highQualityDepthHistogram.increment(depth, 1); minDepth = Math.min(minDepth, depth); - + if (depth > 0) { + highQualityDepthHistogramNonZero.increment(depth, 1); + } // Add to the "how many target bases at at-least X" calculations. for (int i = 0; i < targetBasesDepth.length; i++) { if (depth >= targetBasesDepth[i]) targetBases[i]++; @@ -750,8 +757,8 @@ private void calculateTargetCoverageMetrics() { if (targetBases[0] != highQualityCoverageByTarget.keySet().stream().mapToInt(Interval::length).sum()) { throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases"); } - - metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY; + + metrics.MEAN_TARGET_COVERAGE = highQualityDepthHistogram.getMean(); metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian(); metrics.MAX_TARGET_COVERAGE = hqMaxDepth; // Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE) @@ -759,7 +766,7 @@ private void calculateTargetCoverageMetrics() { // compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile // this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage - metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / highQualityDepthHistogram.getPercentile(0.2); + metrics.FOLD_80_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.2); metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size(); // Store the "how many bases at at-least X" calculations.