diff --git a/src/main/java/picard/analysis/CollectInsertSizeMetrics.java b/src/main/java/picard/analysis/CollectInsertSizeMetrics.java index d6d7916a8a..55d125c20e 100644 --- a/src/main/java/picard/analysis/CollectInsertSizeMetrics.java +++ b/src/main/java/picard/analysis/CollectInsertSizeMetrics.java @@ -105,6 +105,9 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram { "truncated to a shorter range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a minimum range.", optional=true) public Integer MIN_HISTOGRAM_WIDTH = null; + @Argument(shortName = "TR", doc="Do not truncate the insert size histogram.", optional = true) + public boolean DO_NOT_TRUNCATE_HISTOGRAMS = false; + @Argument(shortName="M", doc="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this " + "percentage of overall reads. (Range: 0 to 1).") public float MINIMUM_PCT = 0.05f; @@ -146,6 +149,10 @@ protected String[] customCommandLineValidation() { IOUtil.assertFileIsWritable(OUTPUT); IOUtil.assertFileIsWritable(Histogram_FILE); + if (DO_NOT_TRUNCATE_HISTOGRAMS){ + HISTOGRAM_WIDTH = Integer.MAX_VALUE; + } + //Delegate actual collection to InsertSizeMetricCollector multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT, HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES); diff --git a/src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java b/src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java index 481a11038e..4c73f87074 100644 --- a/src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java @@ -183,15 +183,15 @@ public void addMetricsToFile(final MetricsFile file) } // Trim the Histogram down to get rid of outliers that would make the chart useless. - final Histogram trimmedHistogram = histogram; // alias it - trimmedHistogram.trimByWidth(getWidthToTrimTo(metrics)); + histogram.trimByWidth(getWidthToTrimTo(metrics)); - if (!trimmedHistogram.isEmpty()) { - metrics.MEAN_INSERT_SIZE = trimmedHistogram.getMean(); - metrics.STANDARD_DEVIATION = trimmedHistogram.getStandardDeviation(); + // Compute metrics that are sensitive to outliers after trimming. + if (!histogram.isEmpty()) { + metrics.MEAN_INSERT_SIZE = histogram.getMean(); + metrics.STANDARD_DEVIATION = histogram.getStandardDeviation(); } - file.addHistogram(trimmedHistogram); + file.addHistogram(histogram); file.addMetric(metrics); } } diff --git a/src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java b/src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java index 8563813d7a..e8756304ea 100755 --- a/src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java +++ b/src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java @@ -30,6 +30,7 @@ import htsjdk.samtools.metrics.MetricsFile; import org.testng.Assert; import org.testng.annotations.Test; +import picard.PicardException; import picard.cmdline.CommandLineProgramTest; import picard.util.RExecutor; @@ -248,6 +249,70 @@ public void testHistogramWidthIsSetProperly() throws IOException { Assert.assertEquals(output.getAllHistograms().size(), 5); } + @Test + public void testSkipTruncatingHistogram() throws IOException { + final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true); + final int readLength = 10; + setBuilder.setReadLength(readLength); + final int minInsertSize = 100; + final int maxInsertSize = 400; + int readCount = 0; + final int start = 1000; + final String cigar = readLength + "M"; + for (int j = minInsertSize; j < maxInsertSize; j++) { + setBuilder.addPair("read:" + readCount++, 0, start, start + j - readLength, false, false, + cigar, cigar, false, true, 60); + } + + // Add an outlier + int outlierInsertSize = 2000; + setBuilder.addPair("outlierRead", 0, start, start + outlierInsertSize - readLength, + false, false, cigar, cigar, false, true, 60); + + final File testSamFile = File.createTempFile("CollectInsertSizeMetrics", ".bam"); + testSamFile.deleteOnExit(); + final File testSamFileIndex = new File(testSamFile.getParentFile(),testSamFile.getName().replace("bam$","bai")); + testSamFileIndex.deleteOnExit(); + try ( final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true) + .makeBAMWriter(setBuilder.getHeader(), false, testSamFile)) { + setBuilder.forEach(writer::addAlignment); + } + + final File outfile = File.createTempFile("test", ".insert_size_metrics"); + final File pdf = File.createTempFile("test", ".pdf"); + outfile.deleteOnExit(); + pdf.deleteOnExit(); + final String[] args = new String[]{ + "INPUT=" + testSamFile.getAbsolutePath(), + "OUTPUT=" + outfile.getAbsolutePath(), + "Histogram_FILE=" + pdf.getAbsolutePath(), + "TR=" + true + }; + + Assert.assertEquals(runPicardCommandLine(args), 0); + final MetricsFile> output = new MetricsFile<>(); + output.read(new FileReader(outfile)); + + Assert.assertEquals(output.getAllHistograms().get(0).get(outlierInsertSize).getValue(), 1.0); + + // Run again with truncation enabled + final File outfileWithTruncation = File.createTempFile("test", ".insert_size_metrics"); + outfileWithTruncation.deleteOnExit(); + final String[] argsWithTruncation = new String[]{ + "INPUT=" + testSamFile.getAbsolutePath(), + "OUTPUT=" + outfileWithTruncation.getAbsolutePath(), + "Histogram_FILE=" + pdf.getAbsolutePath() + }; + + Assert.assertEquals(runPicardCommandLine(argsWithTruncation), 0); + final MetricsFile> outputWithTruncation = new MetricsFile<>(); + outputWithTruncation.read(new FileReader(outfileWithTruncation)); + + // Check that the outlier has been removed when truncation is enabled + Assert.assertFalse(outputWithTruncation.getAllHistograms().get(0).containsKey(outlierInsertSize)); + + } + @Test public void testMultipleOrientationsForHistogram() throws IOException { final File output = new File("testdata/picard/analysis/directed/CollectInsertSizeMetrics", "multiple_orientation.sam.insert_size_metrics"); @@ -291,8 +356,6 @@ public void testWidthOfMetrics() throws IOException { testSamFileIndex.deleteOnExit(); - new File(testSamFile.getAbsolutePath().replace(".bam$",".bai")).deleteOnExit(); - final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true); setBuilder.setReadLength(10); @@ -326,7 +389,6 @@ public void testWidthOfMetrics() throws IOException { setBuilder.forEach(writer::addAlignment); writer.close(); - final File outfile = File.createTempFile("test", ".insert_size_metrics"); final File pdf = File.createTempFile("test", ".pdf"); outfile.deleteOnExit();