Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make truncating the histogram optional in CollectInsertSizeMetrics #1779

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/main/java/picard/analysis/CollectInsertSizeMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
"truncated to a shorter range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a minimum range.", optional=true)
public Integer MIN_HISTOGRAM_WIDTH = null;

@Argument(shortName = "TR", doc="Truncate the insert size histogram to throw out outliers", optional = true)
public boolean TRUNCATE_HISTOGRAMS = true;

@Argument(shortName="M", doc="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this " +
"percentage of overall reads. (Range: 0 to 1).")
public float MINIMUM_PCT = 0.05f;
Expand Down Expand Up @@ -148,7 +151,7 @@ protected String[] customCommandLineValidation() {

//Delegate actual collection to InsertSizeMetricCollector
multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT,
HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES);
HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES, TRUNCATE_HISTOGRAMS);
}

@Override protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,22 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
//calculated value using median + deviations is smaller.
private final Integer minHistogramWidth;

// If false, do not truncate the histogram
private final boolean truncateHistogram;

// If set to true, then duplicates will also be included in the histogram
private final boolean includeDuplicates;

public InsertSizeMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords,
final double minimumPct, final Integer histogramWidth, final Integer minHistogramWidth,
final double deviations, final boolean includeDuplicates) {
final double deviations, final boolean includeDuplicates, final boolean truncateHistogram) {
this.minimumPct = minimumPct;
this.histogramWidth = histogramWidth;
this.minHistogramWidth = minHistogramWidth;
this.deviations = deviations;
this.includeDuplicates = includeDuplicates;
setup(accumulationLevels, samRgRecords);
this.truncateHistogram = truncateHistogram;
}

// We will pass insertSize and PairOrientation with the DefaultPerRecordCollectorArgs passed to the record collectors
Expand Down Expand Up @@ -183,15 +187,17 @@ public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file)
}

// Trim the Histogram down to get rid of outliers that would make the chart useless.
final Histogram<Integer> trimmedHistogram = histogram; // alias it
trimmedHistogram.trimByWidth(getWidthToTrimTo(metrics));
if (truncateHistogram){
histogram.trimByWidth(getWidthToTrimTo(metrics));

if (!trimmedHistogram.isEmpty()) {
metrics.MEAN_INSERT_SIZE = trimmedHistogram.getMean();
metrics.STANDARD_DEVIATION = trimmedHistogram.getStandardDeviation();
// Recompute metrics that are sensitive to outliers after trimming.
if (!histogram.isEmpty()) {
metrics.MEAN_INSERT_SIZE = histogram.getMean();
metrics.STANDARD_DEVIATION = histogram.getStandardDeviation();
}
}

file.addHistogram(trimmedHistogram);
file.addHistogram(histogram);
file.addMetric(metrics);
}
}
Expand Down
55 changes: 49 additions & 6 deletions src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
*/
package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.*;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please bring back the individual imports

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup intellij

import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;
Expand Down Expand Up @@ -248,6 +245,54 @@ public void testHistogramWidthIsSetProperly() throws IOException {
Assert.assertEquals(output.getAllHistograms().size(), 5);
}

@Test
public void testSkipTruncatingHistogram() throws IOException {
// Make a test file
final File testSamFile = File.createTempFile("CollectInsertSizeMetrics", ".bam");
testSamFile.deleteOnExit();
final File testSamFileIndex = new File(testSamFile.getParentFile(),testSamFile.getName().replaceAll("bam$","bai"));
testSamFileIndex.deleteOnExit();

final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true);
final int readLength = 10;
setBuilder.setReadLength(readLength);
final int minInsertSize = 100;
final int maxInsertSize = 400;
int i = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename readCount

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

int start = 1000;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

final

String cigar = readLength + "M";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

final

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

for (int j = minInsertSize; j < maxInsertSize; j++) {
setBuilder.addPair("read:" + i++, 0, start, start + j - readLength, false, false,
cigar, cigar, false, true, 60);
}

// Add an outlier
int outlierInsertSize = 2000;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

final

setBuilder.addPair("outlierRead", 0, start, start + outlierInsertSize - readLength,
false, false, cigar, cigar, false, true, 60);

final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use try-with-resources here.

setBuilder.forEach(writer::addAlignment);
writer.close();

final File outfile = File.createTempFile("test", ".insert_size_metrics");
final File pdf = File.createTempFile("test", ".pdf");
outfile.deleteOnExit();
pdf.deleteOnExit();
final String[] args = new String[]{
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"Histogram_FILE=" + pdf.getAbsolutePath(),
"TR=false"
};

Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));

Assert.assertEquals(output.getAllHistograms().get(0).get(outlierInsertSize).getValue(), 1.0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

run with other argument to show that this argument makes a difference

}

@Test
public void testMultipleOrientationsForHistogram() throws IOException {
final File output = new File("testdata/picard/analysis/directed/CollectInsertSizeMetrics", "multiple_orientation.sam.insert_size_metrics");
Expand Down Expand Up @@ -291,8 +336,6 @@ public void testWidthOfMetrics() throws IOException {
testSamFileIndex.deleteOnExit();


new File(testSamFile.getAbsolutePath().replace(".bam$",".bai")).deleteOnExit();

final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true);
setBuilder.setReadLength(10);

Expand Down