-
Notifications
You must be signed in to change notification settings - Fork 372
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add wrapping counting filter
- Loading branch information
Showing
5 changed files
with
124 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package picard.filter; | ||
|
||
import htsjdk.samtools.SAMRecord; | ||
import htsjdk.samtools.filter.SamRecordFilter; | ||
|
||
/** | ||
* A CountingFilter that wraps any SamRecordFilter and provides a count of the reads and bases filtered | ||
*/ | ||
public class CountingFilterWrapper extends CountingFilter { | ||
private final SamRecordFilter wrappedFilter; | ||
public CountingFilterWrapper(SamRecordFilter wrappedFilter) { | ||
this.wrappedFilter = wrappedFilter; | ||
} | ||
|
||
@Override | ||
public boolean reallyFilterOut(SAMRecord record) { | ||
return wrappedFilter.filterOut(record); | ||
} | ||
} |
50 changes: 50 additions & 0 deletions
50
src/test/java/picard/analysis/CollectUmiPrevalenceMetricsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package picard.analysis; | ||
|
||
import htsjdk.samtools.metrics.MetricsFile; | ||
import htsjdk.samtools.util.Histogram; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
import picard.cmdline.CommandLineProgramTest; | ||
|
||
import java.io.File; | ||
import java.io.FileReader; | ||
import java.io.IOException; | ||
|
||
import static org.testng.Assert.*; | ||
|
||
public class CollectUmiPrevalenceMetricsTest extends CommandLineProgramTest { | ||
|
||
@Override | ||
public String getCommandLineProgramName() { return CollectUmiPrevalenceMetrics.class.getSimpleName();} | ||
|
||
|
||
@Test | ||
public void integrationTest() throws IOException { | ||
final File TEST_DIR = new File("testdata/picard/independent_replicates/"); | ||
|
||
final File samFile = new File(TEST_DIR, "twopairsWithManyUmis.sam"); | ||
final File metricsFile = File.createTempFile("test", ".umi_hist"); | ||
metricsFile.deleteOnExit(); | ||
|
||
final String[] args = new String[]{ | ||
"INPUT=" + samFile.getAbsolutePath(), | ||
"OUTPUT=" + metricsFile.getAbsolutePath(), | ||
"MINIMUM_MQ=20" | ||
}; | ||
Assert.assertEquals(runPicardCommandLine(args), 0); | ||
|
||
final MetricsFile<?, Integer> output = new MetricsFile<>(); | ||
output.read(new FileReader(metricsFile)); | ||
|
||
final Histogram<Integer> hist = output.getHistogram(); | ||
|
||
assertNull(hist.get(1)); | ||
assertEquals(hist.get(2).getValue(),4); | ||
assertEquals(hist.get(3).getValue(),1); | ||
assertEquals(hist.get(4).getValue(),1); | ||
assertNull(hist.get(5)); | ||
assertNull(hist.get(6)); | ||
assertEquals(hist.getCount(),6); // sets | ||
assertEquals(hist.getSum(),15); // unique barcodes (in sets) | ||
} | ||
} |
30 changes: 30 additions & 0 deletions
30
testdata/picard/independent_replicates/twopairsWithManyUmis.sam
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
@HD VN:1.0 SO:coordinate | ||
@SQ SN:chr1 LN:501 | ||
@SQ SN:chr2 LN:501 | ||
@SQ SN:chr3 LN:501 | ||
@SQ SN:chr4 LN:101 | ||
@SQ SN:chr5 LN:101 | ||
@SQ SN:chr6 LN:101 | ||
@SQ SN:chr7 LN:404 | ||
@SQ SN:chr8 LN:202 | ||
@RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA | ||
@PG ID:bwa PN:bwa VN:3 CL:bwa aln | ||
@CO manually created file. second pair is a copy of the first pair except that it's marked as a duplicate, and the 10th base of the | ||
@CO first read is modified C->t (lower case for easy view). this corresponds to position 11 | ||
@CO 123456789 123456789 123456789 123456789 123456789 1234567890 | ||
1AAXX.3.1.1 83 chr1 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1AAXX.3.1.2 83 chr1 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CC-CC QX:Z:AA AA | ||
1AAXX.3.1.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1AAXX.3.1.2 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CC-CC QX:Z:AA AA | ||
1AAXX.3.2.1 99 chr2 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1AAXX.3.2.2 163 chr2 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CC-CC QX:Z:AA AA | ||
1AAXX.3.2.1 147 chr2 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1AAXX.3.2.2 83 chr2 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CC-CC QX:Z:AA AA | ||
1ACXX.3.3.1 83 chr3 2 255 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1ACXX.3.3.2 83 chr3 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CC-CC QX:Z:AA AA | ||
1ACXX.3.4.1 83 chr3 2 255 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1ACXX.3.4.2 83 chr3 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:GG-GG QX:Z:AA AA | ||
1ACXX.3.3.1 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AA-AA QX:Z:AA AA | ||
1ACXX.3.3.2 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CC-CC QX:Z:AA AA | ||
1ACXX.3.4.1 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:TT-TT QX:Z:AA AA | ||
1ACXX.3.4.2 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:GG-GG QX:Z:AA AA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters