diff --git a/src/main/java/picard/sam/SamErrorMetric/SimpleErrorCalculator.java b/src/main/java/picard/sam/SamErrorMetric/SimpleErrorCalculator.java index 7d56e2637e..2f7b60ba5f 100644 --- a/src/main/java/picard/sam/SamErrorMetric/SimpleErrorCalculator.java +++ b/src/main/java/picard/sam/SamErrorMetric/SimpleErrorCalculator.java @@ -24,16 +24,11 @@ package picard.sam.SamErrorMetric; -import htsjdk.samtools.AlignmentBlock; -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; import htsjdk.samtools.reference.SamLocusAndReferenceIterator; import htsjdk.samtools.util.AbstractRecordAndOffset; import htsjdk.samtools.util.SamLocusIterator; import htsjdk.samtools.util.SequenceUtil; -import java.util.List; /** * A calculator that estimates the error rate of the bases it observes, assuming that the reference is truth. @@ -50,7 +45,7 @@ public void addBase(final SamLocusIterator.RecordAndOffset recordAndOffset, fina super.addBase(recordAndOffset, locusAndRef); if (recordAndOffset.getAlignmentType() == AbstractRecordAndOffset.AlignmentType.Match) { final byte readBase = recordAndOffset.getReadBase(); - if (!SequenceUtil.isNoCall(readBase) && (readBase != locusAndRef.getReferenceBase())) { + if (!SequenceUtil.isNoCall(readBase) && (!SequenceUtil.basesEqual(readBase, locusAndRef.getReferenceBase()))) { nMismatchingBases++; } } diff --git a/src/test/java/picard/sam/SamErrorMetric/BaseErrorCalculationTest.java b/src/test/java/picard/sam/SamErrorMetric/BaseErrorCalculationTest.java index 144093519f..79c41d05c4 100644 --- a/src/test/java/picard/sam/SamErrorMetric/BaseErrorCalculationTest.java +++ b/src/test/java/picard/sam/SamErrorMetric/BaseErrorCalculationTest.java @@ -1,7 +1,5 @@ package picard.sam.SamErrorMetric; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Iterables; import htsjdk.samtools.*; import htsjdk.samtools.reference.ReferenceSequenceFileWalker; import htsjdk.samtools.reference.SamLocusAndReferenceIterator; @@ -14,8 +12,6 @@ import java.io.File; import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; import java.util.List; /** @@ -33,7 +29,7 @@ public void testSimpleErrorCalculator() { final SAMRecord samRecord = new SAMRecord(samFileHeader); samRecord.setReadBases("CgTGtGGAcAAAgAAA".getBytes()); - final byte[] refBases = "CATGGGGAAAAAAAAA".getBytes(); + final byte[] refBases = "CATGGGGAAAAAAaaa".getBytes(); final int n = refBases.length; samRecord.setReadUnmappedFlag(false); diff --git a/src/test/java/picard/sam/SamErrorMetric/CollectSamErrorMetricsTest.java b/src/test/java/picard/sam/SamErrorMetric/CollectSamErrorMetricsTest.java index ce72a82df9..313191cd64 100644 --- a/src/test/java/picard/sam/SamErrorMetric/CollectSamErrorMetricsTest.java +++ b/src/test/java/picard/sam/SamErrorMetric/CollectSamErrorMetricsTest.java @@ -261,10 +261,10 @@ public Object[][] OneCovariateErrorMetricsDataProvider() { new BaseErrorMetric("62A40.2", 72L, 1L)}, // No additional mismatches are found on the read with 1 mismatch. {".error_by_mismatches_in_read", simpleSamWithBaseErrors1, priorQ, - new BaseErrorMetric("1", 35L, 0L)}, + new BaseErrorMetric("-1", 36L, 0L)}, // No additional mismatches are found on the read with 1 mismatch. (Just another way to check) {".error_by_mismatches_in_read", simpleSamWithBaseErrors1, priorQ, - new BaseErrorMetric("0", 37L, 1L)}, + new BaseErrorMetric("0", 36L, 1L)}, // There should be no errors in the CAG context because it matches reference {".error_by_one_base_padded_context", simpleSamWithBaseErrors1, priorQ, new BaseErrorMetric("CAG", 1L, 0L)}, @@ -400,10 +400,10 @@ public Object[][] oneCovariateIndelErrorMetricsDataProvider() { new BaseErrorMetric("62A40.2", 72L, 1L)}, // No additional mismatches are found on the read with 1 mismatch. {".error_by_mismatches_in_read", simpleSamWithBaseErrors1, priorQ, - new BaseErrorMetric("1", 35L, 0L)}, + new BaseErrorMetric("-1", 36L, 0L)}, // No additional mismatches are found on the read with 1 mismatch. (Just another way to check) {".error_by_mismatches_in_read", simpleSamWithBaseErrors1, priorQ, - new BaseErrorMetric("0", 37L, 1L)}, + new BaseErrorMetric("0", 36L, 1L)}, // There should be no errors in the CAG context because it matches reference {".error_by_one_base_padded_context", simpleSamWithBaseErrors1, priorQ, new BaseErrorMetric("CAG", 1L, 0L)}, @@ -477,7 +477,7 @@ public void testOneCovariateIndelErrorMetrics(final String errorSubscript, final .stream() .filter(m -> m.COVARIATE.equals(expectedMetric.COVARIATE)) .findAny() - .orElseThrow(() -> new AssertionError("didn't find metric with COVARIATE==" + expectedMetric.COVARIATE)); + .orElseThrow(() -> new AssertionError("didn't find metric with COVARIATE==" + expectedMetric.COVARIATE + ": " + (metrics))); Assert.assertEquals(metric, expectedMetric); } diff --git a/testdata/picard/reference/chrM.reference.fasta b/testdata/picard/reference/chrM.reference.fasta index 020e694807..040527216c 100644 --- a/testdata/picard/reference/chrM.reference.fasta +++ b/testdata/picard/reference/chrM.reference.fasta @@ -1,7 +1,7 @@ >chrM -GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT -TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG -GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT +gatcacaggtctatcaccctattaaccactcacgggagctctccatgcat +ttggtattttcgtctggggggtgtgcacgcgatagcattgcgagacgctg +gagccggagcaccctatgtcgcagtatctgtctttgattcctgcctcatt CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA @@ -187,57 +187,57 @@ ACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCC CATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTA GCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACT AACCAACACACTAACCATATACCAATGGTGGCGCGATGTAACACGAGAAA -GCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATAC -GGGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTT -CTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAACTAGGAGGGC -ACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTC -CTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCA -CCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTA -TTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCAGAGTAC -TTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTT -TGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCC -TCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACAT -CACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGT -TTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTA -GTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAA -AAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGC -CTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACA -TAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCC -CGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATT -ATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAA -CAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATC -CTAGCCCTAAGTCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAGC -CGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGACTCATTAAATT -ATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTA -GCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCAT -ATCCTCCCTACTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAG -CTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAATATTGTGCCT -ATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCT -ACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTACATAACCTAA -ACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCAC -TGACATGACTTTCCAAAAAGCACATAATTTGAATCAACACAACCACCCAC -AGCCTAATTATTAGCATCATCCCCCTACTATTTTTTAACCAAATCAACAA -CAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCC -CCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGC -CAACGCCACTTATCCAGCGAACCACTATCACGAAAAAAACTCTACCTCTC -TATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAG -AACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTG -GCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCAC -ATACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCAC -TAATTTACACTCACAACACCCTAGGCTCACTAAACATTCTACTACTCACT -CTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTTAATATGACT -AGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACT -TATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTA -CTTGCCGCAGTACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCAC -ACTCATTCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTAC -TATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACA -GACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACATAGCCCTCGT -AGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCA -TTCTCATAATCGCCCACGGACTCACATCCTCATTACTATTCTGCCTAGCA -AACTCAAACTACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGG -ACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCC -TCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCT -GTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGG +gcacataccaaggccaccacacaccacctgtccaaaaaggccttcgatac +gggataatcctatttattacctcagaagtttttttcttcgcaggattttt +ctgagccttttaccactccagcctagcccctaccccccaactaggagggc +actggcccccaacaggcatcaccccgctaaatcccctagaagtcccactc +ctaaacacatccgtattactcgcatcaggagtatcaatcacctgagctca +ccatagtctaatagaaaacaaccgaaaccaaataattcaagcactgctta +ttacaattttactgggtctctattttaccctcctacaagcctcagagtac +ttcgagtctcccttcaccatttccgacggcatctacggctcaacattttt +tgtagccacaggcttccacggacttcacgtcattattggctcaactttcc +tcactatctgcttcatccgccaactaatatttcactttacatccaaacat +cactttggcttcgaagccgccgcctgatactggcattttgtagatgtggt +ttgactatttctgtatgtctccatctattgatgagggtcttactctttta +gtataaatagtaccgttaacttccaattaactagttttgacaacattcaa +aaaagagtaataaacttcgccttaattttaataatcaacaccctcctagc +cttactactaataattattacattttgactaccacaactcaacggctaca +tagaaaaatccaccccttacgagtgcggcttcgaccctatatcccccgcc +cgcgtccctttctccataaaattcttcttagtagctattaccttcttatt +atttgatctagaaattgccctccttttacccctaccatgagccctacaaa +caactaacctgccactaatagttatgtcatccctcttattaatcatcatc +ctagccctaagtctggcctatgagtgactacaaaaaggattagactgagc +cgaattggtatatagtttaaacaaaacgaatgatttcgactcattaaatt +atgataatcatatttaccaaatgcccctcatttacataaatattatacta +gcatttaccatctcacttctaggaatactagtatatcgctcacacctcat +atcctccctactatgcctagaaggaataatactatcgctgttcattatag +ctactctcataaccctcaacacccactccctcttagccaatattgtgcct +attgccatactagtctttgccgcctgcgaagcagcggtgggcctagccct +actagtctcaatctccaacacatatggcctagactacgtacataacctaa +acctactccaatgctaaaactaatcgtcccaacaattatattactaccac +tgacatgactttccaaaaagcacataatttgaatcaacacaaccacccac +agcctaattattagcatcatccccctactattttttaaccaaatcaacaa +caacctatttagctgttccccaaccttttcctccgaccccctaacaaccc +ccctcctaatactaactacctgactcctacccctcacaatcatggcaagc +caacgccacttatccagcgaaccactatcacgaaaaaaactctacctctc +tatactaatctccctacaaatctccttaattataacattcacagccacag +aactaatcatattttatatcttcttcgaaaccacacttatccccaccttg +gctatcatcacccgatgaggcaaccagccagaacgcctgaacgcaggcac +atacttcctattctacaccctagtaggctcccttcccctactcatcgcac +taatttacactcacaacaccctaggctcactaaacattctactactcact +ctcactgcccaagaactatcaaactcctgagccaacaacttaatatgact +agcttacacaatagcttttatagtaaagatacctctttacggactccact +tatgactccctaaagcccatgtcgaagcccccatcgctgggtcaatagta +cttgccgcagtactcttaaaactaggcggctatggtataatacgcctcac +actcattctcaaccccctgacaaaacacatagcctaccccttccttgtac +tatccctatgaggcataattataacaagctccatctgcctacgacaaaca +gacctaaaatcgctcattgcatactcttcaatcagccacatagccctcgt +agtaacagccattctcatccaaaccccctgaagcttcaccggcgcagtca +ttctcataatcgcccacggactcacatcctcattactattctgcctagca +aactcaaactacgaacgcactcacagtcgcatcataatcctctctcaagg +acttcaaactctactcccactaatagctttttgatgacttctagcaagcc +tcgctaacctcgccttaccccccactattaacctactgggagaactctct +gtgctagtaaccacgttctcctgatcaaatatcactctcctacttacagg ACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAA CACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTC ACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCT