From 7d6d220e2b435de4d881f7507891f2c2acb6d8af Mon Sep 17 00:00:00 2001 From: Mark Fleharty Date: Wed, 5 Feb 2025 10:03:54 -0500 Subject: [PATCH 1/5] Create a new tool, FilterFlowCellEdgeReads, for filtering out reads with specific flow cell locations --- .../picard/sam/FilterFlowCellEdgeReads.java | 106 ++++++++++++ .../sam/FilterFlowCellEdgeReadsTest.java | 163 ++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 src/main/java/picard/sam/FilterFlowCellEdgeReads.java create mode 100644 src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java diff --git a/src/main/java/picard/sam/FilterFlowCellEdgeReads.java b/src/main/java/picard/sam/FilterFlowCellEdgeReads.java new file mode 100644 index 0000000000..718f8e86a0 --- /dev/null +++ b/src/main/java/picard/sam/FilterFlowCellEdgeReads.java @@ -0,0 +1,106 @@ +package picard.sam; + +import htsjdk.samtools.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import picard.cmdline.CommandLineProgram; +import picard.cmdline.StandardOptionDefinitions; +import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; +import htsjdk.samtools.util.Log; // Added this import +import java.io.File; +import java.io.IOException; + +@CommandLineProgramProperties( + summary = "Filters out reads with specific flowcell coordinates", + oneLineSummary = "Removes reads from specific flowcell positions from BAM/CRAM files", + programGroup = ReadDataManipulationProgramGroup.class +) +public class FilterFlowCellEdgeReads extends CommandLineProgram { + // Initialize logger + private static final Log logger = Log.getInstance(FilterFlowCellEdgeReads.class); + + @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, + doc = "Input BAM/CRAM file") + public String INPUT; + + @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, + doc = "Output BAM/CRAM file") + public String OUTPUT; + + @Argument(shortName = "X", + doc
= "X coordinate to filter (default: 1000)", + optional = true) + public int X_COORD = 1000; + + @Argument(shortName = "Y", + doc = "Y coordinate to filter (default: 1000)", + optional = true) + public int Y_COORD = 1000; + + private boolean hasFlowcellCoordinates(String readName) { + // Parse Illumina read name format + // Example format: @HWUSI-EAS100R:6:73:941:1973#0/1 + // or: @EAS139:136:FC706VJ:2:2104:15343:197393 + try { + String[] parts = readName.split(":"); + if (parts.length >= 6) { // Ensure we have enough parts + // The last two numbers are typically X and Y coordinates + int x = Integer.parseInt(parts[parts.length-2]); + int y = Integer.parseInt(parts[parts.length-1].split("[#/]")[0]); // Remove any trailing /1 or #0 + + return x == X_COORD && y == Y_COORD; + } + } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) { + // If we can't parse the coordinates, assume it doesn't match + return false; + } + return false; + } + + @Override + protected int doWork() { + final SamReader reader = SamReaderFactory.makeDefault() + .referenceSequence(REFERENCE_SEQUENCE) + .open(new File(INPUT)); + + final SAMFileHeader header = reader.getFileHeader(); + final SAMFileWriter writer = new SAMFileWriterFactory() + .makeWriter(header, true, new File(OUTPUT), REFERENCE_SEQUENCE); + + // Process reads + int totalReads = 0; + int filteredReads = 0; + + try { + for (final SAMRecord read : reader) { + totalReads++; + + // Check if read has the specified flowcell coordinates + if (hasFlowcellCoordinates(read.getReadName())) { + filteredReads++; + continue; // Skip this read + } + + // Write read to output if it doesn't match filter criteria + writer.addAlignment(read); + } + } finally { + try { + reader.close(); + } catch (IOException e) { + logger.error("Error closing input file", e); + } + writer.close(); + } + + logger.info("Processed " + totalReads + " total reads"); + logger.info("Filtered " + filteredReads + " reads at flowcell position " + X_COORD + ":" + 
Y_COORD); + logger.info("Wrote " + (totalReads - filteredReads) + " reads to output"); + + return 0; + } + + public static void main(String[] args) { + new FilterFlowCellEdgeReads().instanceMain(args); + } +} diff --git a/src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java b/src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java new file mode 100644 index 0000000000..e6ffe51d10 --- /dev/null +++ b/src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java @@ -0,0 +1,163 @@ +package picard.sam; + +import htsjdk.samtools.*; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; + +/** + * Unit tests for FilterFlowCellEdgeReads using TestNG. + */ +public class FilterFlowCellEdgeReadsTest { + + // Temporary files for input and output. + private File inputSam; + private File outputSam; + + /** + * Helper method to create a temporary SAM file with one record per provided read name. + * Each record is given minimal required fields. + * + * @param readNames an array of read names to include. + * @return the temporary SAM file. + * @throws IOException if an I/O error occurs. + */ + private File createSamFile(String[] readNames) throws IOException { + File tmpSam = File.createTempFile("FilterFlowCellEdgeReadsTest_input", ".sam"); + tmpSam.deleteOnExit(); + + // Create a minimal SAM file header. + SAMFileHeader header = new SAMFileHeader(); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + // Add one sequence record so that records have a reference. + header.addSequence(new SAMSequenceRecord("chr1", 1000000)); + + // Use SAMFileWriterFactory to write a SAM file. + try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(header, false, tmpSam, null)) { + // Create one record for each read name. 
+ for (String readName : readNames) { + SAMRecord rec = new SAMRecord(header); + rec.setReadName(readName); + rec.setReferenceName("chr1"); + rec.setAlignmentStart(1); + rec.setCigarString("50M"); + // Set dummy bases and qualities. + rec.setReadString("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + rec.setBaseQualityString("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + writer.addAlignment(rec); + } + } + return tmpSam; + } + + /** + * Helper method to count the number of SAM records in a given SAM file. + * + * @param samFile the SAM file. + * @return the number of records. + * @throws IOException if an I/O error occurs. + */ + private int countRecords(File samFile) throws IOException { + int count = 0; + try (SamReader reader = SamReaderFactory.makeDefault().open(samFile)) { + for (SAMRecord rec : reader) { + count++; + } + } + return count; + } + + @AfterMethod + public void tearDown() { + if (inputSam != null && inputSam.exists()) { + inputSam.delete(); + } + if (outputSam != null && outputSam.exists()) { + outputSam.delete(); + } + } + + /** + * Test with a mixed input: + * – One read with a name that matches the default coordinates ("1000:1000") and should be filtered out. + * – One read with non-matching coordinates ("2000:2000") that should be retained. 
+ */ + @Test + public void testMixedReads() throws IOException { + String[] readNames = new String[]{ + "EAS139:136:FC706VJ:2:1000:1000", // should be filtered out (matches default X_COORD and Y_COORD) + "EAS139:136:FC706VJ:2:2000:2000" // should be retained + }; + inputSam = createSamFile(readNames); + outputSam = File.createTempFile("FilterFlowCellEdgeReadsTest_output", ".sam"); + outputSam.deleteOnExit(); + + FilterFlowCellEdgeReads tool = new FilterFlowCellEdgeReads(); + tool.INPUT = inputSam.getAbsolutePath(); + tool.OUTPUT = outputSam.getAbsolutePath(); + // Use default X_COORD=1000, Y_COORD=1000 + + int ret = tool.doWork(); + Assert.assertEquals(ret, 0, "doWork() should return 0"); + + // Only the record that does not match the filter should be written. + int recordCount = countRecords(outputSam); + Assert.assertEquals(recordCount, 1, "Only one record should be written"); + } + + /** + * Test with a read whose name does not contain colon-delimited coordinates. + * The method hasFlowcellCoordinates should catch the exception and return false, + * so the record should be retained. + */ + @Test + public void testNonConformingReadName() throws IOException { + String[] readNames = new String[]{ + "nonconforming_read" // no colon-separated parts → not filtered + }; + inputSam = createSamFile(readNames); + outputSam = File.createTempFile("FilterFlowCellEdgeReadsTest_output", ".sam"); + outputSam.deleteOnExit(); + + FilterFlowCellEdgeReads tool = new FilterFlowCellEdgeReads(); + tool.INPUT = inputSam.getAbsolutePath(); + tool.OUTPUT = outputSam.getAbsolutePath(); + // Defaults are used. + + int ret = tool.doWork(); + Assert.assertEquals(ret, 0); + + // The read should be retained. + int recordCount = countRecords(outputSam); + Assert.assertEquals(recordCount, 1, "The nonconforming read should be kept"); + } + + /** + * Test with an input that has only a read with coordinates matching the filter. 
+ * In this case, the tool should filter out the only record and write an empty output. + */ + @Test + public void testAllReadsFiltered() throws IOException { + String[] readNames = new String[]{ + "EAS139:136:FC706VJ:2:1000:1000" // matches filter → filtered out + }; + inputSam = createSamFile(readNames); + outputSam = File.createTempFile("FilterFlowCellEdgeReadsTest_output", ".sam"); + outputSam.deleteOnExit(); + + FilterFlowCellEdgeReads tool = new FilterFlowCellEdgeReads(); + tool.INPUT = inputSam.getAbsolutePath(); + tool.OUTPUT = outputSam.getAbsolutePath(); + // Defaults: X_COORD=1000, Y_COORD=1000 + + int ret = tool.doWork(); + Assert.assertEquals(ret, 0); + + // Expect zero records in the output. + int recordCount = countRecords(outputSam); + Assert.assertEquals(recordCount, 0, "No records should be written"); + } +} From 4a3f36eeaaa434016942f1a83e2605fc8c578e89 Mon Sep 17 00:00:00 2001 From: Mark Fleharty Date: Wed, 5 Feb 2025 10:30:13 -0500 Subject: [PATCH 2/5] Renaming tool to FilterReadsByFlowCellLocation --- ...lEdgeReads.java => FilterReadsByFlowCellLocation.java} | 6 +++--- ...dsTest.java => FilterReadsByFlowCellLocationTest.java} | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) rename src/main/java/picard/sam/{FilterFlowCellEdgeReads.java => FilterReadsByFlowCellLocation.java} (94%) rename src/test/java/picard/sam/{FilterFlowCellEdgeReadsTest.java => FilterReadsByFlowCellLocationTest.java} (95%) diff --git a/src/main/java/picard/sam/FilterFlowCellEdgeReads.java b/src/main/java/picard/sam/FilterReadsByFlowCellLocation.java similarity index 94% rename from src/main/java/picard/sam/FilterFlowCellEdgeReads.java rename to src/main/java/picard/sam/FilterReadsByFlowCellLocation.java index 718f8e86a0..7873ef020f 100644 --- a/src/main/java/picard/sam/FilterFlowCellEdgeReads.java +++ b/src/main/java/picard/sam/FilterReadsByFlowCellLocation.java @@ -15,9 +15,9 @@ oneLineSummary = "Removes reads from specific flowcell positions from BAM/CRAM 
files", programGroup = ReadDataManipulationProgramGroup.class ) -public class FilterFlowCellEdgeReads extends CommandLineProgram { +public class FilterReadsByFlowCellLocation extends CommandLineProgram { // Initialize logger - private static final Log logger = Log.getInstance(FilterFlowCellEdgeReads.class); + private static final Log logger = Log.getInstance(FilterReadsByFlowCellLocation.class); @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input BAM/CRAM file") @@ -101,6 +101,6 @@ protected int doWork() { } public static void main(String[] args) { - new FilterFlowCellEdgeReads().instanceMain(args); + new FilterReadsByFlowCellLocation().instanceMain(args); } } diff --git a/src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java b/src/test/java/picard/sam/FilterReadsByFlowCellLocationTest.java similarity index 95% rename from src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java rename to src/test/java/picard/sam/FilterReadsByFlowCellLocationTest.java index e6ffe51d10..4055d290e9 100644 --- a/src/test/java/picard/sam/FilterFlowCellEdgeReadsTest.java +++ b/src/test/java/picard/sam/FilterReadsByFlowCellLocationTest.java @@ -11,7 +11,7 @@ /** * Unit tests for FilterFlowCellEdgeReads using TestNG. */ -public class FilterFlowCellEdgeReadsTest { +public class FilterReadsByFlowCellLocationTest { // Temporary files for input and output. 
private File inputSam; @@ -95,7 +95,7 @@ public void testMixedReads() throws IOException { outputSam = File.createTempFile("FilterFlowCellEdgeReadsTest_output", ".sam"); outputSam.deleteOnExit(); - FilterFlowCellEdgeReads tool = new FilterFlowCellEdgeReads(); + FilterReadsByFlowCellLocation tool = new FilterReadsByFlowCellLocation(); tool.INPUT = inputSam.getAbsolutePath(); tool.OUTPUT = outputSam.getAbsolutePath(); // Use default X_COORD=1000, Y_COORD=1000 @@ -122,7 +122,7 @@ public void testNonConformingReadName() throws IOException { outputSam = File.createTempFile("FilterFlowCellEdgeReadsTest_output", ".sam"); outputSam.deleteOnExit(); - FilterFlowCellEdgeReads tool = new FilterFlowCellEdgeReads(); + FilterReadsByFlowCellLocation tool = new FilterReadsByFlowCellLocation(); tool.INPUT = inputSam.getAbsolutePath(); tool.OUTPUT = outputSam.getAbsolutePath(); // Defaults are used. @@ -148,7 +148,7 @@ public void testAllReadsFiltered() throws IOException { outputSam = File.createTempFile("FilterFlowCellEdgeReadsTest_output", ".sam"); outputSam.deleteOnExit(); - FilterFlowCellEdgeReads tool = new FilterFlowCellEdgeReads(); + FilterReadsByFlowCellLocation tool = new FilterReadsByFlowCellLocation(); tool.INPUT = inputSam.getAbsolutePath(); tool.OUTPUT = outputSam.getAbsolutePath(); // Defaults: X_COORD=1000, Y_COORD=1000 From a01d3ed7007558e22ff14a06fb3beae2f3d83acc Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Wed, 5 Feb 2025 09:50:58 -0800 Subject: [PATCH 3/5] Fix failures to allocate arrays with a very large Java heap size (#1986) The maximum length of a Java array is not exactly Integer.MAX_VALUE, but slightly less due to the space taken up by the object header. The exact maximum differs depending on the platform and Java version. This was already accounted for in one instance, but not others. 
This commit fixes the other instances and changes the maximum size in the existing instance to Integer.MAX_VALUE - 32 instead of Integer.MAX_VALUE - 5 to decrease the likelihood of allocation failures on different Java versions and platforms. --- .../sam/markduplicates/EstimateLibraryComplexity.java | 4 ++-- src/main/java/picard/sam/markduplicates/MarkDuplicates.java | 6 +++--- src/main/java/picard/util/SequenceDictionaryUtils.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java b/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java index 6f32f33d8c..69fee46db3 100644 --- a/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java +++ b/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java @@ -425,7 +425,7 @@ public EstimateLibraryComplexity() { } else { sizeInBytes = PairedReadSequence.getSizeInBytes(); } - MAX_RECORDS_IN_RAM = (int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2; + MAX_RECORDS_IN_RAM = Math.min((int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2, Integer.MAX_VALUE - 32); } /** @@ -673,4 +673,4 @@ boolean passesQualityCheck(final byte[] bases, final byte[] quals, final int see for (int i = 0; i < readLength; i++) total += quals[i]; return total / readLength >= minQuality; } -} \ No newline at end of file +} diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java index 669fb8cbca..7d1b84c051 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java @@ -479,8 +479,8 @@ private void buildSortedReadEndLists(final boolean useBarcodes) { } else { sizeInBytes = ReadEndsForMarkDuplicates.getSizeOf(); } - MAX_RECORDS_IN_RAM = (int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2; - final int maxInMemory = (int) 
((Runtime.getRuntime().maxMemory() * SORTING_COLLECTION_SIZE_RATIO) / sizeInBytes); + MAX_RECORDS_IN_RAM = Math.min((int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2, Integer.MAX_VALUE - 32); + final int maxInMemory = Math.min((int) ((Runtime.getRuntime().maxMemory() * SORTING_COLLECTION_SIZE_RATIO) / sizeInBytes), Integer.MAX_VALUE - 32); log.info("Will retain up to " + maxInMemory + " data points before spilling to disk."); final ReadEndsForMarkDuplicatesCodec fragCodec, pairCodec, diskCodec; @@ -719,7 +719,7 @@ protected void sortIndicesForDuplicates(final boolean indexOpticalDuplicates){ entryOverhead = SortingLongCollection.SIZEOF; } // Keep this number from getting too large even if there is a huge heap. - int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 5)); + int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 32)); // If we're also tracking optical duplicates, reduce maxInMemory, since we'll need two sorting collections if (indexOpticalDuplicates) { maxInMemory /= ((entryOverhead + SortingLongCollection.SIZEOF) / entryOverhead); diff --git a/src/main/java/picard/util/SequenceDictionaryUtils.java b/src/main/java/picard/util/SequenceDictionaryUtils.java index 178935fb52..972f90a674 100644 --- a/src/main/java/picard/util/SequenceDictionaryUtils.java +++ b/src/main/java/picard/util/SequenceDictionaryUtils.java @@ -190,7 +190,7 @@ public static SortingCollection makeSortingCollection() { String.class, new StringCodec(), String::compareTo, - (int) Math.min(maxNamesInRam, Integer.MAX_VALUE), + (int) Math.min(maxNamesInRam, Integer.MAX_VALUE - 32), tmpDir.toPath() ); } From c3f6a50a50bea429fd63eea03038d0e7a4d8f366 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Wed, 5 Feb 2025 13:40:57 -0500 Subject: [PATCH 4/5] Updating GH actions upload-artifact v3 -> v4 (#1993) --- .github/workflows/cloud_tests.yml | 2 +- 
.github/workflows/tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cloud_tests.yml b/.github/workflows/cloud_tests.yml index e8049f415f..7526a531ba 100644 --- a/.github/workflows/cloud_tests.yml +++ b/.github/workflows/cloud_tests.yml @@ -87,7 +87,7 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: cloud-test-results-${{ matrix.Java }}-barclay-${{ matrix.run_barclay_tests}} path: build/reports/tests \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 30fddcd431..af0fa3bae3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -50,7 +50,7 @@ jobs: java -jar build/libs/picard.jar MarkDuplicates -I testdata/picard/sam/aligned_queryname_sorted.bam -O out.bam --METRICS_FILE out.metrics - name: Upload test results if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-results-${{ matrix.Java }}-barclay-${{ matrix.run_barclay_tests}} path: build/reports/tests From 28dc6154051700940ad83a576ea9db1272b06ca9 Mon Sep 17 00:00:00 2001 From: Mark Fleharty Date: Wed, 5 Feb 2025 15:52:27 -0500 Subject: [PATCH 5/5] Making a few modifications for a PR --- .../sam/FilterReadsByFlowCellLocation.java | 92 ++++++++++++++----- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/src/main/java/picard/sam/FilterReadsByFlowCellLocation.java b/src/main/java/picard/sam/FilterReadsByFlowCellLocation.java index 7873ef020f..09b0d58293 100644 --- a/src/main/java/picard/sam/FilterReadsByFlowCellLocation.java +++ b/src/main/java/picard/sam/FilterReadsByFlowCellLocation.java @@ -6,7 +6,9 @@ import picard.cmdline.CommandLineProgram; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; -import htsjdk.samtools.util.Log; // Added this import +import htsjdk.samtools.util.Log; 
+import picard.sam.util.ReadNameParser; +import picard.sam.util.PhysicalLocation; import java.io.File; import java.io.IOException; @@ -16,7 +18,6 @@ programGroup = ReadDataManipulationProgramGroup.class ) public class FilterReadsByFlowCellLocation extends CommandLineProgram { - // Initialize logger private static final Log logger = Log.getInstance(FilterReadsByFlowCellLocation.class); @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, @@ -37,24 +38,76 @@ public class FilterReadsByFlowCellLocation extends CommandLineProgram { optional = true) public int Y_COORD = 1000; + private final ReadNameParser readNameParser = new ReadNameParser(ReadNameParser.DEFAULT_READ_NAME_REGEX); + private boolean hasFlowcellCoordinates(String readName) { - // Parse Illumina read name format - // Example format: @HWUSI-EAS100R:6:73:941:1973#0/1 - // or: @EAS139:136:FC706VJ:2:2104:15343:197393 - try { - String[] parts = readName.split(":"); - if (parts.length >= 6) { // Ensure we have enough parts - // The last two numbers are typically X and Y coordinates - int x = Integer.parseInt(parts[parts.length-2]); - int y = Integer.parseInt(parts[parts.length-1].split("[#/]")[0]); // Remove any trailing /1 or #0 + class ReadLocation implements PhysicalLocation { + private short libraryId; + private int x = -1, y = -1; // Default to invalid values + private short tile; + + @Override + public void setLibraryId(short libraryId) { + this.libraryId = libraryId; + } + + @Override + public short getLibraryId() { + return libraryId; + } + + @Override + public void setX(int x) { + this.x = x; + } + + @Override + public int getX() { + return x; + } + + @Override + public void setY(int y) { + this.y = y; + } + + @Override + public int getY() { + return y; + } - return x == X_COORD && y == Y_COORD; + @Override + public void setReadGroup(short readGroup) {} + + @Override + public short getReadGroup() { + return 0; + } + + @Override + public void setTile(short tile) { + this.tile = tile; + } + + 
@Override + public short getTile() { + return tile; } - } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) { - // If we can't parse the coordinates, assume it doesn't match - return false; } - return false; + + ReadLocation location = new ReadLocation(); + try { + readNameParser.addLocationInformation(readName, location); + } catch (Exception e) { + logger.warn("Failed to parse read name: " + readName, e); + return false; // Keep the read if parsing fails + } + + if (location.getX() == -1 || location.getY() == -1) { + return false; // Keep the read if coordinates are invalid + } + + return location.getX() == X_COORD && location.getY() == Y_COORD; } @Override @@ -67,21 +120,16 @@ protected int doWork() { final SAMFileWriter writer = new SAMFileWriterFactory() .makeWriter(header, true, new File(OUTPUT), REFERENCE_SEQUENCE); - // Process reads int totalReads = 0; int filteredReads = 0; try { for (final SAMRecord read : reader) { totalReads++; - - // Check if read has the specified flowcell coordinates if (hasFlowcellCoordinates(read.getReadName())) { filteredReads++; - continue; // Skip this read + continue; } - - // Write read to output if it doesn't match filter criteria writer.addAlignment(read); } } finally {