KAFKA-18683: Handle slicing of file records for updated start position

apoorvmittal10 · apoorvmittal10 · commit edec69762137 · 2025-01-31T10:28:57.000Z
diff --git a/clients/src/main/java/org/apache/kafka/common/record/FileRecords.java b/clients/src/main/java/org/apache/kafka/common/record/FileRecords.java
@@ -161,7 +161,9 @@ private int availableBytes(int position, int size) {
 
         if (position < 0)
             throw new IllegalArgumentException("Invalid position: " + position + " in read from " + this);
-        if (position > currentSizeInBytes - start)
+        // position is relative to the start of this FileRecords (which may itself be a slice), hence
+        // compare it with the current size to verify that the position is within these records.
+        if (position > currentSizeInBytes)
             throw new IllegalArgumentException("Slice from position " + position + " exceeds end position of " + this);
         if (size < 0)
             throw new IllegalArgumentException("Invalid size: " + size + " in read from " + this);
diff --git a/clients/src/test/java/org/apache/kafka/common/record/FileRecordsTest.java b/clients/src/test/java/org/apache/kafka/common/record/FileRecordsTest.java
@@ -44,6 +44,7 @@
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.stream.IntStream;
 
 import static java.util.Arrays.asList;
 import static org.apache.kafka.common.utils.Utils.utf8;
@@ -433,6 +434,38 @@ public void testSearchForTimestamp() throws IOException {
         }
     }
 
+    /**
+     * Tests slicing an already sliced FileRecords whose start position is greater than the number
+     * of bytes available in it; the slice position is relative to the sliced records.
+     */
+    @Test
+    public void testSliceForAlreadySlicedFileRecords() throws IOException {
+        byte[][] values = new byte[][] {
+            "abcd".getBytes(),
+            "efgh".getBytes(),
+            "ijkl".getBytes(),
+            "mnop".getBytes(),
+            "qrst".getBytes()
+        };
+        try (FileRecords fileRecords = createFileRecords(values)) {
+            List<RecordBatch> items = batches(fileRecords.slice(0, fileRecords.sizeInBytes()));
+
+            // Slice from the fourth message until the end: skip the combined size of the first three batches.
+            int position = IntStream.range(0, 3).map(i -> items.get(i).sizeInBytes()).sum();
+            FileRecords sliced = fileRecords.slice(position, fileRecords.sizeInBytes() - position);
+            assertEquals(fileRecords.sizeInBytes() - position, sliced.sizeInBytes());
+            assertEquals(items.subList(3, items.size()), batches(sliced), "Read starting from the fourth message");
+
+            // Further slice the already sliced file records, from the fifth message until the end. Now the
+            // bytes available in the sliced file records are less than its start position. However, the slice
+            // position is relative to the sliced records, so skip only the fourth batch (the first batch in them).
+            position = items.get(3).sizeInBytes();
+            FileRecords finalSliced = sliced.slice(position, sliced.sizeInBytes() - position);
+            assertEquals(sliced.sizeInBytes() - position, finalSliced.sizeInBytes());
+            assertEquals(items.subList(4, items.size()), batches(finalSliced), "Read starting from the fifth message");
+        }
+    }
+
     private void testSearchForTimestamp(RecordVersion version) throws IOException {
         File temp = tempFile();
         FileRecords fileRecords = FileRecords.open(temp, false, 1024 * 1024, true);