diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index de349ec120999..df3dc6c28e385 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -299,9 +299,7 @@ def get_num_common_prefix_blocks( While all scheduled requests must be in the RUNNING state, the inverse is not necessarily true. There may be RUNNING requests that are not - scheduled in the current step. As of 1/1/2025, the scheduler does not - allow this case, but it is possible in the future, as we allow more - flexible scheduling. + scheduled in the current step. This can result in an edge case where the number of common prefix blocks is 0, even though all scheduled requests share a common prefix. This