pypa · jsirois · Nov 27, 2024 · Nov 27, 2024 · Jan 18, 2025 · Jan 30, 2025
diff --git a/news/13086.bugfix.rst b/news/13086.bugfix.rst
@@ -0,0 +1 @@
+Fix ``pip cache remove`` pattern matching.
diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py
@@ -1,4 +1,6 @@
+import fnmatch
 import os
+import re
 import textwrap
 from optparse import Values
 from typing import Any, List
@@ -207,22 +209,55 @@ def _find_http_files(self, options: Values) -> List[str]:
     def _find_wheels(self, options: Values, pattern: str) -> List[str]:
         wheel_dir = self._cache_dir(options, "wheels")
 
-        # The wheel filename format, as specified in PEP 427, is:
+        # The wheel filename format, as originally specified in PEP 427, is:
         #     {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
         #
         # Additionally, non-alphanumeric values in the distribution are
         # normalized to underscores (_), meaning hyphens can never occur
         # before `-{version}`.
         #
         # Given that information:
-        # - If the pattern we're given contains a hyphen (-), the user is
-        #   providing at least the version. Thus, we can just append `*.whl`
-        #   to match the rest of it.
-        # - If the pattern we're given doesn't contain a hyphen (-), the
-        #   user is only providing the name. Thus, we append `-*.whl` to
-        #   match the hyphen before the version, followed by anything else.
+        # - If the pattern we're given is not a glob:
+        #   + We attempt project name normalization such that pyyaml matches PyYAML,
+        #     etc. as outlined here:
+        #     https://packaging.python.org/specifications/name-normalization.
+        #     The one difference is we normalize non-alphanumeric to `_` instead of `-`
+        #     since that is how the wheel filename components are normalized
+        #     (facepalm).
+        #   + If the pattern we're given contains a hyphen (-), the user is providing
+        #     at least the version. Thus, we can just append `*.whl` to match the rest
+        #     of it if it doesn't already glob to the end of the wheel file name.
-        #   + If the pattern we're given contains a hyphen (-), the user is providing
-        #     at least the version. Thus, we can just append `*.whl` to match the rest
-        #     of it if it doesn't already glob to the end of the wheel file name.
+        #   + If the pattern contains a hyphen (-), the user is providing
+        #     the version. Thus, we can just append `*.whl` to match the rest
+        #     of the wheel filename.
-        #   + If the pattern we're given contains a hyphen (-), the user is providing
-        #     at least the version. Thus, we can just append `*.whl` to match the rest
-        #     of it if it doesn't already glob to the end of the wheel file name.
+        #   + If the pattern contains a hyphen (-), the user is providing
+        #     the version. Thus, we can just append `*.whl` to match the rest
+        #     of the wheel filename.
+        #   + If the pattern we're given is not a glob and doesn't contain a
+        #     hyphen (-), the user is only providing the name. Thus, we append `-*.whl`
+        #     to match the hyphen before the version, followed by anything else.
-        #   + If the pattern we're given is not a glob and doesn't contain a
-        #     hyphen (-), the user is only providing the name. Thus, we append `-*.whl`
-        #     to match the hyphen before the version, followed by anything else.
+        #   + If the pattern does NOT contain a
+        #     hyphen (-), the user is only providing the name. Thus, we append `-*.whl`
+        #     to match the hyphen before the version, followed by anything else.
-        #   + If the pattern we're given is not a glob and doesn't contain a
-        #     hyphen (-), the user is only providing the name. Thus, we append `-*.whl`
-        #     to match the hyphen before the version, followed by anything else.
+        #   + If the pattern does NOT contain a
+        #     hyphen (-), the user is only providing the name. Thus, we append `-*.whl`
+        #     to match the hyphen before the version, followed by anything else.
+        # - If the pattern is a glob, we ensure `*.whl` is appended if needed but
+        #   cowardly do not attempt glob parsing / project name normalization. The user
+        #   is on their own at that point.
         #
         # PEP 427: https://www.python.org/dev/peps/pep-0427/
-        #
-        # PEP 427: https://www.python.org/dev/peps/pep-0427/
-        #
-        # PEP 427: https://www.python.org/dev/peps/pep-0427/
-        pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl")
-
-        return filesystem.find_files(wheel_dir, pattern)
+        # And: https://packaging.python.org/specifications/binary-distribution-format/
+        uses_glob_patterns = any(
+            glob_pattern_char in pattern for glob_pattern_char in ("[", "*", "?")
+        )
-        uses_glob_patterns = any(
-            glob_pattern_char in pattern for glob_pattern_char in ("[", "*", "?")
-        )
+	    uses_glob_patterns = any(char in "[]*?" for char in pattern)
-        uses_glob_patterns = any(
-            glob_pattern_char in pattern for glob_pattern_char in ("[", "*", "?")
-        )
+	    uses_glob_patterns = any(char in "[]*?" for char in pattern)
+        if not uses_glob_patterns:
+            project_name, sep, rest = pattern.partition("-")
+            # N.B.: This only normalizes non-alphanumeric characters in the name, which
+            # is 1/2 the job. Stripping case sensitivity is the other 1/2 and is
+            # handled below in the conversion from a glob to a regex executed with the
+            # IGNORECASE flag active.
-            # N.B.: This only normalizes non-alphanumeric characters in the name, which
-            # is 1/2 the job. Stripping case sensitivity is the other 1/2 and is
-            # handled below in the conversion from a glob to a regex executed with the
-            # IGNORECASE flag active.
+            # NOTE: This only normalizes non-alphanumeric characters in the name.
+            # Stripping case sensitivity is handled below in the conversion from a glob
+            # to a regex executed with the IGNORECASE flag.
-            # N.B.: This only normalizes non-alphanumeric characters in the name, which
-            # is 1/2 the job. Stripping case sensitivity is the other 1/2 and is
-            # handled below in the conversion from a glob to a regex executed with the
-            # IGNORECASE flag active.
+            # NOTE: This only normalizes non-alphanumeric characters in the name.
+            # Stripping case sensitivity is handled below in the conversion from a glob
+            # to a regex executed with the IGNORECASE flag.
+            partially_normalized_project_name = re.sub(r"[^\w.]+", "_", project_name)
-            partially_normalized_project_name = re.sub(r"[^\w.]+", "_", project_name)
+            partially_normalized_project_name = re.sub(r"[-_.]+", "_", project_name)
-            partially_normalized_project_name = re.sub(r"[^\w.]+", "_", project_name)
+            partially_normalized_project_name = re.sub(r"[-_.]+", "_", project_name)
+            if not sep:
+                sep = "-"
+            pattern = f"{partially_normalized_project_name}{sep}{rest}"
+
+        if not pattern.endswith((".whl", "*")):
+            pattern = f"{pattern}*.whl"
+
+        regex = fnmatch.translate(pattern)
+
+        return [
+            os.path.join(root, filename)
+            for root, _, files in os.walk(wheel_dir)
+            for filename in files
+            if re.match(regex, filename, flags=re.IGNORECASE)
+        ]
diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py
@@ -68,23 +68,26 @@ def populate_http_cache(http_cache_dir: str) -> List[Tuple[str, str]]:
     return files
 
 
-@pytest.fixture
-def populate_wheel_cache(wheel_cache_dir: str) -> List[Tuple[str, str]]:
+def _populate_wheel_cache(
+    wheel_cache_dir: str, *name_and_versions: str
+) -> List[Tuple[str, str]]:
     destination = os.path.join(wheel_cache_dir, "arbitrary", "pathname")
     os.makedirs(destination)
 
-    files = [
-        ("yyy-1.2.3", os.path.join(destination, "yyy-1.2.3-py3-none-any.whl")),
-        ("zzz-4.5.6", os.path.join(destination, "zzz-4.5.6-py3-none-any.whl")),
-        ("zzz-4.5.7", os.path.join(destination, "zzz-4.5.7-py3-none-any.whl")),
-        ("zzz-7.8.9", os.path.join(destination, "zzz-7.8.9-py3-none-any.whl")),
-    ]
-
-    for _name, filename in files:
+    wheel_info = []
+    for name_and_version in name_and_versions:
+        filename = os.path.join(destination, f"{name_and_version}-py3-none-any.whl")
         with open(filename, "w"):
             pass
+        wheel_info.append((name_and_version, filename))
+    return wheel_info
 
-    return files
+
+@pytest.fixture
+def populate_wheel_cache(wheel_cache_dir: str) -> List[Tuple[str, str]]:
+    return _populate_wheel_cache(
+        wheel_cache_dir, "yyy-1.2.3", "zzz-4.5.6", "zzz-4.5.7", "zzz-7.8.9"
+    )
 
 
 @pytest.fixture
@@ -332,34 +335,97 @@ def test_cache_remove_too_many_args(script: PipTestEnvironment) -> None:
     script.pip("cache", "remove", "aaa", "bbb", expect_error=True)
 
 
+@pytest.mark.parametrize(
+    "pattern",
+    [
+        "zzz",
+        "ZZZ",
+        "zZz-*-py3-none-any.whl",
+        "zZz-*.whl",
+        "zzz-*",
+        "zzz*",
+        "zz[a-zA-Z]?",
+        "[!y]",
+    ],
+)
 @pytest.mark.usefixtures("populate_wheel_cache")
 def test_cache_remove_name_match(
-    script: PipTestEnvironment, remove_matches_wheel: RemoveMatches
+    script: PipTestEnvironment, pattern: str, remove_matches_wheel: RemoveMatches
 ) -> None:
-    """Running `pip cache remove zzz` should remove zzz-4.5.6 and zzz-7.8.9,
-    but nothing else."""
-    result = script.pip("cache", "remove", "zzz", "--verbose")
+    """Running `pip cache remove <zzz matching pattern>` should remove zzz-4.5.6,
+    zzz-4.5.7 and zzz-7.8.9, but nothing else."""
+    result = script.pip("cache", "remove", pattern, "--verbose")
 
     assert not remove_matches_wheel("yyy-1.2.3", result)
     assert remove_matches_wheel("zzz-4.5.6", result)
     assert remove_matches_wheel("zzz-4.5.7", result)
     assert remove_matches_wheel("zzz-7.8.9", result)
 
 
+@pytest.mark.parametrize(
+    "pattern",
+    [
+        "zzz-4.5.6",
+        "ZZZ-4.5.6",
+        "zZz-4.5.6-py3-none-any.whl",
+        "zZz-4.5.6-*.whl",
+        "zZz-4.5.[0-6]",
+    ],
+)
 @pytest.mark.usefixtures("populate_wheel_cache")
 def test_cache_remove_name_and_version_match(
-    script: PipTestEnvironment, remove_matches_wheel: RemoveMatches
+    script: PipTestEnvironment, pattern: str, remove_matches_wheel: RemoveMatches
 ) -> None:
-    """Running `pip cache remove zzz-4.5.6` should remove zzz-4.5.6, but
-    nothing else."""
-    result = script.pip("cache", "remove", "zzz-4.5.6", "--verbose")
+    """Running `pip cache remove <zzz-4.5.6 matching pattern>` should remove zzz-4.5.6,
+    but nothing else."""
+    result = script.pip("cache", "remove", pattern, "--verbose")
 
     assert not remove_matches_wheel("yyy-1.2.3", result)
     assert remove_matches_wheel("zzz-4.5.6", result)
     assert not remove_matches_wheel("zzz-4.5.7", result)
     assert not remove_matches_wheel("zzz-7.8.9", result)
 
 
+@pytest.mark.parametrize(
+    "pattern",
+    [
+        "pyfftw",
+        "pyfftw-0.15",
+        "pyfftw-0.15.0",
+        "pyFFTW",
+        "pyFFTW-0.15",
+        "pyFFTW-0.15.0",
+    ],
+)
+def test_issue_13086_op_case(
+    script: PipTestEnvironment,
+    wheel_cache_dir: str,
+    pattern: str,
+    remove_matches_wheel: RemoveMatches,
+) -> None:
+    _populate_wheel_cache(wheel_cache_dir, "pyFFTW-0.15.0", "foo-1.0")
+    result = script.pip("cache", "remove", pattern, "--verbose")
+
+    assert remove_matches_wheel("pyFFTW-0.15.0", result)
+    assert not remove_matches_wheel("foo-1.0", result)
+
+
+def test_issue_13086_glob_case(
+    script: PipTestEnvironment,
+    wheel_cache_dir: str,
+    remove_matches_wheel: RemoveMatches,
+) -> None:
+    _populate_wheel_cache(
+        wheel_cache_dir, "foo0-1.0", "foo1-1.0", "foo2-1.0", "foo1bob-1.0"
+    )
+    result = script.pip("cache", "remove", "foo[0-2]", "--verbose")
+
+    assert remove_matches_wheel("foo0-1.0", result)
+    assert remove_matches_wheel("foo1-1.0", result)
+    assert remove_matches_wheel("foo2-1.0", result)
+    assert not remove_matches_wheel("foo1bob", result)
+
+
 @pytest.mark.usefixtures("populate_http_cache", "populate_wheel_cache")
 def test_cache_purge(
     script: PipTestEnvironment,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix ``pip cache remove`` pattern matching.
Copy link Member ichard26 Mar 9, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. As evident by all of the edge cases, this is moreso a step in the right direction than a total fix. It's a good step forward and certainly needed, but I don't want to claim everything will work w/o issues now :) ALSO, this PR will affect `pip cache list` too. That's a good thing, we may as well mention it.