Initial support for Blackwell #747

Open · wants to merge 5 commits into main
6 changes: 3 additions & 3 deletions .github/workflows/release_wheel.yml
@@ -18,16 +18,16 @@ on:
#       required: true

env:
-  TORCH_CUDA_ARCH_LIST: "7.5 8.0 8.9 9.0+PTX"
+  TORCH_CUDA_ARCH_LIST: "7.5 8.0 8.9 9.0 10.0 12.0+PTX"

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-        cuda: ["11.8", "12.1", "12.4"]
-        torch: ["2.3", "2.4", "2.5"]
+        cuda: ["11.8", "12.1", "12.4", "12.6", "12.8"]
+        torch: ["2.3", "2.4", "2.5", "2.6"]
        exclude: # for cuda 12.4, we only support torch 2.4+
          - cuda: "12.4"
            torch: "2.2"
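The wider matrix publishes wheels for CUDA 12.6/12.8 and PyTorch 2.6, and the new arch list bakes Blackwell (SM 10.0 and 12.0) code into them. A single matrix entry can be sanity-checked locally with the source-build command from docs/installation.rst; this is only a sketch of such a check, not the script the workflow itself runs, and it assumes a CUDA 12.8 toolchain with torch 2.6 and the `build` package installed:

    # Illustrative local rebuild with the new arch list (mirrors the docs command, not CI)
    export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0 10.0 12.0+PTX"
    cd flashinfer
    FLASHINFER_ENABLE_AOT=1 python -m build --no-isolation --wheel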
2 changes: 1 addition & 1 deletion 3rdparty/cutlass
Submodule cutlass updated 2034 files
2 changes: 1 addition & 1 deletion cmake/config.cmake
@@ -35,7 +35,7 @@ set(FLASHINFER_GEN_MASK_MODES 0 1 2)
# "native" is a special value for CMAKE_CUDA_ARCHITECTURES which means use the
# architectures of the host's GPU. It's new in CMake 3.24; if you are using an
# older version of CMake or you want to use a different value, you can set its value
-# here. Supported CUDA architectures include 80;86;89;90
+# here. Supported CUDA architectures include 80;86;89;90;100;120
# NOTE(Zihao): using "native" might be slow because whenever compiling a cuda file
# with `-arch=native`, nvcc will spawn a `__nvcc_device_query` process to get
# the architecture of the host's GPU, which could stall the compilation process.
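For the CMake path, the extended list can also be passed at configure time instead of editing config.cmake; a minimal sketch, assuming a standard out-of-source build directory and that the top-level CMakeLists.txt picks up CMAKE_CUDA_ARCHITECTURES:

    # Illustrative: explicit architecture list (including Blackwell) instead of "native"
    cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="80;86;89;90;100;120"
    cmake --build build -j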
4 changes: 2 additions & 2 deletions docs/installation.rst
@@ -155,7 +155,7 @@ You can follow the steps below to install FlashInfer from source code:
.. code-block:: bash

   cd flashinfer
-   TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a" FLASHINFER_ENABLE_AOT=1 pip install --no-build-isolation --verbose --editable .
+   TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0 12.0 12.0a" FLASHINFER_ENABLE_AOT=1 pip install --no-build-isolation --verbose --editable .
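Compiling ahead-of-time kernels for all of these architectures is slow; if the build only needs to run on the local GPU, the list can be trimmed to that device's compute capability, which standard PyTorch reports (10.x or 12.x indicates a Blackwell-class part):

    python -c "import torch; print(torch.cuda.get_device_capability())"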

5. Create FlashInfer distributions (optional):

@@ -182,7 +182,7 @@ You can follow the steps below to install FlashInfer from source code:
.. code-block:: bash

   cd flashinfer
-   TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a" FLASHINFER_ENABLE_AOT=1 python -m build --no-isolation --wheel
+   TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0 12.0 12.0a" FLASHINFER_ENABLE_AOT=1 python -m build --no-isolation --wheel
   ls -la dist/
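A quick smoke test of the freshly built wheel; the wildcard is only illustrative, since the real filename carries version and platform tags:

    pip install dist/*.whl
    python -c "import flashinfer"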

C++ API