diff --git a/Cargo.toml b/Cargo.toml index 8c51848af8..8cb48b8b49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,6 @@ repository = "https://github.com/briansmith/ring" # Keep in sync with .github/workflows/ci.yml ("MSRV") and see the MSRV note # in cpu/arm.rs. -# 1.66 is required on x86/x86_64 for https://github.com/rust-lang/rust/pull/101861. rust-version = "1.66.0" # Keep in sync with `links` below. diff --git a/build.rs b/build.rs index 9843ad8aa5..e78c505763 100644 --- a/build.rs +++ b/build.rs @@ -66,8 +66,6 @@ const RING_SRCS: &[(&[&str], &str)] = &[ (&[ARM, X86_64, X86], "crypto/crypto.c"), - (&[X86_64, X86], "crypto/cpu_intel.c"), - (&[X86], "crypto/fipsmodule/aes/asm/aesni-x86.pl"), (&[X86], "crypto/fipsmodule/aes/asm/ghash-x86.pl"), (&[X86], "crypto/fipsmodule/aes/asm/vpaes-x86.pl"), @@ -886,7 +884,6 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "LIMBS_window5_split_window", "LIMBS_window5_unsplit_window", "LIMB_shr", - "OPENSSL_cpuid_setup", "aes_gcm_dec_kernel", "aes_gcm_dec_update_vaes_avx2", "aes_gcm_enc_kernel", diff --git a/crypto/cpu_intel.c b/crypto/cpu_intel.c deleted file mode 100644 index 6e792b6ba4..0000000000 --- a/crypto/cpu_intel.c +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - - -#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64)) - -#if defined(_MSC_VER) && !defined(__clang__) -#pragma warning(push, 3) -#include -#include -#pragma warning(pop) -#endif - -#include "internal.h" - - -// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX -// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through -// |*out_edx|. -static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx, - uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) { -#if defined(_MSC_VER) && !defined(__clang__) - int tmp[4]; - __cpuid(tmp, (int)leaf); - *out_eax = (uint32_t)tmp[0]; - *out_ebx = (uint32_t)tmp[1]; - *out_ecx = (uint32_t)tmp[2]; - *out_edx = (uint32_t)tmp[3]; -#elif defined(__pic__) && defined(OPENSSL_32_BIT) - // Inline assembly may not clobber the PIC register. For 32-bit, this is EBX. - // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602. - __asm__ volatile ( - "xor %%ecx, %%ecx\n" - "mov %%ebx, %%edi\n" - "cpuid\n" - "xchg %%edi, %%ebx\n" - : "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx) - : "a"(leaf) - ); -#else - __asm__ volatile ( - "xor %%ecx, %%ecx\n" - "cpuid\n" - : "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx) - : "a"(leaf) - ); -#endif -} - -// OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR). -// Currently only XCR0 is defined by Intel so |xcr| should always be zero. -// -// See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family -static uint64_t OPENSSL_xgetbv(uint32_t xcr) { -#if defined(_MSC_VER) && !defined(__clang__) - return (uint64_t)_xgetbv(xcr); -#else - uint32_t eax, edx; - __asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr)); - return (((uint64_t)edx) << 32) | eax; -#endif -} - -void OPENSSL_cpuid_setup(uint32_t OPENSSL_ia32cap_P[4]) { - // Determine the vendor and maximum input value. 
- uint32_t eax, ebx, ecx, edx; - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0); - - uint32_t num_ids = eax; - - int is_intel = ebx == 0x756e6547 /* Genu */ && - edx == 0x49656e69 /* ineI */ && - ecx == 0x6c65746e /* ntel */; - - uint32_t extended_features[2] = {0}; - if (num_ids >= 7) { - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7); - extended_features[0] = ebx; - extended_features[1] = ecx; - } - - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1); - - const uint32_t base_family = (eax >> 8) & 15; - const uint32_t base_model = (eax >> 4) & 15; - - uint32_t family = base_family; - uint32_t model = base_model; - if (base_family == 15) { - const uint32_t ext_family = (eax >> 20) & 255; - family += ext_family; - } - if (base_family == 6 || base_family == 15) { - const uint32_t ext_model = (eax >> 16) & 15; - model |= ext_model << 4; - } - - // Reserved bit #30 is repurposed to signal an Intel CPU. - if (is_intel) { - edx |= (1u << 30); - } else { - edx &= ~(1u << 30); - } - - uint64_t xcr0 = 0; - if (ecx & (1u << 27)) { - // XCR0 may only be queried if the OSXSAVE bit is set. - xcr0 = OPENSSL_xgetbv(0); - } - // See Intel manual, volume 1, section 14.3. - if ((xcr0 & 6) != 6) { - // YMM registers cannot be used. - ecx &= ~(1u << 28); // AVX - ecx &= ~(1u << 12); // FMA - ecx &= ~(1u << 11); // AMD XOP - extended_features[0] &= ~(1u << 5); // AVX2 - extended_features[1] &= ~(1u << 9); // VAES - extended_features[1] &= ~(1u << 10); // VPCLMULQDQ - } - // See Intel manual, volume 1, sections 15.2 ("Detection of AVX-512 Foundation - // Instructions") through 15.4 ("Detection of Intel AVX-512 Instruction Groups - // Operating at 256 and 128-bit Vector Lengths"). - if ((xcr0 & 0xe6) != 0xe6) { - // Without XCR0.111xx11x, no AVX512 feature can be used. This includes ZMM - // registers, masking, SIMD registers 16-31 (even if accessed as YMM or - // XMM), and EVEX-coded instructions (even on YMM or XMM). 
Even if only - // XCR0.ZMM_Hi256 is missing, it isn't valid to use AVX512 features on - // shorter vectors, since AVX512 ties everything to the availability of - // 512-bit vectors. See the above-mentioned sections of the Intel manual, - // which say that *all* these XCR0 bits must be checked even when just using - // 128-bit or 256-bit vectors, and also volume 2a section 2.7.11 ("#UD - // Equations for EVEX") which says that all EVEX-coded instructions raise an - // undefined-instruction exception if any of these XCR0 bits is zero. - // - // AVX10 fixes this by reorganizing the features that used to be part of - // "AVX512" and allowing them to be used independently of 512-bit support. - // TODO: add AVX10 detection. - extended_features[0] &= ~(1u << 16); // AVX512F - extended_features[0] &= ~(1u << 17); // AVX512DQ - extended_features[0] &= ~(1u << 21); // AVX512IFMA - extended_features[0] &= ~(1u << 26); // AVX512PF - extended_features[0] &= ~(1u << 27); // AVX512ER - extended_features[0] &= ~(1u << 28); // AVX512CD - extended_features[0] &= ~(1u << 30); // AVX512BW - extended_features[0] &= ~(1u << 31); // AVX512VL - extended_features[1] &= ~(1u << 1); // AVX512VBMI - extended_features[1] &= ~(1u << 6); // AVX512VBMI2 - extended_features[1] &= ~(1u << 11); // AVX512VNNI - extended_features[1] &= ~(1u << 12); // AVX512BITALG - extended_features[1] &= ~(1u << 14); // AVX512VPOPCNTDQ - } - - // Repurpose the bit for the removed MPX feature to indicate when using zmm - // registers should be avoided even when they are supported. (When set, AVX512 - // features can still be used, but only using ymm or xmm registers.) Skylake - // suffered from severe downclocking when zmm registers were used, which - // affected unrelated code running on the system, making zmm registers not too - // useful outside of benchmarks. The situation improved significantly by Ice - // Lake, but a small amount of downclocking remained. 
(See - // https://lore.kernel.org/linux-crypto/e8ce1146-3952-6977-1d0e-a22758e58914@intel.com/) - // We take a conservative approach of not allowing zmm registers until after - // Ice Lake and Tiger Lake, i.e. until Sapphire Rapids on the server side. - // - // AMD CPUs, which support AVX512 starting with Zen 4, have not been reported - // to have any downclocking problem when zmm registers are used. - if (is_intel && family == 6 && - (model == 85 || // Skylake, Cascade Lake, Cooper Lake (server) - model == 106 || // Ice Lake (server) - model == 108 || // Ice Lake (micro server) - model == 125 || // Ice Lake (client) - model == 126 || // Ice Lake (mobile) - model == 140 || // Tiger Lake (mobile) - model == 141)) { // Tiger Lake (client) - extended_features[0] |= 1u << 14; - } else { - extended_features[0] &= ~(1u << 14); - } - - OPENSSL_ia32cap_P[0] = edx; - OPENSSL_ia32cap_P[1] = ecx; - OPENSSL_ia32cap_P[2] = extended_features[0]; - OPENSSL_ia32cap_P[3] = extended_features[1]; -} - -#endif // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64) diff --git a/crypto/internal.h b/crypto/internal.h index 99223d1aca..c7013b857b 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -441,30 +441,10 @@ static inline void CRYPTO_store_u32_be(void *out, uint32_t v) { // Runtime CPU feature support -#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) -// OPENSSL_ia32cap_P contains the Intel CPUID bits when running on an x86 or -// x86-64 system. -// -// Index 0: -// EDX for CPUID where EAX = 1 -// Bit 30 is used to indicate an Intel CPU -// Index 1: -// ECX for CPUID where EAX = 1 -// Index 2: -// EBX for CPUID where EAX = 7, ECX = 0 -// Bit 14 (for removed feature MPX) is used to indicate a preference for ymm -// registers over zmm even when zmm registers are supported -// Index 3: -// ECX for CPUID where EAX = 7, ECX = 0 -// -// Note: the CPUID bits are pre-adjusted for the OSXSAVE bit and the XMM, YMM, -// and AVX512 bits in XCR0, so it is not necessary to check those. 
(WARNING: See -// caveats in cpu_intel.c.) #if defined(OPENSSL_X86_64) extern uint32_t avx2_available; extern uint32_t adx_bmi2_available; #endif -#endif #if defined(OPENSSL_ARM) diff --git a/src/cpu/intel.rs b/src/cpu/intel.rs index f45052fe7f..000a6fba86 100644 --- a/src/cpu/intel.rs +++ b/src/cpu/intel.rs @@ -12,6 +12,11 @@ // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// "Intel" citations are for "Intel 64 and IA-32 Architectures Software +// Developer’s Manual", Combined Volumes, December 2024. +// "AMD" citations are for "AMD64 Technology AMD64 Architecture +// Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024. + use cfg_if::cfg_if; mod abi_assumptions { @@ -36,7 +41,7 @@ mod abi_assumptions { } pub(super) mod featureflags { - use super::super::CAPS_STATIC; + use super::{super::CAPS_STATIC, *}; use crate::{ cpu, polyfill::{once_cell::race, usize_from_u32}, @@ -44,22 +49,13 @@ pub(super) mod featureflags { use core::num::NonZeroUsize; pub(in super::super) fn get_or_init() -> cpu::Features { - // SAFETY: `OPENSSL_cpuid_setup` must be called only in - // `INIT.call_once()` below. - prefixed_extern! { - fn OPENSSL_cpuid_setup(out: &mut [u32; 4]); - } - let _: NonZeroUsize = FEATURES.get_or_init(|| { - let mut cpuid = [0; 4]; - // SAFETY: We assume that it is safe to execute CPUID and XGETBV. - unsafe { - OPENSSL_cpuid_setup(&mut cpuid); - } - let detected = super::cpuid_to_caps_and_set_c_flags(&cpuid); + // SAFETY: `cpuid_all` assumes CPUID is available and that it is + // compatible with Intel. 
+ let cpuid_results = unsafe { cpuid_all() }; + let detected = cpuid_to_caps_and_set_c_flags(cpuid_results); let merged = CAPS_STATIC | detected; - - let merged = usize_from_u32(merged) | (1 << (super::Shift::Initialized as u32)); + let merged = usize_from_u32(merged) | (1 << (Shift::Initialized as u32)); NonZeroUsize::new(merged).unwrap() // Can't fail because we just set a bit. }); @@ -98,30 +94,120 @@ pub(super) mod featureflags { #[cfg(target_arch = "x86_64")] #[rustfmt::skip] pub const STATIC_DETECTED: u32 = 0 - | if cfg!(target_feature = "sse4.1") { super::Sse41::mask() } else { 0 } - | if cfg!(target_feature = "ssse3") { super::Ssse3::mask() } else { 0 } + | if cfg!(target_feature = "sse4.1") { Sse41::mask() } else { 0 } + | if cfg!(target_feature = "ssse3") { Ssse3::mask() } else { 0 } ; pub const FORCE_DYNAMIC_DETECTION: u32 = 0; } -fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { - // "Intel" citations are for "Intel 64 and IA-32 Architectures Software - // Developer’s Manual", Combined Volumes, December 2024. - // "AMD" citations are for "AMD64 Technology AMD64 Architecture - // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024. +struct CpuidSummary { + #[cfg(target_arch = "x86_64")] + is_intel: bool, + leaf1_edx: u32, + leaf1_ecx: u32, + #[cfg(target_arch = "x86_64")] + extended_features_ecx: u32, + #[cfg(target_arch = "x86_64")] + extended_features_ebx: u32, + xcr0: u64, +} + +// SAFETY: This unconditionally uses CPUID because we don't have a good +// way to detect CPUID and because we don't know of a CPU that supports +// SSE2 (that we currently statically require) but doesn't support +// CPUID. SGX is one environment where CPUID isn't allowed but where +// SSE2 is statically supported. Ideally there would be a +// `cfg!(target_feature = "cpuid")` we could use. 
+unsafe fn cpuid_all() -> CpuidSummary { + #[cfg(target_arch = "x86")] + use core::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64 as arch; - // The `prefixed_extern!` uses below assume this + // MSRV(1.66) avoids miscompilations when calling `__cpuid`; + // see https://github.com/rust-lang/rust/pull/101861. + + // Intel: "21.1.1 Notes on Where to Start". + let r = unsafe { arch::__cpuid(0) }; + + let leaf1_edx; + let leaf1_ecx; + + #[cfg(target_arch = "x86_64")] + let is_intel = (r.ebx == 0x756e6547) && (r.edx == 0x49656e69) && (r.ecx == 0x6c65746e); + + #[cfg(target_arch = "x86_64")] + let (extended_features_ecx, extended_features_ebx); + + if r.eax >= 1 { + // SAFETY: `r.eax >= 1` indicates leaf 1 is available. + let r = unsafe { arch::__cpuid(1) }; + leaf1_edx = r.edx; + leaf1_ecx = r.ecx; + + #[cfg(target_arch = "x86_64")] + if r.eax >= 7 { + // SAFETY: `r.eax >= 7` implies we can execute this. + let r = unsafe { arch::__cpuid(7) }; + extended_features_ecx = r.ecx; + extended_features_ebx = r.ebx; + } else { + extended_features_ecx = 0; + extended_features_ebx = 0; + } + } else { + // Expected to be unreachable on any environment we currently + // support. 
+ leaf1_edx = 0; + leaf1_ecx = 0; + #[cfg(target_arch = "x86_64")] + { + extended_features_ecx = 0; + extended_features_ebx = 0; + } + } + + let xcr0 = if check(leaf1_ecx, 27) { + unsafe { arch::_xgetbv(0) } + } else { + 0 + }; + + CpuidSummary { + #[cfg(target_arch = "x86_64")] + is_intel, + leaf1_edx, + leaf1_ecx, + #[cfg(target_arch = "x86_64")] + extended_features_ecx, + #[cfg(target_arch = "x86_64")] + extended_features_ebx, + xcr0, + } +} + +fn cpuid_to_caps_and_set_c_flags(r: CpuidSummary) -> u32 { #[cfg(target_arch = "x86_64")] use core::{mem::align_of, sync::atomic::AtomicU32}; + + let CpuidSummary { + #[cfg(target_arch = "x86_64")] + is_intel, + leaf1_edx, + leaf1_ecx, + #[cfg(target_arch = "x86_64")] + extended_features_ecx, + #[cfg(target_arch = "x86_64")] + extended_features_ebx, + xcr0, + } = r; + + // The `prefixed_extern!` uses below assume this #[cfg(target_arch = "x86_64")] const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () = assert!(align_of::<AtomicU32>() == align_of::<u32>()); - fn check(leaf: u32, bit: u32) -> bool { - let shifted = 1 << bit; - (leaf & shifted) == shifted - } fn set(out: &mut u32, shift: Shift) { let shifted = 1 << (shift as u32); debug_assert_eq!(*out & shifted, 0); @@ -129,16 +215,6 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { - #[cfg(target_arch = "x86_64")] - let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup` - - // CPUID leaf 1. - let leaf1_ecx = cpuid[1]; - - // Intel: "Structured Extended Feature Flags Enumeration Leaf" - #[cfg(target_arch = "x86_64")] - let (extended_features_ebx, extended_features_ecx) = (cpuid[2], cpuid[3]); - let mut caps = 0; // AMD: "Collectively the SSE1, [...] 
are referred to as the legacy SSE @@ -179,6 +255,7 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { set(&mut caps, Shift::Sse2); } } + let _ = leaf1_edx; // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they @@ -192,76 +269,74 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { // Intel: "12.7.2 Checking for SSSE3 Support" // If/when we support dynamic detection of SSE/SSE2, make this conditional // on SSE/SSE2. + // TODO: Make this conditional on SSE3. if check(leaf1_ecx, 9) { set(&mut caps, Shift::Ssse3); - } - // Intel: "12.12.2 Checking for Intel SSE4.1 Support" - // If/when we support dynamic detection of SSE/SSE2, make this conditional - // on SSE/SSE2. - // XXX: We don't check for SSE3 and we're not sure if it is compatible for - // us to do so; does AMD advertise SSE3? TODO: address this. - // XXX: We don't condition this on SSSE3 being available. TODO: address - // this. - #[cfg(target_arch = "x86_64")] - if check(leaf1_ecx, 19) { - set(&mut caps, Shift::Sse41); + // Intel: "12.12.2 Checking for Intel SSE4.1 Support" + #[cfg(target_arch = "x86_64")] + if check(leaf1_ecx, 19) { + set(&mut caps, Shift::Sse41); + } } // AMD: "The extended SSE instructions include [...]." // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS" - // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't - // support AVX state. - let avx_available = check(leaf1_ecx, 28); - if avx_available { + let os_supports_xmm_and_ymm = (xcr0 & 6) == 6; + let cpu_supports_avx = check(leaf1_ecx, 28); + + if os_supports_xmm_and_ymm && cpu_supports_avx { set(&mut caps, Shift::Avx); - } - #[cfg(target_arch = "x86_64")] - if avx_available { - // The Intel docs don't seem to document the detection. The instruction - // definitions of the VEX.256 instructions reference the - // VAES/VPCLMULQDQ features and the documentation for the extended - // features gives the values. 
We combine these into one feature because - // we never use them independently. - let vaes_available = check(extended_features_ecx, 9); - let vclmul_available = check(extended_features_ecx, 10); - if vaes_available && vclmul_available { - set(&mut caps, Shift::VAesClmul); + #[cfg(target_arch = "x86_64")] + { + // The Intel docs don't seem to document the detection. The instruction + // definitions of the VEX.256 instructions reference the + // VAES/VPCLMULQDQ features and the documentation for the extended + // features gives the values. We combine these into one feature because + // we never use them independently. + let vaes_available = check(extended_features_ecx, 9); + let vclmul_available = check(extended_features_ecx, 10); + if vaes_available && vclmul_available { + set(&mut caps, Shift::VAesClmul); + } } - } - // "14.7.1 Detection of Intel AVX2 Hardware support" - // XXX: We don't condition AVX2 on AVX. TODO: Address this. - // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't - // support AVX state. - #[cfg(target_arch = "x86_64")] - if check(extended_features_ebx, 5) { - set(&mut caps, Shift::Avx2); + // "14.7.1 Detection of Intel AVX2 Hardware support" + // XXX: We don't condition AVX2 on AVX. TODO: Address this. + // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't + // support AVX state. + #[cfg(target_arch = "x86_64")] + if check(extended_features_ebx, 5) { + set(&mut caps, Shift::Avx2); - // Declared as `uint32_t` in the C code. - prefixed_extern! { - static avx2_available: AtomicU32; + // Declared as `uint32_t` in the C code. + prefixed_extern! { + static avx2_available: AtomicU32; + } + // SAFETY: The C code only reads `avx2_available`, and its reads are + // synchronized through the `OnceNonZeroUsize` Acquire/Release + // semantics as we ensure we have a `cpu::Features` instance before + // calling into the C code. 
+ let flag = unsafe { &avx2_available }; + flag.store(1, core::sync::atomic::Ordering::Relaxed); } - // SAFETY: The C code only reads `avx2_available`, and its reads are - // synchronized through the `OnceNonZeroUsize` Acquire/Release - // semantics as we ensure we have a `cpu::Features` instance before - // calling into the C code. - let flag = unsafe { &avx2_available }; - flag.store(1, core::sync::atomic::Ordering::Relaxed); } // Intel: "12.13.4 Checking for Intel AES-NI Support" // If/when we support dynamic detection of SSE/SSE2, revisit this. // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI) - // and AES-NI & !AVX. - // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for - // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every - // use will either be supported by SSE* or AVX* instructions. We then - // assume that those supporting instructions' prerequisites (e.g. OS - // support for AVX or SSE state, respectively) are the only prerequisites - // for these features. + // and (AES-NI & !AVX). + // + // PCLMULQDQ and AES-NI instructions come in P* (SSE) and VP* (AVX) + // variants. The use of the SSE variants must be guarded by a check of both + // the `ClMul`/`Aes` feature AND an SSE (e.g. `Ssse3`) or AVX (e.g. `Avx`) + // feature. Which SSE/AVX feature to check for will depend on the + // supporting instructions around the VPCLMULQDQ/AES-NI constructions. + // (PCLMULQDQ and AES-NI also come additional "VPCLMULQDQ"/"VAES" + // variants, which are a separate thing entirely; support for those will be + // added later.) if check(leaf1_ecx, 1) { set(&mut caps, Shift::ClMul); } @@ -302,7 +377,7 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { // rust `std::arch::is_x86_feature_detected` does a very similar thing // but only looks at AVX, not ADX. Note that they reference an older // version of the erratum labeled SKL052. 
- let believe_bmi_bits = !is_intel || (adx_available || avx_available); + let believe_bmi_bits = !is_intel || (adx_available || cpu_supports_avx); if check(extended_features_ebx, 3) && believe_bmi_bits { set(&mut caps, Shift::Bmi1); @@ -330,6 +405,11 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { caps } +fn check(leaf: u32, bit: u32) -> bool { + let shifted = 1 << bit; + (leaf & shifted) == shifted +} + impl_get_feature! { features: [ { ("x86_64") => VAesClmul },