Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aes-gcm: Enable AVX-512 implementation. #2444

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ include = [
"crypto/curve25519/internal.h",
"crypto/fipsmodule/aes/aes_nohw.c",
"crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl",
"crypto/fipsmodule/aes/asm/aes-gcm-avx512-x86_64.pl",
"crypto/fipsmodule/aes/asm/aesni-x86.pl",
"crypto/fipsmodule/aes/asm/aesni-gcm-x86_64.pl",
"crypto/fipsmodule/aes/asm/aesni-x86_64.pl",
Expand Down
5 changes: 5 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[X86_64], "crypto/chacha/asm/chacha-x86_64.pl"),
(&[X86_64], "crypto/curve25519/curve25519_64_adx.c"),
(&[X86_64], "crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/aes/asm/aes-gcm-avx512-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/aes/asm/aesni-gcm-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/aes/asm/aesni-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/aes/asm/ghash-x86_64.pl"),
Expand Down Expand Up @@ -889,8 +890,10 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
"OPENSSL_cpuid_setup",
"aes_gcm_dec_kernel",
"aes_gcm_dec_update_vaes_avx2",
"aes_gcm_dec_update_vaes_avx512",
"aes_gcm_enc_kernel",
"aes_gcm_enc_update_vaes_avx2",
"aes_gcm_enc_update_vaes_avx512",
"aes_hw_ctr32_encrypt_blocks",
"aes_hw_set_encrypt_key",
"aes_hw_set_encrypt_key_alt",
Expand Down Expand Up @@ -950,12 +953,14 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
"gcm_ghash_clmul",
"gcm_ghash_neon",
"gcm_ghash_vpclmulqdq_avx2_16",
"gcm_ghash_vpclmulqdq_avx512_16",
"gcm_gmult_clmul",
"gcm_gmult_neon",
"gcm_init_avx",
"gcm_init_clmul",
"gcm_init_neon",
"gcm_init_vpclmulqdq_avx2",
"gcm_init_vpclmulqdq_avx512",
"k25519Precomp",
"limbs_mul_add_limb",
"little_endian_bytes_from_scalar",
Expand Down
12 changes: 9 additions & 3 deletions crypto/fipsmodule/aes/asm/aes-gcm-avx512-x86_64.pl
Original file line number Diff line number Diff line change
Expand Up @@ -588,18 +588,24 @@ sub _ghash_4x {
return $code;
}

# void gcm_gmult_vpclmulqdq_avx512(uint8_t Xi[16], const u128 Htable[16]);
$code .= _begin_func "gcm_gmult_vpclmulqdq_avx512", 1;
# void gcm_ghash_vpclmulqdq_avx512_16(uint8_t Xi[16], const u128 Htable[16],
#                                     const uint8_t aad[16], size_t aad_len_16);
$code .= _begin_func "gcm_ghash_vpclmulqdq_avx512_16", 1;
{
my ( $GHASH_ACC_PTR, $HTABLE ) = @argregs[ 0 .. 1 ];
my ( $GHASH_ACC_PTR, $HTABLE, $AAD, $AAD_LEN_16 ) = @argregs[ 0 .. 3 ];
my ( $GHASH_ACC, $BSWAP_MASK, $H_POW1, $GFPOLY, $T0, $T1, $T2 ) =
map( "%xmm$_", ( 0 .. 6 ) );

$code .= <<___;
@{[ _save_xmmregs (6) ]}
.seh_endprologue

# Load the GHASH accumulator.
vmovdqu ($GHASH_ACC_PTR), $GHASH_ACC

# XOR the AAD into the accumulator.
vpxor ($AAD), $GHASH_ACC, $GHASH_ACC

vmovdqu .Lbswap_mask(%rip), $BSWAP_MASK
vmovdqu $OFFSETOFEND_H_POWERS-16($HTABLE), $H_POW1
vmovdqu .Lgfpoly(%rip), $GFPOLY
Expand Down
23 changes: 23 additions & 0 deletions src/aead/aes_gcm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use cpu::GetFeature as _;
mod aarch64;
mod aeshwclmulmovbe;
mod vaesclmulavx2;
mod vaesclmulavx512;

#[derive(Clone)]
pub(super) struct Key(DynKey);
Expand All @@ -51,6 +52,9 @@ impl Key {

#[derive(Clone)]
enum DynKey {
#[cfg(target_arch = "x86_64")]
VAesClMulAvx512(Combo<aes::hw::Key, gcm::vclmulavx512::Key>),

#[cfg(target_arch = "x86_64")]
VAesClMulAvx2(Combo<aes::hw::Key, gcm::vclmulavx2::Key>),

Expand Down Expand Up @@ -85,6 +89,9 @@ impl DynKey {
let aes_key = aes::hw::Key::new(key, aes, cpu.get_feature())?;
let gcm_key_value = derive_gcm_key_value(&aes_key);
let combo = if let Some(cpu) = cpu.get_feature() {
let gcm_key = gcm::vclmulavx512::Key::new(gcm_key_value, cpu);
Self::VAesClMulAvx512(Combo { aes_key, gcm_key })
} else if let Some(cpu) = cpu.get_feature() {
let gcm_key = gcm::vclmulavx2::Key::new(gcm_key_value, cpu);
Self::VAesClMulAvx2(Combo { aes_key, gcm_key })
} else if let Some(cpu) = cpu.get_feature() {
Expand Down Expand Up @@ -189,6 +196,11 @@ pub(super) fn seal(
seal_whole_partial(c, aad, in_out, ctr, tag_iv, aarch64::seal_whole)
}

#[cfg(target_arch = "x86_64")]
DynKey::VAesClMulAvx512(c) => {
seal_whole_partial(c, aad, in_out, ctr, tag_iv, vaesclmulavx512::seal_whole)
}

#[cfg(target_arch = "x86_64")]
DynKey::VAesClMulAvx2(c) => seal_whole_partial(
c,
Expand Down Expand Up @@ -316,6 +328,17 @@ pub(super) fn open(
open_whole_partial(c, aad, in_out_slice, src, ctr, tag_iv, aarch64::open_whole)
}

#[cfg(target_arch = "x86_64")]
DynKey::VAesClMulAvx512(c) => open_whole_partial(
c,
aad,
in_out_slice,
src,
ctr,
tag_iv,
vaesclmulavx512::open_whole,
),

#[cfg(target_arch = "x86_64")]
DynKey::VAesClMulAvx2(c) => open_whole_partial(
c,
Expand Down
91 changes: 91 additions & 0 deletions src/aead/aes_gcm/vaesclmulavx512.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2015-2025 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#![cfg(target_arch = "x86_64")]

use super::{aes, gcm, Counter, BLOCK_LEN};
use crate::{aead::aes::Overlapping, c, polyfill::slice::AsChunksMut};
use core::num::{NonZeroU32, NonZeroUsize};

pub(super) fn seal_whole(
aes_key: &aes::hw::Key,
auth: &mut gcm::Context<gcm::vclmulavx512::Key>,
ctr: &mut Counter,
mut in_out: AsChunksMut<u8, BLOCK_LEN>,
) {
prefixed_extern! {
fn aes_gcm_enc_update_vaes_avx512(
input: *const u8,
output: *mut u8,
len: c::NonZero_size_t, // TODO? zero OK?
key: &aes::AES_KEY,
ivec: &Counter,
Htable: &gcm::HTable,
Xi: &mut gcm::Xi);
}

let in_out = in_out.as_flattened_mut();

// Precondition: Since we have a `gcm::Context` then the number of blocks
// must fit in `u32`.
let blocks = u32::try_from(in_out.len() / BLOCK_LEN).unwrap();

Check warning on line 42 in src/aead/aes_gcm/vaesclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/aes_gcm/vaesclmulavx512.rs#L21-L42

Added lines #L21 - L42 were not covered by tests

if let Some(len) = NonZeroUsize::new(in_out.len()) {
let aes_key = aes_key.inner_less_safe();
let (htable, xi) = auth.inner();
let input = in_out.as_ptr();
let output = in_out.as_mut_ptr();
unsafe { aes_gcm_enc_update_vaes_avx512(input, output, len, aes_key, ctr, htable, xi) };
let blocks = NonZeroU32::new(blocks).unwrap_or_else(|| {
unreachable!() // Due to previous checks.
});
ctr.increment_by_less_safe(blocks);
}
}

Check warning on line 55 in src/aead/aes_gcm/vaesclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/aes_gcm/vaesclmulavx512.rs#L45-L55

Added lines #L45 - L55 were not covered by tests

pub(super) fn open_whole(
aes_key: &aes::hw::Key,
auth: &mut gcm::Context<gcm::vclmulavx512::Key>,
in_out: Overlapping,
ctr: &mut Counter,
) {
prefixed_extern! {
fn aes_gcm_dec_update_vaes_avx512(
input: *const u8,
output: *mut u8,
len: c::NonZero_size_t, // TODO? zero OK?
key: &aes::AES_KEY,
ivec: &mut Counter,
Htable: &gcm::HTable,
Xi: &mut gcm::Xi);
}

// Precondition. TODO: Create an overlapping::AsChunks for this.
assert_eq!(in_out.len() % BLOCK_LEN, 0);

Check warning on line 75 in src/aead/aes_gcm/vaesclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/aes_gcm/vaesclmulavx512.rs#L57-L75

Added lines #L57 - L75 were not covered by tests
// Precondition: Since we have a `gcm::Context` then the number of blocks
// must fit in `u32`.
let blocks = u32::try_from(in_out.len() / BLOCK_LEN).unwrap();

in_out.with_input_output_len(|input, output, len| {

Check warning on line 80 in src/aead/aes_gcm/vaesclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/aes_gcm/vaesclmulavx512.rs#L78-L80

Added lines #L78 - L80 were not covered by tests
if let Some(len) = NonZeroUsize::new(len) {
let aes_key = aes_key.inner_less_safe();
let (htable, xi) = auth.inner();
unsafe { aes_gcm_dec_update_vaes_avx512(input, output, len, aes_key, ctr, htable, xi) };
let blocks = NonZeroU32::new(blocks).unwrap_or_else(|| {
unreachable!() // Due to previous checks.
});
ctr.increment_by_less_safe(blocks);
}
})
}

Check warning on line 91 in src/aead/aes_gcm/vaesclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/aes_gcm/vaesclmulavx512.rs#L82-L91

Added lines #L82 - L91 were not covered by tests
10 changes: 10 additions & 0 deletions src/aead/gcm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
pub(super) mod fallback;
pub(super) mod neon;
pub(super) mod vclmulavx2;
pub(super) mod vclmulavx512;

pub(super) struct Context<'key, K> {
Xi: Xi,
Expand Down Expand Up @@ -128,6 +129,15 @@
}
}

#[cfg(target_arch = "x86_64")]
impl Context<'_, vclmulavx512::Key> {
    /// Splits the context into the key's `HTable` (shared) and the running
    /// `Xi` (mutable).
    ///
    /// Intended only for the integrated AES-GCM implementations.
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        let h_table = self.key.inner();
        let xi = &mut self.Xi;
        (h_table, xi)
    }
}

impl<K: UpdateBlocks> Context<'_, K> {
#[inline(always)]
pub fn update_blocks(&mut self, input: AsChunks<u8, BLOCK_LEN>) {
Expand Down
1 change: 1 addition & 0 deletions src/aead/gcm/vclmulavx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub struct Key {
}

impl Key {
#[inline(never)]
pub(in super::super) fn new(value: KeyValue, _cpu: (Avx2, VAesClmul)) -> Self {
Self {
h_table: unsafe { htable_new!(gcm_init_vpclmulqdq_avx2, value) },
Expand Down
49 changes: 49 additions & 0 deletions src/aead/gcm/vclmulavx512.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2018-2025 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#![cfg(target_arch = "x86_64")]

use super::{ffi::KeyValue, HTable, UpdateBlock, Xi};
use crate::{
aead::gcm::ffi::BLOCK_LEN,
cpu::intel::{Avx2, Avx512_BW_VL_ZMM, Bmi2, VAesClmul},
polyfill::slice::AsChunks,
};

/// GHASH key for the VPCLMULQDQ AVX-512 implementation; holds the `HTable`
/// that the assembly routines read.
#[derive(Clone)]
pub struct Key {
// Initialized by `gcm_init_vpclmulqdq_avx512` in `Key::new`.
h_table: HTable,
}

impl Key {
pub(in super::super) fn new(
value: KeyValue,
_cpu: (Avx2, Avx512_BW_VL_ZMM, Bmi2, VAesClmul),
) -> Self {
Self {
h_table: unsafe { htable_new!(gcm_init_vpclmulqdq_avx512, value) },
}
}

Check warning on line 37 in src/aead/gcm/vclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/gcm/vclmulavx512.rs#L30-L37

Added lines #L30 - L37 were not covered by tests

pub(super) fn inner(&self) -> &HTable {
&self.h_table
}

Check warning on line 41 in src/aead/gcm/vclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/gcm/vclmulavx512.rs#L39-L41

Added lines #L39 - L41 were not covered by tests
}

// Single-block GHASH update backed by the AVX-512 assembly routine.
impl UpdateBlock for Key {
// Updates `xi` with the single block `a` by invoking
// `gcm_ghash_vpclmulqdq_avx512_16` through the `ghash!` macro with this
// key's `HTable`.
fn update_block(&self, xi: &mut Xi, a: [u8; BLOCK_LEN]) {
// Wrap the one block as `AsChunks`, the input form handed to `ghash!`.
let input: AsChunks<u8, BLOCK_LEN> = (&a).into();
unsafe { ghash!(gcm_ghash_vpclmulqdq_avx512_16, xi, &self.h_table, input) }
}

Check warning on line 48 in src/aead/gcm/vclmulavx512.rs

View check run for this annotation

Codecov / codecov/patch

src/aead/gcm/vclmulavx512.rs#L45-L48

Added lines #L45 - L48 were not covered by tests
}
14 changes: 14 additions & 0 deletions src/cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,20 @@
}
}

/// Four-feature lookup built from two two-feature lookups: yields
/// `Some((a, b, c, d))` only when both the `(A, B)` pair and the `(C, D)`
/// pair are available, and `None` otherwise.
impl<A, B, C, D> GetFeature<(A, B, C, D)> for features::Values
where
    features::Values: GetFeature<(A, B)>,
    features::Values: GetFeature<(C, D)>,
{
    #[inline(always)]
    fn get_feature(&self) -> Option<(A, B, C, D)> {
        // Both lookups are performed before the results are combined.
        let first_pair: Option<(A, B)> = self.get_feature();
        let second_pair: Option<(C, D)> = self.get_feature();
        first_pair
            .zip(second_pair)
            .map(|((a, b), (c, d))| (a, b, c, d))
    }
}

impl<F> GetFeature<F> for Features
where
features::Values: GetFeature<F>,
Expand Down
39 changes: 39 additions & 0 deletions src/cpu/intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
#[cfg(target_arch = "x86_64")]
let (extended_features_ebx, extended_features_ecx) = (cpuid[2], cpuid[3]);

// `OPENSSL_cpuid_setup` synthesizes this bit when it detects an Intel
// CPU family that is known to downclock when ZMM registers are used.
#[cfg(target_arch = "x86_64")]
let avoid_zmm = check(cpuid[2], 14);

let mut caps = 0;

// AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE
Expand Down Expand Up @@ -236,6 +241,35 @@ fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
// calling into the C code.
let flag = unsafe { &avx2_available };
flag.store(1, core::sync::atomic::Ordering::Relaxed);

// AVX-512.
// Initial releases of macOS 12 had a serious bug w.r.t. AVX-512
// support; see https://go-review.googlesource.com/c/sys/+/620256.
// Given that, plus Apple's transition to ARM, AVX-512 isn't worth
// supporting for their targets.
#[cfg(not(target_vendor = "apple"))]
{
// Intel: "15.3 DETECTION OF 512-BIT INSTRUCTION GROUPS OF THE INTEL
// AVX-512 FAMILY".
// `OPENSSL_cpuid_setup` clears these bits when XCR0[7:5] isn't 0b111,
// i.e. when the OS doesn't support AVX-512 state.
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming PR #2439 is merged before this, then this will need to be updated.

let f = check(extended_features_ebx, 16);
let bw = check(extended_features_ebx, 30);

// Intel: "15.4 DETECTION OF INTEL AVX-512 INSTRUCTION GROUPS
// OPERATING AT 256 AND 128-BIT VECTOR LENGTHS"
let vl = check(extended_features_ebx, 31);

// Intel: "15.4 DETECTION OF INTEL AVX-512 INSTRUCTION GROUPS
// OPERATING AT 256 AND 128-BIT VECTOR LENGTHS."
if !avoid_zmm && f {
// Intel: "Table 15-2. Feature Flag Collection Required of
// 256/128 Bit Vector Lengths for Each Instruction Group."
if bw && vl {
set(&mut caps, Shift::Avx512_BW_VL_ZMM)
}
}
}
}

// Intel: "12.13.4 Checking for Intel AES-NI Support"
Expand Down Expand Up @@ -348,6 +382,11 @@ impl_get_feature! {
{ ("x86", "x86_64") => Aes },
{ ("x86", "x86_64") => Avx },
{ ("x86_64") => Bmi1 },

// AVX512BW + AVX512VL, AND using ZMM registers isn't expected to cause
// downclocking.
{ ("x86_64") => Avx512_BW_VL_ZMM },

{ ("x86_64") => Avx2 },
{ ("x86_64") => Bmi2 },
{ ("x86_64") => Adx },
Expand Down
Loading