Skip to content

Commit

Permalink
Convert the CRC32 constant generation code to Python
Browse files Browse the repository at this point in the history
  • Loading branch information
ebiggers committed Nov 28, 2024
1 parent 1a6f436 commit bd57e93
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 310 deletions.
2 changes: 1 addition & 1 deletion lib/crc32_multipliers.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* crc32_multipliers.h - constants for CRC-32 folding
*
* THIS FILE WAS GENERATED BY gen_crc32_multipliers.c. DO NOT EDIT.
* THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.
*/

#define CRC32_X159_MODG 0xae689191 /* x^159 mod G(x) */
Expand Down
2 changes: 1 addition & 1 deletion lib/crc32_tables.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* crc32_tables.h - data tables for CRC-32 computation
*
* THIS FILE WAS GENERATED BY gen_crc32_tables.c. DO NOT EDIT.
* THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.
*/

static const u32 crc32_slice1_table[] MAYBE_UNUSED = {
Expand Down
4 changes: 2 additions & 2 deletions lib/x86/crc32_pclmul_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
* instructions. Note that the x86 crc32 instruction cannot be used, as it is
* for a different polynomial, not the gzip one. For an explanation of CRC
* folding with carryless multiplication instructions, see
* scripts/gen_crc32_multipliers.c and the following blog posts and papers:
* scripts/gen-crc32-consts.py and the following blog posts and papers:
*
* "An alternative exposition of crc32_4k_pclmulqdq"
* https://www.corsix.org/content/alternative-exposition-crc32_4k_pclmulqdq
Expand Down Expand Up @@ -189,7 +189,7 @@ ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
* folding across 128 bits. mults_128b differs from mults_1v when
* VL != 16. All multipliers are 64-bit, to match what pclmulqdq needs,
* but since this is for CRC-32 only their low 32 bits are nonzero.
* For more details, see scripts/gen_crc32_multipliers.c.
* For more details, see scripts/gen-crc32-consts.py.
*/
const vec_t mults_8v = MULTS_8V;
const vec_t mults_4v = MULTS_4V;
Expand Down
158 changes: 158 additions & 0 deletions scripts/gen-crc32-consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/usr/bin/env python3
#
# This script generates constants for efficient computation of the gzip CRC-32.

import sys

# This is the generator polynomial G(x) of the gzip CRC-32, represented as an
# int using the natural mapping between bits and polynomial coefficients: bit i
# of the int is the coefficient of x^i.  The value has 33 significant bits, so
# G(x) has degree 32: x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 +
# x^8 + x^7 + x^5 + x^4 + x^2 + x + 1.
G = 0x104c11db7

# XOR (add) an iterable of polynomials.
def xor(iterable):
    """Return the XOR of all the values produced by 'iterable'.

    With polynomials over GF(2) stored as ints, XOR is polynomial addition, so
    this sums an iterable of polynomials.  An empty iterable yields 0 (the
    zero polynomial).  Any iterable is accepted, including a generator.
    """
    res = 0
    for val in iterable:
        res ^= val
    return res

# Multiply two polynomials.
def clmul(a, b):
    """Return the carryless product of the polynomials 'a' and 'b'.

    Both operands and the result are ints whose bit i is the coefficient of
    x^i.  The multiplication is over GF(2): partial products are combined with
    XOR, so no carries propagate between bit positions.
    """
    product = 0
    # Each set bit i of 'b' contributes a copy of 'a' multiplied by x^i.
    for i in range(b.bit_length()):
        if (b >> i) & 1:
            product ^= a << i
    return product

# Polynomial division floor(a / b).
def div(a, b):
    """Return the polynomial quotient floor(a / b) over GF(2).

    'a' and 'b' are non-negative ints whose bit i is the coefficient of x^i.

    Raises:
        ValueError: if either operand is negative (not a valid polynomial).
        ZeroDivisionError: if 'b' is the zero polynomial.
    """
    # Without these checks the long-division loop below never terminates:
    # a zero or negative operand prevents the degree of 'a' from dropping
    # below that of 'b'.
    if a < 0 or b < 0:
        raise ValueError('polynomials must be non-negative integers')
    if b == 0:
        raise ZeroDivisionError('polynomial division by zero')

    b_len = b.bit_length()
    q = 0
    # Schoolbook long division: repeatedly cancel the leading term of 'a' with
    # a suitably shifted copy of 'b', recording each shift in the quotient.
    while a.bit_length() >= b_len:
        shift = a.bit_length() - b_len
        q ^= 1 << shift
        a ^= b << shift
    return q

# Reduce the polynomial 'a' modulo the polynomial 'b'.
def reduce(a, b):
    """Return the remainder of the polynomial 'a' modulo the polynomial 'b'.

    'a' and 'b' are non-negative ints whose bit i is the coefficient of x^i.
    The result always has fewer significant bits than 'b'.

    Raises:
        ValueError: if either operand is negative (not a valid polynomial).
        ZeroDivisionError: if 'b' is the zero polynomial.
    """
    if a < 0 or b < 0:
        raise ValueError('polynomials must be non-negative integers')
    if b == 0:
        raise ZeroDivisionError('polynomial modulo by zero')

    b_len = b.bit_length()
    # Long division, keeping only the remainder: cancel the leading term of
    # 'a' with a shifted copy of 'b' until the degree of 'a' is below that of
    # 'b'.  This gives the same value as a ^ clmul(div(a, b), b) without
    # materializing the quotient and multiplying it back.
    while a.bit_length() >= b_len:
        a ^= b << (a.bit_length() - b_len)
    return a

# Reverse the bits of a polynomial.
def bitreverse(poly, num_bits):
    """Return 'poly' with its low 'num_bits' bits in reversed order.

    Bit i of 'poly' becomes bit (num_bits - 1 - i) of the result.  Any bits of
    'poly' at position 'num_bits' or above are ignored, and a 'num_bits' of 0
    yields 0.
    """
    reversed_poly = 0
    for i in range(num_bits):
        if (poly >> i) & 1:
            reversed_poly |= 1 << (num_bits - 1 - i)
    return reversed_poly

# Compute x^d mod G.
def x_to_the_d(d):
    """Return x^d mod G(x) as an int (bit i is the coefficient of x^i).

    Uses recursive square-and-multiply, so the recursion depth grows only
    logarithmically with 'd'.

    Raises:
        ValueError: if 'd' is negative.
    """
    if d < 0:
        raise ValueError('d must be non-negative')
    # x^d with d below the degree of G is already fully reduced.
    if d < G.bit_length() - 1:
        return 1 << d
    # x^d = (x^(d//2))^2, times an extra factor of x when d is odd.
    t = x_to_the_d(d // 2)
    t = clmul(t, t)
    if d % 2 != 0:
        t <<= 1
    return reduce(t, G)

def gen_tables():
    """Print the contents of crc32_tables.h.

    Output goes through print(), i.e. to whatever sys.stdout currently is.
    Two tables are emitted: crc32_slice1_table (256 entries) and
    crc32_slice8_table (8 * 256 entries), four entries per line.
    """
    print('/*')
    print(' * crc32_tables.h - data tables for CRC-32 computation')
    print(' *')
    print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.')
    print(' */')
    for n in [1, 8]:
        print('')
        print(f'static const u32 crc32_slice{n}_table[] MAYBE_UNUSED = {{')
        # The i'th table entry is the CRC-32 of the message consisting of byte
        # i % 256 followed by i // 256 zero bytes.
        #
        # First build each message as an unreduced polynomial: the byte is
        # bit-reversed into polynomial order, then multiplied by x^32 and by
        # x^8 for every trailing zero byte.
        polys = [bitreverse(i % 256, 8) << (32 + 8*(i//256)) for i in range(256 * n)]
        # Then reduce mod G(x) and convert back to the bit-reversed form in
        # which the table entries are stored.
        polys = [bitreverse(reduce(poly, G), 32) for poly in polys]
        # The table length (256 * n) is a multiple of 4, so every row is full.
        for i in range(0, len(polys), 4):
            print(f'\t0x{polys[i+0]:08x}, 0x{polys[i+1]:08x}, 0x{polys[i+2]:08x}, 0x{polys[i+3]:08x},')
        print('};')

# Compute the constant multipliers needed for "folding" over various distances
# with the gzip CRC-32. Each such multiplier is x^d mod G(x) for some distance
# d, in bits, over which the folding is occurring.
#
# Folding works as follows: let A(x) be a polynomial (possibly reduced partially
# or fully mod G(x)) for part of the message, and let B(x) be a polynomial
# (possibly reduced partially or fully mod G(x)) for a later part of the
# message. The unreduced combined polynomial is A(x)*x^d + B(x), where d is the
# number of bits separating the two parts of the message plus len(B(x)). Since
# mod G(x) can be applied at any point, x^d mod G(x) can be precomputed and used
# instead of x^d unreduced. That allows the combined polynomial to be computed
# relatively easily in a partially-reduced form A(x)*(x^d mod G(x)) + B(x), with
# length max(len(A(x)) + 31, len(B(x))). This does require doing a polynomial
# multiplication (carryless multiplication).
#
# "Folding" in this way can be used for the entire CRC computation except the
# final reduction to 32 bits; this works well when CPU support for carryless
# multiplication is available. It can also be used to combine CRCs of different
# parts of the message that were computed using a different method.
#
# Note that the gzip CRC-32 uses bit-reversed polynomials. I.e., the low order
# bits are really the high order polynomial coefficients.
def gen_multipliers():
    """Print the contents of crc32_multipliers.h.

    Output goes through print(), i.e. to whatever sys.stdout currently is.
    Three groups of constants are emitted, in this order:

      1. CRC32_X<d>_MODG folding multipliers for 1 to 32 128-bit segments.
      2. The two Barrett reduction constants for the final 128 => 32 bit
         reduction.
      3. Multipliers for combining the CRCs of separately processed chunks,
         for both variable and fixed chunk lengths.
    """
    print('/*')
    print(' * crc32_multipliers.h - constants for CRC-32 folding')
    print(' *')
    print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.')
    print(' */')
    print('')

    # Compute the multipliers needed for CRC-32 folding with carryless
    # multiplication instructions that operate on the 64-bit halves of 128-bit
    # segments.  Using the terminology from earlier, for each 64-bit fold
    # len(A(x)) = 64, and len(B(x)) = 95 since a 64-bit polynomial multiplied by
    # a 32-bit one produces a 95-bit one.  When A(x) is the low order polynomial
    # half of a 128-bit segment (high order physical half), the separation
    # between the message parts is the total length of the 128-bit segments
    # separating the values.  When A(x) is the high order polynomial half, the
    # separation is 64 bits greater.
    for i in range(1, 33):
        sep_lo = 128 * (i - 1)
        sep_hi = sep_lo + 64
        len_B = 95
        for d in [sep_hi + len_B,  # A(x) = high 64 polynomial bits (low 64 physical bits)
                  sep_lo + len_B   # A(x) = low 64 polynomial bits (high 64 physical bits)
                  ]:
            poly = bitreverse(x_to_the_d(d), 32)
            print(f'#define CRC32_X{d}_MODG 0x{poly:08x} /* x^{d} mod G(x) */')
        # Blank line after each pair of multipliers.
        print('')

    # Compute constants for the final 128 => 32 bit reduction.
    poly = bitreverse(div(1 << 95, G), 64)
    print(f'#define CRC32_BARRETT_CONSTANT_1 0x{poly:016x}ULL /* floor(x^95 / G(x)) */')
    poly = bitreverse(G, 33)
    print(f'#define CRC32_BARRETT_CONSTANT_2 0x{poly:016x}ULL /* G(x) */')

    # Compute multipliers for combining the CRCs of separate chunks.
    print('')
    num_chunks = 4
    table_len = 129
    min_chunk_len = 128
    print(f'#define CRC32_NUM_CHUNKS {num_chunks}')
    print(f'#define CRC32_MIN_VARIABLE_CHUNK_LEN {min_chunk_len}UL')
    print(f'#define CRC32_MAX_VARIABLE_CHUNK_LEN {(table_len-1) * min_chunk_len}UL')
    print('')
    print('/* Multipliers for implementations that use a variable chunk length */')
    print('static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {')
    # Row 0 is a placeholder, so that row i holds the multipliers for a chunk
    # length of i * min_chunk_len bytes.
    print('\t{ 0 /* unused row */ },')
    for i in range(1, table_len):
        chunk_len = i * min_chunk_len
        print(f'\t/* chunk_len={chunk_len} */')
        print('\t{ ', end='')
        # Largest distance first: j counts the chunks being folded across.
        # NOTE(review): the -33 adjustment must match what the C code that
        # consumes this table expects; confirm there before changing it.
        for j in range(num_chunks - 1, 0, -1):
            d = (j * 8 * chunk_len) - 33
            poly = bitreverse(x_to_the_d(d), 32)
            print(f'0x{poly:08x} /* x^{d} mod G(x) */, ', end='')
        print('},')
    print('};')
    fixed_chunk_len = 32768
    print('')
    print('/* Multipliers for implementations that use a large fixed chunk length */')
    print(f'#define CRC32_FIXED_CHUNK_LEN {fixed_chunk_len}UL')
    for j in range(1, num_chunks):
        d = (j * 8 * fixed_chunk_len) - 33
        poly = bitreverse(x_to_the_d(d), 32)
        print(f'#define CRC32_FIXED_CHUNK_MULT_{j} 0x{poly:08x} /* x^{d} mod G(x) */')

# The generators write through print(), so sys.stdout is pointed at each
# target header while its generator runs.  The real stdout is always put back
# afterwards, even on failure, so that sys.stdout is never left referring to a
# file that has already been closed.
_real_stdout = sys.stdout
try:
    with open('lib/crc32_tables.h', 'w') as f:
        sys.stdout = f
        gen_tables()
    with open('lib/crc32_multipliers.h', 'w') as f:
        sys.stdout = f
        gen_multipliers()
finally:
    sys.stdout = _real_stdout
201 changes: 0 additions & 201 deletions scripts/gen_crc32_multipliers.c

This file was deleted.

Loading

0 comments on commit bd57e93

Please sign in to comment.