Skip to content

Commit cd5bb02

Browse files
committed
ec: make P384 code a little bit more generic
This change makes it easier to reuse the P384 code, which is already quite generic. No algorithmic changes are made; some code is only shuffled around. This prepares the ground for a P521 implementation.
1 parent 7f8fb38 commit cd5bb02

File tree

6 files changed

+352
-330
lines changed

6 files changed

+352
-330
lines changed

Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ include = [
6969
"crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl",
7070
"crypto/fipsmodule/ec/ecp_nistz.c",
7171
"crypto/fipsmodule/ec/ecp_nistz.h",
72-
"crypto/fipsmodule/ec/ecp_nistz384.h",
73-
"crypto/fipsmodule/ec/ecp_nistz384.inl",
72+
"crypto/fipsmodule/ec/ecp_nistz.inl",
73+
"crypto/fipsmodule/ec/gfp.h",
7474
"crypto/fipsmodule/ec/gfp_p256.c",
7575
"crypto/fipsmodule/ec/gfp_p384.c",
7676
"crypto/fipsmodule/ec/p256.c",

crypto/fipsmodule/ec/ecp_nistz384.inl crypto/fipsmodule/ec/ecp_nistz.inl

+107-114
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,24 @@
2020
* Shay Gueron and Vlad Krasnov
2121
* "Fast Prime Field Elliptic Curve Cryptography with 256 Bit Primes"
2222
* http://eprint.iacr.org/2013/816 */
23-
2423
#include "ecp_nistz.h"
24+
#include "gfp.h"
2525

2626
#if defined(__GNUC__) || defined(__clang__)
2727
#pragma GCC diagnostic push
2828
#pragma GCC diagnostic ignored "-Wsign-conversion"
2929
#endif
3030

31+
#define point_add(prefix, bits) RENAME_FUNC(prefix, bits, point_add)
32+
#define point_double(prefix, bits) RENAME_FUNC(prefix, bits, point_double)
33+
#define point_mul(prefix, bits) RENAME_FUNC(prefix, bits, point_mul)
34+
3135
/* Point double: r = 2*a */
32-
static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) {
33-
BN_ULONG S[P384_LIMBS];
34-
BN_ULONG M[P384_LIMBS];
35-
BN_ULONG Zsqr[P384_LIMBS];
36-
BN_ULONG tmp0[P384_LIMBS];
36+
static void point_double(nistz, BITS)(NIST_POINT *r, const NIST_POINT *a) {
37+
BN_ULONG S[FE_LIMBS];
38+
BN_ULONG M[FE_LIMBS];
39+
BN_ULONG Zsqr[FE_LIMBS];
40+
BN_ULONG tmp0[FE_LIMBS];
3741

3842
const BN_ULONG *in_x = a->X;
3943
const BN_ULONG *in_y = a->Y;
@@ -74,20 +78,20 @@ static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) {
7478
}
7579

7680
/* Point addition: r = a+b */
77-
static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
78-
const P384_POINT *b) {
79-
BN_ULONG U2[P384_LIMBS], S2[P384_LIMBS];
80-
BN_ULONG U1[P384_LIMBS], S1[P384_LIMBS];
81-
BN_ULONG Z1sqr[P384_LIMBS];
82-
BN_ULONG Z2sqr[P384_LIMBS];
83-
BN_ULONG H[P384_LIMBS], R[P384_LIMBS];
84-
BN_ULONG Hsqr[P384_LIMBS];
85-
BN_ULONG Rsqr[P384_LIMBS];
86-
BN_ULONG Hcub[P384_LIMBS];
87-
88-
BN_ULONG res_x[P384_LIMBS];
89-
BN_ULONG res_y[P384_LIMBS];
90-
BN_ULONG res_z[P384_LIMBS];
81+
static void point_add(nistz, BITS)(NIST_POINT *r, const NIST_POINT *a,
82+
const NIST_POINT *b) {
83+
BN_ULONG U2[FE_LIMBS], S2[FE_LIMBS];
84+
BN_ULONG U1[FE_LIMBS], S1[FE_LIMBS];
85+
BN_ULONG Z1sqr[FE_LIMBS];
86+
BN_ULONG Z2sqr[FE_LIMBS];
87+
BN_ULONG H[FE_LIMBS], R[FE_LIMBS];
88+
BN_ULONG Hsqr[FE_LIMBS];
89+
BN_ULONG Rsqr[FE_LIMBS];
90+
BN_ULONG Hcub[FE_LIMBS];
91+
92+
BN_ULONG res_x[FE_LIMBS];
93+
BN_ULONG res_y[FE_LIMBS];
94+
BN_ULONG res_z[FE_LIMBS];
9195

9296
const BN_ULONG *in1_x = a->X;
9397
const BN_ULONG *in1_y = a->Y;
@@ -117,11 +121,11 @@ static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
117121
BN_ULONG is_exceptional = is_equal(U1, U2) & ~in1infty & ~in2infty;
118122
if (is_exceptional) {
119123
if (is_equal(S1, S2)) {
120-
nistz384_point_double(r, a);
124+
point_double(nistz, BITS)(r, a);
121125
} else {
122-
limbs_zero(r->X, P384_LIMBS);
123-
limbs_zero(r->Y, P384_LIMBS);
124-
limbs_zero(r->Z, P384_LIMBS);
126+
limbs_zero(r->X, FE_LIMBS);
127+
limbs_zero(r->Y, FE_LIMBS);
128+
limbs_zero(r->Z, FE_LIMBS);
125129
}
126130
return;
127131
}
@@ -152,147 +156,136 @@ static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
152156
copy_conditional(res_y, in1_y, in2infty);
153157
copy_conditional(res_z, in1_z, in2infty);
154158

155-
limbs_copy(r->X, res_x, P384_LIMBS);
156-
limbs_copy(r->Y, res_y, P384_LIMBS);
157-
limbs_copy(r->Z, res_z, P384_LIMBS);
159+
limbs_copy(r->X, res_x, FE_LIMBS);
160+
limbs_copy(r->Y, res_y, FE_LIMBS);
161+
limbs_copy(r->Z, res_z, FE_LIMBS);
158162
}
159163

160-
static void add_precomputed_w5(P384_POINT *r, crypto_word_t wvalue,
161-
const P384_POINT table[16]) {
164+
static void add_precomputed_w(NIST_POINT *r, crypto_word_t wvalue,
165+
const NIST_POINT table[TBL_SZ]) {
162166
crypto_word_t recoded_is_negative;
163167
crypto_word_t recoded;
164-
booth_recode(&recoded_is_negative, &recoded, wvalue, 5);
168+
booth_recode(&recoded_is_negative, &recoded, wvalue, W_BITS);
165169

166-
alignas(64) P384_POINT h;
167-
p384_point_select_w5(&h, table, recoded);
170+
alignas(64) NIST_POINT h;
171+
NIST_POINT_select_w(&h, table, recoded);
168172

169-
alignas(64) BN_ULONG tmp[P384_LIMBS];
170-
p384_elem_neg(tmp, h.Y);
173+
alignas(64) BN_ULONG tmp[FE_LIMBS];
174+
elem_neg(tmp, h.Y);
171175
copy_conditional(h.Y, tmp, recoded_is_negative);
172176

173-
nistz384_point_add(r, r, &h);
177+
point_add(nistz, BITS)(r, r, &h);
174178
}
175179

176180
/* r = p * p_scalar */
177-
static void nistz384_point_mul(P384_POINT *r,
178-
const BN_ULONG p_scalar[P384_LIMBS],
179-
const Limb p_x[P384_LIMBS],
180-
const Limb p_y[P384_LIMBS]) {
181-
static const size_t kWindowSize = 5;
182-
static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
183-
184-
uint8_t p_str[(P384_LIMBS * sizeof(Limb)) + 1];
181+
static void point_mul(nistz, BITS)(NIST_POINT *r, const BN_ULONG p_scalar[FE_LIMBS],
182+
const BN_ULONG p_x[FE_LIMBS],
183+
const BN_ULONG p_y[FE_LIMBS]) {
184+
uint8_t p_str[(FE_LIMBS * sizeof(Limb)) + 1];
185185
little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
186-
p_scalar, P384_LIMBS);
186+
p_scalar, FE_LIMBS);
187187

188-
/* A |P384_POINT| is (3 * 48) = 144 bytes, and the 64-byte alignment should
188+
/* A |NIST_POINT| is (3 * 48) = 144 bytes, and the 64-byte alignment should
189189
* add no more than 63 bytes of overhead. Thus, |table| should require
190190
* ~2367 ((144 * 16) + 63) bytes of stack space. */
191-
alignas(64) P384_POINT table[16];
191+
alignas(64) NIST_POINT table[TBL_SZ];
192192

193193
/* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
194194
* not stored. All other values are actually stored with an offset of -1 in
195195
* table. */
196-
P384_POINT *row = table;
197-
198-
limbs_copy(row[1 - 1].X, p_x, P384_LIMBS);
199-
limbs_copy(row[1 - 1].Y, p_y, P384_LIMBS);
200-
limbs_copy(row[1 - 1].Z, ONE, P384_LIMBS);
201-
202-
nistz384_point_double(&row[2 - 1], &row[1 - 1]);
203-
nistz384_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
204-
nistz384_point_double(&row[4 - 1], &row[2 - 1]);
205-
nistz384_point_double(&row[6 - 1], &row[3 - 1]);
206-
nistz384_point_double(&row[8 - 1], &row[4 - 1]);
207-
nistz384_point_double(&row[12 - 1], &row[6 - 1]);
208-
nistz384_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
209-
nistz384_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
210-
nistz384_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
211-
nistz384_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
212-
nistz384_point_double(&row[14 - 1], &row[7 - 1]);
213-
nistz384_point_double(&row[10 - 1], &row[5 - 1]);
214-
nistz384_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
215-
nistz384_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
216-
nistz384_point_double(&row[16 - 1], &row[8 - 1]);
217-
218-
static const size_t START_INDEX = 384 - 4;
196+
NIST_POINT *row = table;
197+
198+
limbs_copy(row[0].X, p_x, FE_LIMBS);
199+
limbs_copy(row[0].Y, p_y, FE_LIMBS);
200+
limbs_copy(row[0].Z, ONE, FE_LIMBS);
201+
202+
point_double(nistz, BITS)(&row[1], &row[0]);
203+
204+
for (int i = 2; i < TBL_SZ; i += 2) {
205+
point_add(nistz, BITS)(&row[i], &row[i - 1], &row[0]);
206+
point_double(nistz, BITS)(&row[i + 1], &row[i / 2]);
207+
}
208+
209+
static const size_t ROUND_SIZE = (BITS + W_BITS - 1) / W_BITS * W_BITS;
210+
static const size_t START_INDEX = ROUND_SIZE == BITS + 1 ? ROUND_SIZE - W_BITS: ROUND_SIZE;
219211
size_t index = START_INDEX;
220212

221213
BN_ULONG recoded_is_negative;
222214
crypto_word_t recoded;
223215

224216
crypto_word_t wvalue = p_str[(index - 1) / 8];
225-
wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
217+
wvalue = (wvalue >> ((index - 1) % 8)) & W_MASK;
226218

227-
booth_recode(&recoded_is_negative, &recoded, wvalue, 5);
219+
booth_recode(&recoded_is_negative, &recoded, wvalue, W_BITS);
228220
dev_assert_secret(!recoded_is_negative);
229221

230-
p384_point_select_w5(r, table, recoded);
222+
NIST_POINT_select_w(r, table, recoded);
231223

232-
while (index >= kWindowSize) {
224+
while (index >= W_BITS) {
233225
if (index != START_INDEX) {
234226
size_t off = (index - 1) / 8;
235227

236228
wvalue = p_str[off] | p_str[off + 1] << 8;
237-
wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
238-
add_precomputed_w5(r, wvalue, table);
229+
wvalue = (wvalue >> ((index - 1) % 8)) & W_MASK;
230+
add_precomputed_w(r, wvalue, table);
239231
}
240232

241-
index -= kWindowSize;
233+
index -= W_BITS;
242234

243-
nistz384_point_double(r, r);
244-
nistz384_point_double(r, r);
245-
nistz384_point_double(r, r);
246-
nistz384_point_double(r, r);
247-
nistz384_point_double(r, r);
235+
for (int i = 0; i < W_BITS; i++) {
236+
point_double(nistz, BITS)(r, r);
237+
}
248238
}
249239

250240
/* Final window */
251241
wvalue = p_str[0];
252-
wvalue = (wvalue << 1) & kMask;
253-
add_precomputed_w5(r, wvalue, table);
242+
wvalue = (wvalue << 1) & W_MASK;
243+
add_precomputed_w(r, wvalue, table);
254244
}
255245

256-
void p384_point_double(Limb r[3][P384_LIMBS], const Limb a[3][P384_LIMBS])
246+
void point_double(p, BITS)(Limb r[3][FE_LIMBS], const Limb a[3][FE_LIMBS])
257247
{
258-
P384_POINT t;
259-
limbs_copy(t.X, a[0], P384_LIMBS);
260-
limbs_copy(t.Y, a[1], P384_LIMBS);
261-
limbs_copy(t.Z, a[2], P384_LIMBS);
262-
nistz384_point_double(&t, &t);
263-
limbs_copy(r[0], t.X, P384_LIMBS);
264-
limbs_copy(r[1], t.Y, P384_LIMBS);
265-
limbs_copy(r[2], t.Z, P384_LIMBS);
248+
NIST_POINT t;
249+
limbs_copy(t.X, a[0], FE_LIMBS);
250+
limbs_copy(t.Y, a[1], FE_LIMBS);
251+
limbs_copy(t.Z, a[2], FE_LIMBS);
252+
point_double(nistz, BITS)(&t, &t);
253+
limbs_copy(r[0], t.X, FE_LIMBS);
254+
limbs_copy(r[1], t.Y, FE_LIMBS);
255+
limbs_copy(r[2], t.Z, FE_LIMBS);
266256
}
267257

268-
void p384_point_add(Limb r[3][P384_LIMBS],
269-
const Limb a[3][P384_LIMBS],
270-
const Limb b[3][P384_LIMBS])
258+
void point_add(p, BITS)(Limb r[3][FE_LIMBS],
259+
const Limb a[3][FE_LIMBS],
260+
const Limb b[3][FE_LIMBS])
271261
{
272-
P384_POINT t1;
273-
limbs_copy(t1.X, a[0], P384_LIMBS);
274-
limbs_copy(t1.Y, a[1], P384_LIMBS);
275-
limbs_copy(t1.Z, a[2], P384_LIMBS);
262+
NIST_POINT t1;
263+
limbs_copy(t1.X, a[0], FE_LIMBS);
264+
limbs_copy(t1.Y, a[1], FE_LIMBS);
265+
limbs_copy(t1.Z, a[2], FE_LIMBS);
276266

277-
P384_POINT t2;
278-
limbs_copy(t2.X, b[0], P384_LIMBS);
279-
limbs_copy(t2.Y, b[1], P384_LIMBS);
280-
limbs_copy(t2.Z, b[2], P384_LIMBS);
267+
NIST_POINT t2;
268+
limbs_copy(t2.X, b[0], FE_LIMBS);
269+
limbs_copy(t2.Y, b[1], FE_LIMBS);
270+
limbs_copy(t2.Z, b[2], FE_LIMBS);
281271

282-
nistz384_point_add(&t1, &t1, &t2);
272+
point_add(nistz, BITS)(&t1, &t1, &t2);
283273

284-
limbs_copy(r[0], t1.X, P384_LIMBS);
285-
limbs_copy(r[1], t1.Y, P384_LIMBS);
286-
limbs_copy(r[2], t1.Z, P384_LIMBS);
274+
limbs_copy(r[0], t1.X, FE_LIMBS);
275+
limbs_copy(r[1], t1.Y, FE_LIMBS);
276+
limbs_copy(r[2], t1.Z, FE_LIMBS);
287277
}
288278

289-
void p384_point_mul(Limb r[3][P384_LIMBS], const BN_ULONG p_scalar[P384_LIMBS],
290-
const Limb p_x[P384_LIMBS], const Limb p_y[P384_LIMBS]) {
291-
alignas(64) P384_POINT acc;
292-
nistz384_point_mul(&acc, p_scalar, p_x, p_y);
293-
limbs_copy(r[0], acc.X, P384_LIMBS);
294-
limbs_copy(r[1], acc.Y, P384_LIMBS);
295-
limbs_copy(r[2], acc.Z, P384_LIMBS);
279+
void point_mul(p, BITS)(Limb r[3][FE_LIMBS],
280+
const BN_ULONG p_scalar[FE_LIMBS],
281+
const Limb p_x[FE_LIMBS],
282+
const Limb p_y[FE_LIMBS])
283+
{
284+
alignas(64) NIST_POINT acc;
285+
point_mul(nistz, BITS)(&acc, p_scalar, p_x, p_y);
286+
limbs_copy(r[0], acc.X, FE_LIMBS);
287+
limbs_copy(r[1], acc.Y, FE_LIMBS);
288+
limbs_copy(r[2], acc.Z, FE_LIMBS);
296289
}
297290

298291
#if defined(__GNUC__) || defined(__clang__)

crypto/fipsmodule/ec/ecp_nistz384.h

-34
This file was deleted.

0 commit comments

Comments
 (0)