  * Shay Gueron and Vlad Krasnov
  * "Fast Prime Field Elliptic Curve Cryptography with 256 Bit Primes"
  * http://eprint.iacr.org/2013/816 */
-
 #include "ecp_nistz.h"
+#include "gfp.h"
 
 #if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wsign-conversion"
 #endif
 
+#define point_add(prefix, bits) RENAME_FUNC(prefix, bits, point_add)
+#define point_double(prefix, bits) RENAME_FUNC(prefix, bits, point_double)
+#define point_mul(prefix, bits) RENAME_FUNC(prefix, bits, point_mul)
+
 /* Point double: r = 2*a */
-static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) {
-  BN_ULONG S[P384_LIMBS];
-  BN_ULONG M[P384_LIMBS];
-  BN_ULONG Zsqr[P384_LIMBS];
-  BN_ULONG tmp0[P384_LIMBS];
+static void point_double(nistz, BITS)(NIST_POINT *r, const NIST_POINT *a) {
+  BN_ULONG S[FE_LIMBS];
+  BN_ULONG M[FE_LIMBS];
+  BN_ULONG Zsqr[FE_LIMBS];
+  BN_ULONG tmp0[FE_LIMBS];
 
   const BN_ULONG *in_x = a->X;
   const BN_ULONG *in_y = a->Y;
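The `point_add`/`point_double`/`point_mul` defines added above rely on a `RENAME_FUNC` token-pasting helper, plus parameter macros such as `BITS`, `FE_LIMBS`, and `NIST_POINT`, all of which live in `ecp_nistz.h`/`gfp.h` rather than in this diff. A minimal sketch of how that renaming plausibly works, assuming `BITS` is 384 for this file (names and values here are illustrative, not copied from the headers):

```c
/* Hypothetical sketch only -- the real definitions are in ecp_nistz.h /
 * gfp.h, which this diff does not show. Two macro layers are used so that
 * BITS expands to 384 before the tokens are pasted together. */
#define BITS 384 /* assumed value for this file */

#define RENAME_FUNC_(prefix, bits, func) prefix##bits##_##func
#define RENAME_FUNC(prefix, bits, func) RENAME_FUNC_(prefix, bits, func)

#define point_double(prefix, bits) RENAME_FUNC(prefix, bits, point_double)

/* With the above:
 *   point_double(nistz, BITS) -> nistz384_point_double   (internal helper)
 *   point_double(p, BITS)     -> p384_point_double       (exported wrapper)
 * which matches the hand-written names the old code used; point_add and
 * point_mul follow the same pattern. */
```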
@@ -74,20 +78,20 @@ static void nistz384_point_double(P384_POINT *r, const P384_POINT *a) {
 }
 
 /* Point addition: r = a+b */
-static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
-                               const P384_POINT *b) {
-  BN_ULONG U2[P384_LIMBS], S2[P384_LIMBS];
-  BN_ULONG U1[P384_LIMBS], S1[P384_LIMBS];
-  BN_ULONG Z1sqr[P384_LIMBS];
-  BN_ULONG Z2sqr[P384_LIMBS];
-  BN_ULONG H[P384_LIMBS], R[P384_LIMBS];
-  BN_ULONG Hsqr[P384_LIMBS];
-  BN_ULONG Rsqr[P384_LIMBS];
-  BN_ULONG Hcub[P384_LIMBS];
-
-  BN_ULONG res_x[P384_LIMBS];
-  BN_ULONG res_y[P384_LIMBS];
-  BN_ULONG res_z[P384_LIMBS];
+static void point_add(nistz, BITS)(NIST_POINT *r, const NIST_POINT *a,
+                                   const NIST_POINT *b) {
+  BN_ULONG U2[FE_LIMBS], S2[FE_LIMBS];
+  BN_ULONG U1[FE_LIMBS], S1[FE_LIMBS];
+  BN_ULONG Z1sqr[FE_LIMBS];
+  BN_ULONG Z2sqr[FE_LIMBS];
+  BN_ULONG H[FE_LIMBS], R[FE_LIMBS];
+  BN_ULONG Hsqr[FE_LIMBS];
+  BN_ULONG Rsqr[FE_LIMBS];
+  BN_ULONG Hcub[FE_LIMBS];
+
+  BN_ULONG res_x[FE_LIMBS];
+  BN_ULONG res_y[FE_LIMBS];
+  BN_ULONG res_z[FE_LIMBS];
 
   const BN_ULONG *in1_x = a->X;
   const BN_ULONG *in1_y = a->Y;
@@ -117,11 +121,11 @@ static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
   BN_ULONG is_exceptional = is_equal(U1, U2) & ~in1infty & ~in2infty;
   if (is_exceptional) {
     if (is_equal(S1, S2)) {
-      nistz384_point_double(r, a);
+      point_double(nistz, BITS)(r, a);
     } else {
-      limbs_zero(r->X, P384_LIMBS);
-      limbs_zero(r->Y, P384_LIMBS);
-      limbs_zero(r->Z, P384_LIMBS);
+      limbs_zero(r->X, FE_LIMBS);
+      limbs_zero(r->Y, FE_LIMBS);
+      limbs_zero(r->Z, FE_LIMBS);
     }
     return;
   }
@@ -152,147 +156,136 @@ static void nistz384_point_add(P384_POINT *r, const P384_POINT *a,
   copy_conditional(res_y, in1_y, in2infty);
   copy_conditional(res_z, in1_z, in2infty);
 
-  limbs_copy(r->X, res_x, P384_LIMBS);
-  limbs_copy(r->Y, res_y, P384_LIMBS);
-  limbs_copy(r->Z, res_z, P384_LIMBS);
+  limbs_copy(r->X, res_x, FE_LIMBS);
+  limbs_copy(r->Y, res_y, FE_LIMBS);
+  limbs_copy(r->Z, res_z, FE_LIMBS);
 }
 
-static void add_precomputed_w5(P384_POINT *r, crypto_word_t wvalue,
-                               const P384_POINT table[16]) {
+static void add_precomputed_w(NIST_POINT *r, crypto_word_t wvalue,
+                              const NIST_POINT table[TBL_SZ]) {
   crypto_word_t recoded_is_negative;
   crypto_word_t recoded;
-  booth_recode(&recoded_is_negative, &recoded, wvalue, 5);
+  booth_recode(&recoded_is_negative, &recoded, wvalue, W_BITS);
 
-  alignas(64) P384_POINT h;
-  p384_point_select_w5(&h, table, recoded);
+  alignas(64) NIST_POINT h;
+  NIST_POINT_select_w(&h, table, recoded);
 
-  alignas(64) BN_ULONG tmp[P384_LIMBS];
-  p384_elem_neg(tmp, h.Y);
+  alignas(64) BN_ULONG tmp[FE_LIMBS];
+  elem_neg(tmp, h.Y);
   copy_conditional(h.Y, tmp, recoded_is_negative);
 
-  nistz384_point_add(r, r, &h);
+  point_add(nistz, BITS)(r, r, &h);
 }
 
 /* r = p * p_scalar */
-static void nistz384_point_mul(P384_POINT *r,
-                               const BN_ULONG p_scalar[P384_LIMBS],
-                               const Limb p_x[P384_LIMBS],
-                               const Limb p_y[P384_LIMBS]) {
-  static const size_t kWindowSize = 5;
-  static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
-
-  uint8_t p_str[(P384_LIMBS * sizeof(Limb)) + 1];
+static void point_mul(nistz, BITS)(NIST_POINT *r, const BN_ULONG p_scalar[FE_LIMBS],
+                                   const BN_ULONG p_x[FE_LIMBS],
+                                   const BN_ULONG p_y[FE_LIMBS]) {
+  uint8_t p_str[(FE_LIMBS * sizeof(Limb)) + 1];
   little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
-                                  p_scalar, P384_LIMBS);
+                                  p_scalar, FE_LIMBS);
 
-  /* A |P384_POINT| is (3 * 48) = 144 bytes, and the 64-byte alignment should
+  /* A |NIST_POINT| is (3 * 48) = 144 bytes, and the 64-byte alignment should
    * add no more than 63 bytes of overhead. Thus, |table| should require
    * ~2367 ((144 * 16) + 63) bytes of stack space. */
-  alignas(64) P384_POINT table[16];
+  alignas(64) NIST_POINT table[TBL_SZ];
 
   /* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
    * not stored. All other values are actually stored with an offset of -1 in
    * table. */
-  P384_POINT *row = table;
-
-  limbs_copy(row[1 - 1].X, p_x, P384_LIMBS);
-  limbs_copy(row[1 - 1].Y, p_y, P384_LIMBS);
-  limbs_copy(row[1 - 1].Z, ONE, P384_LIMBS);
-
-  nistz384_point_double(&row[2 - 1], &row[1 - 1]);
-  nistz384_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
-  nistz384_point_double(&row[4 - 1], &row[2 - 1]);
-  nistz384_point_double(&row[6 - 1], &row[3 - 1]);
-  nistz384_point_double(&row[8 - 1], &row[4 - 1]);
-  nistz384_point_double(&row[12 - 1], &row[6 - 1]);
-  nistz384_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
-  nistz384_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
-  nistz384_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
-  nistz384_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
-  nistz384_point_double(&row[14 - 1], &row[7 - 1]);
-  nistz384_point_double(&row[10 - 1], &row[5 - 1]);
-  nistz384_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
-  nistz384_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
-  nistz384_point_double(&row[16 - 1], &row[8 - 1]);
-
-  static const size_t START_INDEX = 384 - 4;
+  NIST_POINT *row = table;
+
+  limbs_copy(row[0].X, p_x, FE_LIMBS);
+  limbs_copy(row[0].Y, p_y, FE_LIMBS);
+  limbs_copy(row[0].Z, ONE, FE_LIMBS);
+
+  point_double(nistz, BITS)(&row[1], &row[0]);
+
+  for (int i = 2; i < TBL_SZ; i += 2) {
+    point_add(nistz, BITS)(&row[i], &row[i - 1], &row[0]);
+    point_double(nistz, BITS)(&row[i + 1], &row[i / 2]);
+  }
+
+  static const size_t ROUND_SIZE = (BITS + W_BITS - 1) / W_BITS * W_BITS;
+  static const size_t START_INDEX = ROUND_SIZE == BITS + 1 ? ROUND_SIZE - W_BITS: ROUND_SIZE;
   size_t index = START_INDEX;
 
   BN_ULONG recoded_is_negative;
   crypto_word_t recoded;
 
   crypto_word_t wvalue = p_str[(index - 1) / 8];
-  wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
+  wvalue = (wvalue >> ((index - 1) % 8)) & W_MASK;
 
-  booth_recode(&recoded_is_negative, &recoded, wvalue, 5);
+  booth_recode(&recoded_is_negative, &recoded, wvalue, W_BITS);
   dev_assert_secret(!recoded_is_negative);
 
-  p384_point_select_w5(r, table, recoded);
+  NIST_POINT_select_w(r, table, recoded);
 
-  while (index >= kWindowSize) {
+  while (index >= W_BITS) {
     if (index != START_INDEX) {
       size_t off = (index - 1) / 8;
 
       wvalue = p_str[off] | p_str[off + 1] << 8;
-      wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
-      add_precomputed_w5(r, wvalue, table);
+      wvalue = (wvalue >> ((index - 1) % 8)) & W_MASK;
+      add_precomputed_w(r, wvalue, table);
     }
 
-    index -= kWindowSize;
+    index -= W_BITS;
 
-    nistz384_point_double(r, r);
-    nistz384_point_double(r, r);
-    nistz384_point_double(r, r);
-    nistz384_point_double(r, r);
-    nistz384_point_double(r, r);
+    for (int i = 0; i < W_BITS; i++) {
+      point_double(nistz, BITS)(r, r);
+    }
   }
 
   /* Final window */
   wvalue = p_str[0];
-  wvalue = (wvalue << 1) & kMask;
-  add_precomputed_w5(r, wvalue, table);
+  wvalue = (wvalue << 1) & W_MASK;
+  add_precomputed_w(r, wvalue, table);
 }
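The hand-unrolled 16-entry precomputation ladder and the hard-coded `384 - 4` starting index are replaced by a loop over `TBL_SZ` and an expression in `BITS` and `W_BITS`. The standalone check below (not part of the library) models table entries as the integer multiple of the base point they represent; it assumes `BITS = 384`, `W_BITS = 5`, and `TBL_SZ = 16`, matching the constants the removed code hard-coded (`kWindowSize = 5`, `table[16]`, `START_INDEX = 384 - 4`), and confirms the new loop and index arithmetic reproduce the old behaviour:

```c
/* Standalone sanity check, not library code. */
#include <assert.h>
#include <stdio.h>

#define BITS 384 /* assumed parameters, matching the old hard-coded values */
#define W_BITS 5
#define TBL_SZ 16

int main(void) {
  /* table[i] holds (i + 1) * P; 0 * P (the point at infinity) is not stored. */
  int row[TBL_SZ];
  row[0] = 1;          /* limbs_copy(row[0].{X,Y,Z}, ...)          */
  row[1] = 2 * row[0]; /* point_double(&row[1], &row[0])           */
  for (int i = 2; i < TBL_SZ; i += 2) {
    row[i] = row[i - 1] + row[0]; /* point_add(&row[i], &row[i - 1], &row[0]) */
    row[i + 1] = 2 * row[i / 2];  /* point_double(&row[i + 1], &row[i / 2])   */
  }
  for (int i = 0; i < TBL_SZ; i++) {
    assert(row[i] == i + 1); /* same multiples as the old unrolled ladder */
  }

  /* The old code started the window scan at the fixed index 384 - 4; the new
   * expression reduces to the same value for these parameters. */
  size_t round_size = (BITS + W_BITS - 1) / W_BITS * W_BITS; /* 385 */
  size_t start_index = round_size == BITS + 1 ? round_size - W_BITS : round_size;
  assert(start_index == 384 - 4);

  printf("precomputed multiples and start index match the old code\n");
  return 0;
}
```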
 
-void p384_point_double(Limb r[3][P384_LIMBS], const Limb a[3][P384_LIMBS])
+void point_double(p, BITS)(Limb r[3][FE_LIMBS], const Limb a[3][FE_LIMBS])
 {
-  P384_POINT t;
-  limbs_copy(t.X, a[0], P384_LIMBS);
-  limbs_copy(t.Y, a[1], P384_LIMBS);
-  limbs_copy(t.Z, a[2], P384_LIMBS);
-  nistz384_point_double(&t, &t);
-  limbs_copy(r[0], t.X, P384_LIMBS);
-  limbs_copy(r[1], t.Y, P384_LIMBS);
-  limbs_copy(r[2], t.Z, P384_LIMBS);
+  NIST_POINT t;
+  limbs_copy(t.X, a[0], FE_LIMBS);
+  limbs_copy(t.Y, a[1], FE_LIMBS);
+  limbs_copy(t.Z, a[2], FE_LIMBS);
+  point_double(nistz, BITS)(&t, &t);
+  limbs_copy(r[0], t.X, FE_LIMBS);
+  limbs_copy(r[1], t.Y, FE_LIMBS);
+  limbs_copy(r[2], t.Z, FE_LIMBS);
 }
 
-void p384_point_add(Limb r[3][P384_LIMBS],
-                    const Limb a[3][P384_LIMBS],
-                    const Limb b[3][P384_LIMBS])
+void point_add(p, BITS)(Limb r[3][FE_LIMBS],
+                        const Limb a[3][FE_LIMBS],
+                        const Limb b[3][FE_LIMBS])
 {
-  P384_POINT t1;
-  limbs_copy(t1.X, a[0], P384_LIMBS);
-  limbs_copy(t1.Y, a[1], P384_LIMBS);
-  limbs_copy(t1.Z, a[2], P384_LIMBS);
+  NIST_POINT t1;
+  limbs_copy(t1.X, a[0], FE_LIMBS);
+  limbs_copy(t1.Y, a[1], FE_LIMBS);
+  limbs_copy(t1.Z, a[2], FE_LIMBS);
 
-  P384_POINT t2;
-  limbs_copy(t2.X, b[0], P384_LIMBS);
-  limbs_copy(t2.Y, b[1], P384_LIMBS);
-  limbs_copy(t2.Z, b[2], P384_LIMBS);
+  NIST_POINT t2;
+  limbs_copy(t2.X, b[0], FE_LIMBS);
+  limbs_copy(t2.Y, b[1], FE_LIMBS);
+  limbs_copy(t2.Z, b[2], FE_LIMBS);
 
-  nistz384_point_add(&t1, &t1, &t2);
+  point_add(nistz, BITS)(&t1, &t1, &t2);
 
-  limbs_copy(r[0], t1.X, P384_LIMBS);
-  limbs_copy(r[1], t1.Y, P384_LIMBS);
-  limbs_copy(r[2], t1.Z, P384_LIMBS);
+  limbs_copy(r[0], t1.X, FE_LIMBS);
+  limbs_copy(r[1], t1.Y, FE_LIMBS);
+  limbs_copy(r[2], t1.Z, FE_LIMBS);
 }
 
-void p384_point_mul(Limb r[3][P384_LIMBS], const BN_ULONG p_scalar[P384_LIMBS],
-                    const Limb p_x[P384_LIMBS], const Limb p_y[P384_LIMBS]) {
-  alignas(64) P384_POINT acc;
-  nistz384_point_mul(&acc, p_scalar, p_x, p_y);
-  limbs_copy(r[0], acc.X, P384_LIMBS);
-  limbs_copy(r[1], acc.Y, P384_LIMBS);
-  limbs_copy(r[2], acc.Z, P384_LIMBS);
+void point_mul(p, BITS)(Limb r[3][FE_LIMBS],
+                        const BN_ULONG p_scalar[FE_LIMBS],
+                        const Limb p_x[FE_LIMBS],
+                        const Limb p_y[FE_LIMBS])
+{
+  alignas(64) NIST_POINT acc;
+  point_mul(nistz, BITS)(&acc, p_scalar, p_x, p_y);
+  limbs_copy(r[0], acc.X, FE_LIMBS);
+  limbs_copy(r[1], acc.Y, FE_LIMBS);
+  limbs_copy(r[2], acc.Z, FE_LIMBS);
 }
 
 #if defined(__GNUC__) || defined(__clang__)
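One more standalone check, on the byte indexing of the scalar buffer: `p_str` is sized `FE_LIMBS * sizeof(Limb) + 1` precisely so that the two-byte window read `p_str[off] | p_str[off + 1] << 8` never runs past the end. The sketch below assumes `BITS = 384`, `W_BITS = 5`, and 64-bit limbs (the result is the same for 32-bit limbs, since the byte count is unchanged) and walks the same index sequence the scan uses:

```c
/* Standalone bounds check, not library code. */
#include <assert.h>
#include <stdio.h>

#define BITS 384 /* assumed parameters */
#define W_BITS 5
#define LIMB_BYTES 8                          /* assumed 64-bit limbs */
#define FE_LIMBS (BITS / (8 * LIMB_BYTES))    /* 6 */
#define P_STR_LEN (FE_LIMBS * LIMB_BYTES + 1) /* 49 */

int main(void) {
  size_t round_size = (BITS + W_BITS - 1) / W_BITS * W_BITS;
  size_t start_index = round_size == BITS + 1 ? round_size - W_BITS : round_size;
  size_t index = start_index;

  /* The first window reads a single byte. */
  assert((index - 1) / 8 < P_STR_LEN);

  while (index >= W_BITS) {
    if (index != start_index) {
      size_t off = (index - 1) / 8;
      assert(off + 1 < P_STR_LEN); /* p_str[off] and p_str[off + 1] in bounds */
    }
    index -= W_BITS;
  }
  /* The final window reads only p_str[0]. */

  printf("all window reads stay within the %d-byte p_str buffer\n", P_STR_LEN);
  return 0;
}
```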