22#ifndef SECP256K1_FIELD_52_IMPL_HPP
23#define SECP256K1_FIELD_52_IMPL_HPP
31#if defined(__SIZEOF_INT128__)
35#pragma GCC diagnostic push
36#pragma GCC diagnostic ignored "-Wpedantic"
49#if defined(__riscv) && (__riscv_xlen == 64) && defined(SECP256K1_HAS_RISCV_FE52_ASM) \
50 && !defined(SECP256K1_RISCV_FE52_DISABLE)
51 #define SECP256K1_RISCV_FE52_V1 1
53 void fe52_mul_inner_riscv64(std::uint64_t* r,
const std::uint64_t* a,
const std::uint64_t* b);
54 void fe52_sqr_inner_riscv64(std::uint64_t* r,
const std::uint64_t* a);
64#if defined(SECP256K1_HAS_ASM) && (defined(__x86_64__) || defined(_M_X64))
65 #define SECP256K1_HYBRID_4X64_ACTIVE 1
67 extern "C" __attribute__((sysv_abi))
void field_mul_full_asm(
68 const std::uint64_t* a,
const std::uint64_t* b, std::uint64_t* result);
69 extern "C" __attribute__((sysv_abi))
void field_sqr_full_asm(
70 const std::uint64_t* a, std::uint64_t* result);
73 void field_mul_full_asm(
74 const std::uint64_t* a,
const std::uint64_t* b, std::uint64_t* result);
75 void field_sqr_full_asm(
76 const std::uint64_t* a, std::uint64_t* result);
83#if defined(__GNUC__) || defined(__clang__)
84 #define SECP256K1_FE52_FORCE_INLINE __attribute__((always_inline)) inline
85#elif defined(_MSC_VER)
86 #define SECP256K1_FE52_FORCE_INLINE __forceinline
88 #define SECP256K1_FE52_FORCE_INLINE inline
92#if defined(SECP256K1_HYBRID_4X64_ACTIVE)
95 SECP256K1_FE52_FORCE_INLINE
96 void fe52_normalize_and_pack_4x64(
const std::uint64_t* n, std::uint64_t* out)
noexcept {
97 constexpr std::uint64_t M = 0xFFFFFFFFFFFFFULL;
98 constexpr std::uint64_t M48v = 0xFFFFFFFFFFFFULL;
99 std::uint64_t t0 = n[0], t1 = n[1], t2 = n[2], t3 = n[3], t4 = n[4];
101 t1 += (t0 >> 52); t0 &= M;
102 t2 += (t1 >> 52); t1 &= M;
103 t3 += (t2 >> 52); t2 &= M;
104 t4 += (t3 >> 52); t3 &= M;
106 std::uint64_t
const x = t4 >> 48;
108 t0 += x * 0x1000003D1ULL;
110 t1 += (t0 >> 52); t0 &= M;
111 t2 += (t1 >> 52); t1 &= M;
112 t3 += (t2 >> 52); t2 &= M;
113 t4 += (t3 >> 52); t3 &= M;
115 out[0] = t0 | (t1 << 52);
116 out[1] = (t1 >> 12) | (t2 << 40);
117 out[2] = (t2 >> 24) | (t3 << 28);
118 out[3] = (t3 >> 36) | (t4 << 16);
122 SECP256K1_FE52_FORCE_INLINE
123 void fe64_unpack_to_fe52(
const std::uint64_t* L, std::uint64_t* r)
noexcept {
124 constexpr std::uint64_t M = 0xFFFFFFFFFFFFFULL;
126 r[1] = (L[0] >> 52) | ((L[1] & 0xFFFFFFFFFFULL) << 12);
127 r[2] = (L[1] >> 40) | ((L[2] & 0xFFFFFFFULL) << 24);
128 r[3] = (L[2] >> 28) | ((L[3] & 0xFFFFULL) << 36);
136using namespace fe52_constants;
152SECP256K1_FE52_FORCE_INLINE
153void fe52_mul_inner(std::uint64_t* r,
154 const std::uint64_t* a,
155 const std::uint64_t* b)
noexcept {
156#if defined(SECP256K1_RISCV_FE52_V1)
161 fe52_mul_inner_riscv64(r, a, b);
184 std::uint64_t out0, out1, out2, out3 = 0, out4 = 0;
185 const std::uint64_t a0_v = a[0], a1_v = a[1], a2_v = a[2];
186 const std::uint64_t a3_v = a[3], a4_v = a[4];
187 __asm__ __volatile__ (
189 "xorl %%r8d, %%r8d\n\t"
190 "xorl %%r9d, %%r9d\n\t"
192 "movq %[a0], %%rdx\n\t"
193 "mulxq 24(%[bp]), %%rax, %%rcx\n\t"
194 "adcxq %%rax, %%r8\n\t"
195 "adcxq %%rcx, %%r9\n\t"
196 "movq %[a1], %%rdx\n\t"
197 "mulxq 16(%[bp]), %%rax, %%rcx\n\t"
198 "adcxq %%rax, %%r8\n\t"
199 "adcxq %%rcx, %%r9\n\t"
200 "movq %[a2], %%rdx\n\t"
201 "mulxq 8(%[bp]), %%rax, %%rcx\n\t"
202 "adcxq %%rax, %%r8\n\t"
203 "adcxq %%rcx, %%r9\n\t"
204 "movq %[a3], %%rdx\n\t"
205 "mulxq (%[bp]), %%rax, %%rcx\n\t"
206 "adcxq %%rax, %%r8\n\t"
207 "adcxq %%rcx, %%r9\n\t"
210 "movq %[a4], %%rdx\n\t"
211 "mulxq 32(%[bp]), %%r10, %%r11\n\t"
214 "movabsq $0x1000003D10, %%rdx\n\t"
215 "mulxq %%r10, %%rax, %%rcx\n\t"
216 "addq %%rax, %%r8\n\t"
217 "adcq %%rcx, %%r9\n\t"
219 "movq %%r11, %%r10\n\t"
222 "movq %%r8, %%rax\n\t"
223 "movq $0xFFFFFFFFFFFFF, %%rcx\n\t"
224 "andq %%rcx, %%rax\n\t"
225 "movq %%rax, %[o3]\n\t"
226 "shrdq $52, %%r9, %%r8\n\t"
230 "xorl %%eax, %%eax\n\t"
231 "movq %[a0], %%rdx\n\t"
232 "mulxq 32(%[bp]), %%rax, %%rcx\n\t"
233 "adcxq %%rax, %%r8\n\t"
234 "adcxq %%rcx, %%r9\n\t"
235 "movq %[a1], %%rdx\n\t"
236 "mulxq 24(%[bp]), %%rax, %%rcx\n\t"
237 "adcxq %%rax, %%r8\n\t"
238 "adcxq %%rcx, %%r9\n\t"
239 "movq %[a2], %%rdx\n\t"
240 "mulxq 16(%[bp]), %%rax, %%rcx\n\t"
241 "adcxq %%rax, %%r8\n\t"
242 "adcxq %%rcx, %%r9\n\t"
243 "movq %[a3], %%rdx\n\t"
244 "mulxq 8(%[bp]), %%rax, %%rcx\n\t"
245 "adcxq %%rax, %%r8\n\t"
246 "adcxq %%rcx, %%r9\n\t"
247 "movq %[a4], %%rdx\n\t"
248 "mulxq (%[bp]), %%rax, %%rcx\n\t"
249 "adcxq %%rax, %%r8\n\t"
250 "adcxq %%rcx, %%r9\n\t"
253 "movabsq $0x1000003D10000, %%rdx\n\t"
254 "mulxq %%r10, %%rax, %%rcx\n\t"
255 "addq %%rax, %%r8\n\t"
256 "adcq %%rcx, %%r9\n\t"
259 "movq %%r8, %%r10\n\t"
260 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
261 "andq %%rax, %%r10\n\t"
262 "shrdq $52, %%r9, %%r8\n\t"
266 "xorl %%eax, %%eax\n\t"
267 "movq %[a1], %%rdx\n\t"
268 "mulxq 32(%[bp]), %%rax, %%rcx\n\t"
269 "adcxq %%rax, %%r8\n\t"
270 "adcxq %%rcx, %%r9\n\t"
271 "movq %[a2], %%rdx\n\t"
272 "mulxq 24(%[bp]), %%rax, %%rcx\n\t"
273 "adcxq %%rax, %%r8\n\t"
274 "adcxq %%rcx, %%r9\n\t"
275 "movq %[a3], %%rdx\n\t"
276 "mulxq 16(%[bp]), %%rax, %%rcx\n\t"
277 "adcxq %%rax, %%r8\n\t"
278 "adcxq %%rcx, %%r9\n\t"
279 "movq %[a4], %%rdx\n\t"
280 "mulxq 8(%[bp]), %%rax, %%rcx\n\t"
281 "adcxq %%rax, %%r8\n\t"
282 "adcxq %%rcx, %%r9\n\t"
285 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
286 "movq %%r8, %%rcx\n\t"
287 "andq %%rax, %%rcx\n\t"
288 "shrdq $52, %%r9, %%r8\n\t"
291 "movq %%r10, %%rax\n\t"
292 "shrq $48, %%rax\n\t"
293 "orq %%rax, %%rcx\n\t"
295 "movq $0xFFFFFFFFFFFF, %%rax\n\t"
296 "andq %%rax, %%r10\n\t"
297 "movq %%r10, %[o4]\n\t"
300 "movq %[a0], %%rdx\n\t"
301 "mulxq (%[bp]), %%r10, %%r11\n\t"
302 "movabsq $0x1000003D1, %%rdx\n\t"
303 "mulxq %%rcx, %%rax, %%rcx\n\t"
304 "addq %%rax, %%r10\n\t"
305 "adcq %%rcx, %%r11\n\t"
308 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
309 "movq %%r10, %%rcx\n\t"
310 "andq %%rax, %%rcx\n\t"
311 "movq %%rcx, %[o0]\n\t"
312 "shrdq $52, %%r11, %%r10\n\t"
313 "shrq $52, %%r11\n\t"
316 "xorl %%eax, %%eax\n\t"
317 "movq %[a0], %%rdx\n\t"
318 "mulxq 8(%[bp]), %%rax, %%rcx\n\t"
319 "adoxq %%rax, %%r10\n\t"
320 "adoxq %%rcx, %%r11\n\t"
321 "movq %[a2], %%rdx\n\t"
322 "mulxq 32(%[bp]), %%rax, %%rcx\n\t"
323 "adcxq %%rax, %%r8\n\t"
324 "adcxq %%rcx, %%r9\n\t"
325 "movq %[a1], %%rdx\n\t"
326 "mulxq (%[bp]), %%rax, %%rcx\n\t"
327 "adoxq %%rax, %%r10\n\t"
328 "adoxq %%rcx, %%r11\n\t"
329 "movq %[a3], %%rdx\n\t"
330 "mulxq 24(%[bp]), %%rax, %%rcx\n\t"
331 "adcxq %%rax, %%r8\n\t"
332 "adcxq %%rcx, %%r9\n\t"
333 "movq %[a4], %%rdx\n\t"
334 "mulxq 16(%[bp]), %%rax, %%rcx\n\t"
335 "adcxq %%rax, %%r8\n\t"
336 "adcxq %%rcx, %%r9\n\t"
339 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
340 "movq %%r8, %%rcx\n\t"
341 "andq %%rax, %%rcx\n\t"
342 "shrdq $52, %%r9, %%r8\n\t"
344 "movabsq $0x1000003D10, %%rdx\n\t"
345 "mulxq %%rcx, %%rax, %%rcx\n\t"
346 "addq %%rax, %%r10\n\t"
347 "adcq %%rcx, %%r11\n\t"
350 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
351 "movq %%r10, %%rcx\n\t"
352 "andq %%rax, %%rcx\n\t"
353 "movq %%rcx, %[o1]\n\t"
354 "shrdq $52, %%r11, %%r10\n\t"
355 "shrq $52, %%r11\n\t"
358 "xorl %%eax, %%eax\n\t"
359 "movq %[a0], %%rdx\n\t"
360 "mulxq 16(%[bp]), %%rax, %%rcx\n\t"
361 "adoxq %%rax, %%r10\n\t"
362 "adoxq %%rcx, %%r11\n\t"
363 "movq %[a3], %%rdx\n\t"
364 "mulxq 32(%[bp]), %%rax, %%rcx\n\t"
365 "adcxq %%rax, %%r8\n\t"
366 "adcxq %%rcx, %%r9\n\t"
367 "movq %[a1], %%rdx\n\t"
368 "mulxq 8(%[bp]), %%rax, %%rcx\n\t"
369 "adoxq %%rax, %%r10\n\t"
370 "adoxq %%rcx, %%r11\n\t"
371 "movq %[a4], %%rdx\n\t"
372 "mulxq 24(%[bp]), %%rax, %%rcx\n\t"
373 "adcxq %%rax, %%r8\n\t"
374 "adcxq %%rcx, %%r9\n\t"
375 "movq %[a2], %%rdx\n\t"
376 "mulxq (%[bp]), %%rax, %%rcx\n\t"
377 "adoxq %%rax, %%r10\n\t"
378 "adoxq %%rcx, %%r11\n\t"
381 "movabsq $0x1000003D10, %%rdx\n\t"
382 "mulxq %%r8, %%rax, %%rcx\n\t"
383 "addq %%rax, %%r10\n\t"
384 "adcq %%rcx, %%r11\n\t"
385 "movq %%r9, %%r8\n\t"
386 "xorl %%r9d, %%r9d\n\t"
389 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
390 "movq %%r10, %%rcx\n\t"
391 "andq %%rax, %%rcx\n\t"
392 "movq %%rcx, %[o2]\n\t"
393 "shrdq $52, %%r11, %%r10\n\t"
394 "shrq $52, %%r11\n\t"
397 "movabsq $0x1000003D10000, %%rdx\n\t"
398 "mulxq %%r8, %%rax, %%rcx\n\t"
399 "addq %%rax, %%r10\n\t"
400 "adcq %%rcx, %%r11\n\t"
401 "addq %[o3], %%r10\n\t"
405 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
406 "movq %%r10, %%rcx\n\t"
407 "andq %%rax, %%rcx\n\t"
408 "movq %%rcx, %[o3]\n\t"
409 "shrdq $52, %%r11, %%r10\n\t"
412 "addq %[o4], %%r10\n\t"
413 "movq %%r10, %[o4]\n\t"
415 : [o0]
"=m"(out0), [o1]
"=m"(out1), [o2]
"=m"(out2),
416 [o3]
"+m"(out3), [o4]
"+m"(out4)
417 : [a0]
"r"(a0_v), [a1]
"r"(a1_v), [a2]
"r"(a2_v),
418 [a3]
"r"(a3_v), [a4]
"r"(a4_v), [bp]
"r"(b)
419 :
"rax",
"rcx",
"rdx",
"r8",
"r9",
"r10",
"r11",
"cc",
"memory"
421 r[0] = out0; r[1] = out1; r[2] = out2; r[3] = out3; r[4] = out4;
443 using u128 =
unsigned __int128;
444 std::uint64_t d_lo = 0, d_hi = 0;
445 std::uint64_t c_lo = 0, c_hi = 0;
446 std::uint64_t t3, t4, tx, u0;
447 std::uint64_t sl, sh;
448 const std::uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
453 __asm__ __volatile__(
454 "xor %%ecx, %%ecx\n\t"
455 "mov %[a0], %%rdx\n\t"
456 "mulxq 24(%[bp]), %[sl], %[sh]\n\t"
457 "adcx %[sl], %[dl]\n\t"
458 "adcx %[sh], %[dh]\n\t"
459 "mov %[a4], %%rdx\n\t"
460 "mulxq 32(%[bp]), %[sl], %[sh]\n\t"
461 "adox %[sl], %[cl]\n\t"
462 "adox %[sh], %[ch]\n\t"
463 "mov %[a1], %%rdx\n\t"
464 "mulxq 16(%[bp]), %[sl], %[sh]\n\t"
465 "adcx %[sl], %[dl]\n\t"
466 "adcx %[sh], %[dh]\n\t"
467 "mov %[a2], %%rdx\n\t"
468 "mulxq 8(%[bp]), %[sl], %[sh]\n\t"
469 "adcx %[sl], %[dl]\n\t"
470 "adcx %[sh], %[dh]\n\t"
471 "mov %[a3], %%rdx\n\t"
472 "mulxq (%[bp]), %[sl], %[sh]\n\t"
473 "adcx %[sl], %[dl]\n\t"
474 "adcx %[sh], %[dh]\n\t"
475 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
476 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
477 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
478 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4),
483 { u128 dv = ((u128)d_hi << 64) | d_lo;
484 dv += (u128)R52 * c_lo;
485 d_lo = (std::uint64_t)dv; d_hi = (std::uint64_t)(dv >> 64); }
486 c_lo = c_hi; c_hi = 0;
488 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
492 __asm__ __volatile__(
493 "xor %%ecx, %%ecx\n\t"
494 "mov %[a0], %%rdx\n\t"
495 "mulxq 32(%[bp]), %[sl], %[sh]\n\t"
496 "adcx %[sl], %[dl]\n\t"
497 "adcx %[sh], %[dh]\n\t"
498 "mov %[a1], %%rdx\n\t"
499 "mulxq 24(%[bp]), %[sl], %[sh]\n\t"
500 "adcx %[sl], %[dl]\n\t"
501 "adcx %[sh], %[dh]\n\t"
502 "mov %[a2], %%rdx\n\t"
503 "mulxq 16(%[bp]), %[sl], %[sh]\n\t"
504 "adcx %[sl], %[dl]\n\t"
505 "adcx %[sh], %[dh]\n\t"
506 "mov %[a3], %%rdx\n\t"
507 "mulxq 8(%[bp]), %[sl], %[sh]\n\t"
508 "adcx %[sl], %[dl]\n\t"
509 "adcx %[sh], %[dh]\n\t"
510 "mov %[a4], %%rdx\n\t"
511 "mulxq (%[bp]), %[sl], %[sh]\n\t"
512 "adcx %[sl], %[dl]\n\t"
513 "adcx %[sh], %[dh]\n\t"
514 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
515 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
516 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4),
521 { u128 dv = ((u128)d_hi << 64) | d_lo;
522 dv += (u128)(R52 << 12) * c_lo;
523 d_lo = (std::uint64_t)dv; d_hi = (std::uint64_t)(dv >> 64); }
525 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
526 tx = (t4 >> 48); t4 &= (
M52 >> 4);
532 __asm__ __volatile__(
533 "xor %%ecx, %%ecx\n\t"
534 "mov %[a0], %%rdx\n\t"
535 "mulxq (%[bp]), %[sl], %[sh]\n\t"
536 "adox %[sl], %[cl]\n\t"
537 "adox %[sh], %[ch]\n\t"
538 "mov %[a1], %%rdx\n\t"
539 "mulxq 32(%[bp]), %[sl], %[sh]\n\t"
540 "adcx %[sl], %[dl]\n\t"
541 "adcx %[sh], %[dh]\n\t"
542 "mov %[a2], %%rdx\n\t"
543 "mulxq 24(%[bp]), %[sl], %[sh]\n\t"
544 "adcx %[sl], %[dl]\n\t"
545 "adcx %[sh], %[dh]\n\t"
546 "mov %[a3], %%rdx\n\t"
547 "mulxq 16(%[bp]), %[sl], %[sh]\n\t"
548 "adcx %[sl], %[dl]\n\t"
549 "adcx %[sh], %[dh]\n\t"
550 "mov %[a4], %%rdx\n\t"
551 "mulxq 8(%[bp]), %[sl], %[sh]\n\t"
552 "adcx %[sl], %[dl]\n\t"
553 "adcx %[sh], %[dh]\n\t"
554 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
555 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
556 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
557 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4),
562 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
565 { u128 cv = ((u128)c_hi << 64) | c_lo;
566 cv += (u128)u0 * (R52 >> 4);
567 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
569 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
574 __asm__ __volatile__(
575 "xor %%ecx, %%ecx\n\t"
576 "mov %[a0], %%rdx\n\t"
577 "mulxq 8(%[bp]), %[sl], %[sh]\n\t"
578 "adox %[sl], %[cl]\n\t"
579 "adox %[sh], %[ch]\n\t"
580 "mov %[a2], %%rdx\n\t"
581 "mulxq 32(%[bp]), %[sl], %[sh]\n\t"
582 "adcx %[sl], %[dl]\n\t"
583 "adcx %[sh], %[dh]\n\t"
584 "mov %[a1], %%rdx\n\t"
585 "mulxq (%[bp]), %[sl], %[sh]\n\t"
586 "adox %[sl], %[cl]\n\t"
587 "adox %[sh], %[ch]\n\t"
588 "mov %[a3], %%rdx\n\t"
589 "mulxq 24(%[bp]), %[sl], %[sh]\n\t"
590 "adcx %[sl], %[dl]\n\t"
591 "adcx %[sh], %[dh]\n\t"
592 "mov %[a4], %%rdx\n\t"
593 "mulxq 16(%[bp]), %[sl], %[sh]\n\t"
594 "adcx %[sl], %[dl]\n\t"
595 "adcx %[sh], %[dh]\n\t"
596 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
597 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
598 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
599 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4),
604 { std::uint64_t d_masked = d_lo &
M52;
605 u128 cv = ((u128)c_hi << 64) | c_lo;
606 cv += (u128)d_masked * R52;
607 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
608 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
610 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
615 __asm__ __volatile__(
616 "xor %%ecx, %%ecx\n\t"
617 "mov %[a0], %%rdx\n\t"
618 "mulxq 16(%[bp]), %[sl], %[sh]\n\t"
619 "adox %[sl], %[cl]\n\t"
620 "adox %[sh], %[ch]\n\t"
621 "mov %[a3], %%rdx\n\t"
622 "mulxq 32(%[bp]), %[sl], %[sh]\n\t"
623 "adcx %[sl], %[dl]\n\t"
624 "adcx %[sh], %[dh]\n\t"
625 "mov %[a1], %%rdx\n\t"
626 "mulxq 8(%[bp]), %[sl], %[sh]\n\t"
627 "adox %[sl], %[cl]\n\t"
628 "adox %[sh], %[ch]\n\t"
629 "mov %[a4], %%rdx\n\t"
630 "mulxq 24(%[bp]), %[sl], %[sh]\n\t"
631 "adcx %[sl], %[dl]\n\t"
632 "adcx %[sh], %[dh]\n\t"
633 "mov %[a2], %%rdx\n\t"
634 "mulxq (%[bp]), %[sl], %[sh]\n\t"
635 "adox %[sl], %[cl]\n\t"
636 "adox %[sh], %[ch]\n\t"
637 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
638 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
639 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
640 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4),
645 { u128 cv = ((u128)c_hi << 64) | c_lo;
646 cv += (u128)R52 * d_lo;
647 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
648 d_lo = d_hi; d_hi = 0;
650 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
653 { u128 cv = ((u128)c_hi << 64) | c_lo;
654 cv += (u128)(R52 << 12) * d_lo;
656 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
658 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
662 using u128 =
unsigned __int128;
664 std::uint64_t t3 = 0, t4 = 0, tx = 0, u0 = 0;
665 const std::uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
673 d += (u128)R52 * (std::uint64_t)c;
675 t3 = (std::uint64_t)d & M52;
684 d += (u128)(R52 << 12) * (std::uint64_t)c;
685 t4 = (std::uint64_t)d & M52;
687 tx = (t4 >> 48); t4 &= (
M52 >> 4);
695 u0 = (std::uint64_t)d & M52;
698 c += (u128)u0 * (R52 >> 4);
699 r[0] = (std::uint64_t)c & M52;
708 c += (u128)((std::uint64_t)d &
M52) * R52;
710 r[1] = (std::uint64_t)c & M52;
719 c += (u128)R52 * (std::uint64_t)d;
721 r[2] = (std::uint64_t)c & M52;
725 c += (u128)(R52 << 12) * (std::uint64_t)d;
727 r[3] = (std::uint64_t)c & M52;
730 r[4] = (std::uint64_t)c;
741SECP256K1_FE52_FORCE_INLINE
742void fe52_sqr_inner(std::uint64_t* r,
743 const std::uint64_t* a)
noexcept {
744#if defined(SECP256K1_RISCV_FE52_V1)
747 fe52_sqr_inner_riscv64(r, a);
755 std::uint64_t out0, out1, out2, out3 = 0, out4 = 0;
756 const std::uint64_t a0_v = a[0], a1_v = a[1], a2_v = a[2];
757 const std::uint64_t a3_v = a[3], a4_v = a[4];
758 __asm__ __volatile__ (
761 "xorl %%r8d, %%r8d\n\t"
762 "xorl %%r9d, %%r9d\n\t"
764 "leaq (%[a0], %[a0]), %%rdx\n\t"
765 "mulxq %[a3], %%rax, %%rcx\n\t"
766 "adcxq %%rax, %%r8\n\t"
767 "adcxq %%rcx, %%r9\n\t"
768 "leaq (%[a1], %[a1]), %%rdx\n\t"
769 "mulxq %[a2], %%rax, %%rcx\n\t"
770 "adcxq %%rax, %%r8\n\t"
771 "adcxq %%rcx, %%r9\n\t"
774 "movq %[a4], %%rdx\n\t"
775 "mulxq %[a4], %%r10, %%r11\n\t"
778 "movabsq $0x1000003D10, %%rdx\n\t"
779 "mulxq %%r10, %%rax, %%rcx\n\t"
780 "addq %%rax, %%r8\n\t"
781 "adcq %%rcx, %%r9\n\t"
783 "movq %%r11, %%r10\n\t"
786 "movq %%r8, %%rax\n\t"
787 "movq $0xFFFFFFFFFFFFF, %%rcx\n\t"
788 "andq %%rcx, %%rax\n\t"
789 "movq %%rax, %[o3]\n\t"
790 "shrdq $52, %%r9, %%r8\n\t"
795 "xorl %%eax, %%eax\n\t"
796 "leaq (%[a0], %[a0]), %%rdx\n\t"
797 "mulxq %[a4], %%rax, %%rcx\n\t"
798 "adcxq %%rax, %%r8\n\t"
799 "adcxq %%rcx, %%r9\n\t"
800 "leaq (%[a1], %[a1]), %%rdx\n\t"
801 "mulxq %[a3], %%rax, %%rcx\n\t"
802 "adcxq %%rax, %%r8\n\t"
803 "adcxq %%rcx, %%r9\n\t"
804 "movq %[a2], %%rdx\n\t"
805 "mulxq %[a2], %%rax, %%rcx\n\t"
806 "adcxq %%rax, %%r8\n\t"
807 "adcxq %%rcx, %%r9\n\t"
810 "movabsq $0x1000003D10000, %%rdx\n\t"
811 "mulxq %%r10, %%rax, %%rcx\n\t"
812 "addq %%rax, %%r8\n\t"
813 "adcq %%rcx, %%r9\n\t"
816 "movq %%r8, %%r10\n\t"
817 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
818 "andq %%rax, %%r10\n\t"
819 "shrdq $52, %%r9, %%r8\n\t"
824 "xorl %%eax, %%eax\n\t"
825 "leaq (%[a1], %[a1]), %%rdx\n\t"
826 "mulxq %[a4], %%rax, %%rcx\n\t"
827 "adcxq %%rax, %%r8\n\t"
828 "adcxq %%rcx, %%r9\n\t"
829 "leaq (%[a2], %[a2]), %%rdx\n\t"
830 "mulxq %[a3], %%rax, %%rcx\n\t"
831 "adcxq %%rax, %%r8\n\t"
832 "adcxq %%rcx, %%r9\n\t"
835 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
836 "movq %%r8, %%rcx\n\t"
837 "andq %%rax, %%rcx\n\t"
838 "shrdq $52, %%r9, %%r8\n\t"
841 "movq %%r10, %%rax\n\t"
842 "shrq $48, %%rax\n\t"
843 "orq %%rax, %%rcx\n\t"
845 "movq $0xFFFFFFFFFFFF, %%rax\n\t"
846 "andq %%rax, %%r10\n\t"
847 "movq %%r10, %[o4]\n\t"
850 "movq %[a0], %%rdx\n\t"
851 "mulxq %[a0], %%r10, %%r11\n\t"
852 "movabsq $0x1000003D1, %%rdx\n\t"
853 "mulxq %%rcx, %%rax, %%rcx\n\t"
854 "addq %%rax, %%r10\n\t"
855 "adcq %%rcx, %%r11\n\t"
858 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
859 "movq %%r10, %%rcx\n\t"
860 "andq %%rax, %%rcx\n\t"
861 "movq %%rcx, %[o0]\n\t"
862 "shrdq $52, %%r11, %%r10\n\t"
863 "shrq $52, %%r11\n\t"
867 "xorl %%eax, %%eax\n\t"
868 "leaq (%[a0], %[a0]), %%rdx\n\t"
869 "mulxq %[a1], %%rax, %%rcx\n\t"
870 "adoxq %%rax, %%r10\n\t"
871 "adoxq %%rcx, %%r11\n\t"
872 "leaq (%[a2], %[a2]), %%rdx\n\t"
873 "mulxq %[a4], %%rax, %%rcx\n\t"
874 "adcxq %%rax, %%r8\n\t"
875 "adcxq %%rcx, %%r9\n\t"
876 "movq %[a3], %%rdx\n\t"
877 "mulxq %[a3], %%rax, %%rcx\n\t"
878 "adcxq %%rax, %%r8\n\t"
879 "adcxq %%rcx, %%r9\n\t"
882 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
883 "movq %%r8, %%rcx\n\t"
884 "andq %%rax, %%rcx\n\t"
885 "shrdq $52, %%r9, %%r8\n\t"
887 "movabsq $0x1000003D10, %%rdx\n\t"
888 "mulxq %%rcx, %%rax, %%rcx\n\t"
889 "addq %%rax, %%r10\n\t"
890 "adcq %%rcx, %%r11\n\t"
893 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
894 "movq %%r10, %%rcx\n\t"
895 "andq %%rax, %%rcx\n\t"
896 "movq %%rcx, %[o1]\n\t"
897 "shrdq $52, %%r11, %%r10\n\t"
898 "shrq $52, %%r11\n\t"
902 "xorl %%eax, %%eax\n\t"
903 "leaq (%[a0], %[a0]), %%rdx\n\t"
904 "mulxq %[a2], %%rax, %%rcx\n\t"
905 "adoxq %%rax, %%r10\n\t"
906 "adoxq %%rcx, %%r11\n\t"
907 "leaq (%[a3], %[a3]), %%rdx\n\t"
908 "mulxq %[a4], %%rax, %%rcx\n\t"
909 "adcxq %%rax, %%r8\n\t"
910 "adcxq %%rcx, %%r9\n\t"
911 "movq %[a1], %%rdx\n\t"
912 "mulxq %[a1], %%rax, %%rcx\n\t"
913 "adoxq %%rax, %%r10\n\t"
914 "adoxq %%rcx, %%r11\n\t"
917 "movabsq $0x1000003D10, %%rdx\n\t"
918 "mulxq %%r8, %%rax, %%rcx\n\t"
919 "addq %%rax, %%r10\n\t"
920 "adcq %%rcx, %%r11\n\t"
921 "movq %%r9, %%r8\n\t"
922 "xorl %%r9d, %%r9d\n\t"
925 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
926 "movq %%r10, %%rcx\n\t"
927 "andq %%rax, %%rcx\n\t"
928 "movq %%rcx, %[o2]\n\t"
929 "shrdq $52, %%r11, %%r10\n\t"
930 "shrq $52, %%r11\n\t"
933 "movabsq $0x1000003D10000, %%rdx\n\t"
934 "mulxq %%r8, %%rax, %%rcx\n\t"
935 "addq %%rax, %%r10\n\t"
936 "adcq %%rcx, %%r11\n\t"
937 "addq %[o3], %%r10\n\t"
941 "movq $0xFFFFFFFFFFFFF, %%rax\n\t"
942 "movq %%r10, %%rcx\n\t"
943 "andq %%rax, %%rcx\n\t"
944 "movq %%rcx, %[o3]\n\t"
945 "shrdq $52, %%r11, %%r10\n\t"
948 "addq %[o4], %%r10\n\t"
949 "movq %%r10, %[o4]\n\t"
951 : [o0]
"=m"(out0), [o1]
"=m"(out1), [o2]
"=m"(out2),
952 [o3]
"+m"(out3), [o4]
"+m"(out4)
953 : [a0]
"r"(a0_v), [a1]
"r"(a1_v), [a2]
"r"(a2_v),
954 [a3]
"r"(a3_v), [a4]
"r"(a4_v)
955 :
"rax",
"rcx",
"rdx",
"r8",
"r9",
"r10",
"r11",
"cc",
"memory"
957 r[0] = out0; r[1] = out1; r[2] = out2; r[3] = out3; r[4] = out4;
966 using u128 =
unsigned __int128;
967 std::uint64_t d_lo = 0, d_hi = 0;
968 std::uint64_t c_lo = 0, c_hi = 0;
969 std::uint64_t t3, t4, tx, u0;
970 std::uint64_t sl, sh;
971 const std::uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
976 __asm__ __volatile__(
977 "xor %%ecx, %%ecx\n\t"
978 "lea (%[a0], %[a0]), %%rdx\n\t"
979 "mulxq %[a3], %[sl], %[sh]\n\t"
980 "adcx %[sl], %[dl]\n\t"
981 "adcx %[sh], %[dh]\n\t"
982 "mov %[a4], %%rdx\n\t"
983 "mulxq %[a4], %[sl], %[sh]\n\t"
984 "adox %[sl], %[cl]\n\t"
985 "adox %[sh], %[ch]\n\t"
986 "lea (%[a1], %[a1]), %%rdx\n\t"
987 "mulxq %[a2], %[sl], %[sh]\n\t"
988 "adcx %[sl], %[dl]\n\t"
989 "adcx %[sh], %[dh]\n\t"
990 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
991 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
992 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
993 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4)
996 { u128 dv = ((u128)d_hi << 64) | d_lo;
997 dv += (u128)R52 * c_lo;
998 d_lo = (std::uint64_t)dv; d_hi = (std::uint64_t)(dv >> 64); }
999 c_lo = c_hi; c_hi = 0;
1001 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
1005 __asm__ __volatile__(
1006 "xor %%ecx, %%ecx\n\t"
1007 "lea (%[a0], %[a0]), %%rdx\n\t"
1008 "mulxq %[a4], %[sl], %[sh]\n\t"
1009 "adcx %[sl], %[dl]\n\t"
1010 "adcx %[sh], %[dh]\n\t"
1011 "lea (%[a1], %[a1]), %%rdx\n\t"
1012 "mulxq %[a3], %[sl], %[sh]\n\t"
1013 "adcx %[sl], %[dl]\n\t"
1014 "adcx %[sh], %[dh]\n\t"
1015 "mov %[a2], %%rdx\n\t"
1016 "mulxq %[a2], %[sl], %[sh]\n\t"
1017 "adcx %[sl], %[dl]\n\t"
1018 "adcx %[sh], %[dh]\n\t"
1019 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
1020 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
1021 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4)
1022 :
"rdx",
"rcx",
"cc"
1024 { u128 dv = ((u128)d_hi << 64) | d_lo;
1025 dv += (u128)(R52 << 12) * c_lo;
1026 d_lo = (std::uint64_t)dv; d_hi = (std::uint64_t)(dv >> 64); }
1028 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
1029 tx = (t4 >> 48); t4 &= (
M52 >> 4);
1035 __asm__ __volatile__(
1036 "xor %%ecx, %%ecx\n\t"
1037 "mov %[a0], %%rdx\n\t"
1038 "mulxq %[a0], %[sl], %[sh]\n\t"
1039 "adox %[sl], %[cl]\n\t"
1040 "adox %[sh], %[ch]\n\t"
1041 "lea (%[a1], %[a1]), %%rdx\n\t"
1042 "mulxq %[a4], %[sl], %[sh]\n\t"
1043 "adcx %[sl], %[dl]\n\t"
1044 "adcx %[sh], %[dh]\n\t"
1045 "lea (%[a2], %[a2]), %%rdx\n\t"
1046 "mulxq %[a3], %[sl], %[sh]\n\t"
1047 "adcx %[sl], %[dl]\n\t"
1048 "adcx %[sh], %[dh]\n\t"
1049 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
1050 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
1051 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
1052 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4)
1053 :
"rdx",
"rcx",
"cc"
1056 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
1057 u0 = (u0 << 4) | tx;
1058 { u128 cv = ((u128)c_hi << 64) | c_lo;
1059 cv += (u128)u0 * (R52 >> 4);
1060 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
1062 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
1067 __asm__ __volatile__(
1068 "xor %%ecx, %%ecx\n\t"
1069 "lea (%[a0], %[a0]), %%rdx\n\t"
1070 "mulxq %[a1], %[sl], %[sh]\n\t"
1071 "adox %[sl], %[cl]\n\t"
1072 "adox %[sh], %[ch]\n\t"
1073 "lea (%[a2], %[a2]), %%rdx\n\t"
1074 "mulxq %[a4], %[sl], %[sh]\n\t"
1075 "adcx %[sl], %[dl]\n\t"
1076 "adcx %[sh], %[dh]\n\t"
1077 "mov %[a3], %%rdx\n\t"
1078 "mulxq %[a3], %[sl], %[sh]\n\t"
1079 "adcx %[sl], %[dl]\n\t"
1080 "adcx %[sh], %[dh]\n\t"
1081 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
1082 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
1083 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
1084 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4)
1085 :
"rdx",
"rcx",
"cc"
1087 { std::uint64_t d_masked = d_lo &
M52;
1088 u128 cv = ((u128)c_hi << 64) | c_lo;
1089 cv += (u128)d_masked * R52;
1090 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
1091 d_lo = (d_lo >> 52) | (d_hi << 12); d_hi >>= 52;
1093 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
1098 __asm__ __volatile__(
1099 "xor %%ecx, %%ecx\n\t"
1100 "lea (%[a0], %[a0]), %%rdx\n\t"
1101 "mulxq %[a2], %[sl], %[sh]\n\t"
1102 "adox %[sl], %[cl]\n\t"
1103 "adox %[sh], %[ch]\n\t"
1104 "lea (%[a3], %[a3]), %%rdx\n\t"
1105 "mulxq %[a4], %[sl], %[sh]\n\t"
1106 "adcx %[sl], %[dl]\n\t"
1107 "adcx %[sh], %[dh]\n\t"
1108 "mov %[a1], %%rdx\n\t"
1109 "mulxq %[a1], %[sl], %[sh]\n\t"
1110 "adox %[sl], %[cl]\n\t"
1111 "adox %[sh], %[ch]\n\t"
1112 : [dl]
"+&r"(d_lo), [dh]
"+&r"(d_hi),
1113 [cl]
"+&r"(c_lo), [ch]
"+&r"(c_hi),
1114 [sl]
"=&r"(sl), [sh]
"=&r"(sh)
1115 : [a0]
"r"(a0), [a1]
"r"(a1), [a2]
"r"(a2), [a3]
"r"(a3), [a4]
"r"(a4)
1116 :
"rdx",
"rcx",
"cc"
1118 { u128 cv = ((u128)c_hi << 64) | c_lo;
1119 cv += (u128)R52 * d_lo;
1120 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
1121 d_lo = d_hi; d_hi = 0;
1123 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
1126 { u128 cv = ((u128)c_hi << 64) | c_lo;
1127 cv += (u128)(R52 << 12) * d_lo;
1129 c_lo = (std::uint64_t)cv; c_hi = (std::uint64_t)(cv >> 64); }
1131 c_lo = (c_lo >> 52) | (c_hi << 12); c_hi >>= 52;
1135 using u128 =
unsigned __int128;
1137 std::uint64_t t3 = 0, t4 = 0, tx = 0, u0 = 0;
1138 const std::uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
1141 d = (u128)(a0 * 2) * a3
1142 + (u128)(a1 * 2) * a2;
1144 d += (u128)R52 * (std::uint64_t)c;
1146 t3 = (std::uint64_t)d & M52;
1150 d += (u128)(a0 * 2) * a4
1151 + (u128)(a1 * 2) * a3
1153 d += (u128)(R52 << 12) * (std::uint64_t)c;
1154 t4 = (std::uint64_t)d & M52;
1156 tx = (t4 >> 48); t4 &= (
M52 >> 4);
1160 d += (u128)(a1 * 2) * a4
1161 + (u128)(a2 * 2) * a3;
1162 u0 = (std::uint64_t)d & M52;
1164 u0 = (u0 << 4) | tx;
1165 c += (u128)u0 * (R52 >> 4);
1166 r[0] = (std::uint64_t)c & M52;
1170 c += (u128)(a0 * 2) * a1;
1171 d += (u128)(a2 * 2) * a4
1173 c += (u128)((std::uint64_t)d &
M52) * R52;
1175 r[1] = (std::uint64_t)c & M52;
1179 c += (u128)(a0 * 2) * a2
1181 d += (u128)(a3 * 2) * a4;
1182 c += (u128)R52 * (std::uint64_t)d;
1184 r[2] = (std::uint64_t)c & M52;
1188 c += (u128)(R52 << 12) * (std::uint64_t)d;
1190 r[3] = (std::uint64_t)c & M52;
1193 r[4] = (std::uint64_t)c;
1201SECP256K1_FE52_FORCE_INLINE
1202void fe52_normalize_weak(std::uint64_t* r)
noexcept {
1203 std::uint64_t t0 = r[0], t1 = r[1], t2 = r[2], t3 = r[3], t4 = r[4];
1207 t1 += (t0 >> 52); t0 &=
M52;
1208 t2 += (t1 >> 52); t1 &=
M52;
1209 t3 += (t2 >> 52); t2 &=
M52;
1210 t4 += (t3 >> 52); t3 &=
M52;
1212 std::uint64_t
const x = t4 >> 48;
1214 t0 += x * 0x1000003D1ULL;
1216 t1 += (t0 >> 52); t0 &=
M52;
1217 t2 += (t1 >> 52); t1 &=
M52;
1218 t3 += (t2 >> 52); t2 &=
M52;
1219 t4 += (t3 >> 52); t3 &=
M52;
1220 r[0] = t0; r[1] = t1; r[2] = t2; r[3] = t3; r[4] = t4;
1229SECP256K1_FE52_FORCE_INLINE
1232 fe52_mul_inner(r.n, n, rhs.n);
1236SECP256K1_FE52_FORCE_INLINE
1239 fe52_sqr_inner(r.n,
n);
1243SECP256K1_FE52_FORCE_INLINE
1245 fe52_mul_inner(n, n, rhs.n);
1248SECP256K1_FE52_FORCE_INLINE
1250 fe52_sqr_inner(
n,
n);
1255SECP256K1_FE52_FORCE_INLINE
1258 r.
n[0] = n[0] + rhs.n[0];
1259 r.n[1] = n[1] + rhs.n[1];
1260 r.n[2] = n[2] + rhs.n[2];
1261 r.n[3] = n[3] + rhs.n[3];
1262 r.n[4] = n[4] + rhs.n[4];
1266SECP256K1_FE52_FORCE_INLINE
1277SECP256K1_FE52_FORCE_INLINE
1279 using namespace fe52_constants;
1281 const std::uint64_t m1 =
static_cast<std::uint64_t
>(magnitude) + 1ULL;
1282 r.
n[0] = m1 *
P0 - n[0];
1283 r.n[1] = m1 *
P1 - n[1];
1284 r.n[2] = m1 *
P2 - n[2];
1285 r.n[3] = m1 *
P3 - n[3];
1286 r.n[4] = m1 *
P4 - n[4];
1290SECP256K1_FE52_FORCE_INLINE
1292 const std::uint64_t m1 =
static_cast<std::uint64_t
>(magnitude) + 1ULL;
1293 n[0] = m1 *
P0 - n[0];
1294 n[1] = m1 *
P1 - n[1];
1295 n[2] = m1 *
P2 - n[2];
1296 n[3] = m1 *
P3 - n[3];
1297 n[4] = m1 *
P4 - n[4];
1303SECP256K1_FE52_FORCE_INLINE
1305 const auto mask =
static_cast<std::uint64_t
>(
static_cast<std::int64_t
>(sign_mask));
1307 const std::uint64_t neg0 = 2ULL *
P0 - n[0];
1308 const std::uint64_t neg1 = 2ULL *
P1 - n[1];
1309 const std::uint64_t neg2 = 2ULL *
P2 - n[2];
1310 const std::uint64_t neg3 = 2ULL *
P3 - n[3];
1311 const std::uint64_t neg4 = 2ULL *
P4 - n[4];
1313 n[0] ^= (n[0] ^ neg0) & mask;
1314 n[1] ^= (n[1] ^ neg1) & mask;
1315 n[2] ^= (n[2] ^ neg2) & mask;
1316 n[3] ^= (n[3] ^ neg3) & mask;
1317 n[4] ^= (n[4] ^ neg4) & mask;
1322SECP256K1_FE52_FORCE_INLINE
1324 fe52_normalize_weak(
n);
1335SECP256K1_FE52_FORCE_INLINE
1337 const std::uint64_t* src =
n;
1338 const std::uint64_t
one = 1ULL;
1339 const std::uint64_t mask = (0ULL - (src[0] &
one)) >> 12;
1342 const std::uint64_t t0 = src[0] + (0xFFFFEFFFFFC2FULL & mask);
1343 const std::uint64_t t1 = src[1] + mask;
1344 const std::uint64_t t2 = src[2] + mask;
1345 const std::uint64_t t3 = src[3] + mask;
1346 const std::uint64_t t4 = src[4] + (mask >> 4);
1353 r.
n[0] = (t0 >> 1) + ((t1 &
one) << 51);
1354 r.n[1] = (t1 >> 1) + ((t2 &
one) << 51);
1355 r.n[2] = (t2 >> 1) + ((t3 &
one) << 51);
1356 r.n[3] = (t3 >> 1) + ((t4 &
one) << 51);
1361SECP256K1_FE52_FORCE_INLINE
1363 const std::uint64_t
one = 1ULL;
1364 const std::uint64_t mask = (0ULL - (
n[0] &
one)) >> 12;
1366 const std::uint64_t t0 =
n[0] + (0xFFFFEFFFFFC2FULL & mask);
1367 const std::uint64_t t1 =
n[1] + mask;
1368 const std::uint64_t t2 =
n[2] + mask;
1369 const std::uint64_t t3 =
n[3] + mask;
1370 const std::uint64_t t4 =
n[4] + (mask >> 4);
1373 n[0] = (t0 >> 1) + ((t1 &
one) << 51);
1374 n[1] = (t1 >> 1) + ((t2 &
one) << 51);
1375 n[2] = (t2 >> 1) + ((t3 &
one) << 51);
1376 n[3] = (t3 >> 1) + ((t4 &
one) << 51);
1384SECP256K1_FE52_FORCE_INLINE
1397SECP256K1_FE52_FORCE_INLINE
1398static void fe52_normalize_inline(std::uint64_t* r)
noexcept {
1399 std::uint64_t t0 = r[0], t1 = r[1], t2 = r[2], t3 = r[3], t4 = r[4];
1403 std::uint64_t m = 0;
1404 std::uint64_t x = t4 >> 48; t4 &=
M48;
1407 t0 += x * 0x1000003D1ULL;
1408 t1 += (t0 >> 52); t0 &=
M52;
1409 t2 += (t1 >> 52); t1 &=
M52; m = t1;
1410 t3 += (t2 >> 52); t2 &=
M52; m &= t2;
1411 t4 += (t3 >> 52); t3 &=
M52; m &= t3;
1417 x = (t4 >> 48) | ((t4 == M48) & (m ==
M52)
1418 & (t0 >= 0xFFFFEFFFFFC2FULL));
1421 t0 += x * 0x1000003D1ULL;
1422 t1 += (t0 >> 52); t0 &=
M52;
1423 t2 += (t1 >> 52); t1 &=
M52;
1424 t3 += (t2 >> 52); t2 &=
M52;
1425 t4 += (t3 >> 52); t3 &=
M52;
1428 r[0] = t0; r[1] = t1; r[2] = t2; r[3] = t3; r[4] = t4;
1433SECP256K1_FE52_FORCE_INLINE
1435 fe52_normalize_inline(
n);
1447SECP256K1_FE52_FORCE_INLINE
1449 std::uint64_t t[5] = {
n[0],
n[1],
n[2],
n[3],
n[4]};
1450 fe52_normalize_inline(t);
1451 return (t[0] | t[1] | t[2] | t[3] | t[4]) == 0;
1468SECP256K1_FE52_FORCE_INLINE
1470 using namespace fe52_constants;
1471 std::uint64_t t0 =
n[0], t4 =
n[4];
1476 const std::uint64_t x = t4 >> 48;
1477 t0 += x * 0x1000003D1ULL;
1482 std::uint64_t z0 = t0 &
M52;
1483 std::uint64_t z1 = z0 ^ 0x1000003D0ULL;
1486 if ((z0 != 0ULL) & (z1 != M52)) {
1491 std::uint64_t t1 =
n[1], t2 =
n[2], t3 =
n[3];
1495 t2 += (t1 >> 52); t1 &=
M52; z0 |= t1; z1 &= t1;
1496 t3 += (t2 >> 52); t2 &=
M52; z0 |= t2; z1 &= t2;
1497 t4 += (t3 >> 52); t3 &=
M52; z0 |= t3; z1 &= t3;
1498 z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
1500 return (z0 == 0) | (z1 ==
M52);
1505SECP256K1_FE52_FORCE_INLINE
1507 const auto& L = fe.limbs();
1509 r.
n[0] = L[0] &
M52;
1510 r.n[1] = (L[0] >> 52) | ((L[1] & 0xFFFFFFFFFFULL) << 12);
1511 r.n[2] = (L[1] >> 40) | ((L[2] & 0xFFFFFFFULL) << 24);
1512 r.n[3] = (L[2] >> 28) | ((L[3] & 0xFFFFULL) << 36);
1513 r.n[4] = L[3] >> 16;
1519SECP256K1_FE52_FORCE_INLINE
1521 FieldElement52 tmp = *
this;
1522 fe52_normalize_inline(tmp.n);
1525 L[0] = tmp.n[0] | (tmp.n[1] << 52);
1526 L[1] = (tmp.n[1] >> 12) | (tmp.n[2] << 40);
1527 L[2] = (tmp.n[2] >> 24) | (tmp.n[3] << 28);
1528 L[3] = (tmp.n[3] >> 36) | (tmp.n[4] << 16);
1533SECP256K1_FE52_FORCE_INLINE
1537 FieldElement52 tmp = *
this;
1538 fe52_normalize_inline(tmp.n);
1540 out[ 0] =
static_cast<std::uint8_t
>(tmp.n[4] >> 40);
1541 out[ 1] =
static_cast<std::uint8_t
>(tmp.n[4] >> 32);
1542 out[ 2] =
static_cast<std::uint8_t
>(tmp.n[4] >> 24);
1543 out[ 3] =
static_cast<std::uint8_t
>(tmp.n[4] >> 16);
1544 out[ 4] =
static_cast<std::uint8_t
>(tmp.n[4] >> 8);
1545 out[ 5] =
static_cast<std::uint8_t
>(tmp.n[4] );
1546 out[ 6] =
static_cast<std::uint8_t
>(tmp.n[3] >> 44);
1547 out[ 7] =
static_cast<std::uint8_t
>(tmp.n[3] >> 36);
1548 out[ 8] =
static_cast<std::uint8_t
>(tmp.n[3] >> 28);
1549 out[ 9] =
static_cast<std::uint8_t
>(tmp.n[3] >> 20);
1550 out[10] =
static_cast<std::uint8_t
>(tmp.n[3] >> 12);
1551 out[11] =
static_cast<std::uint8_t
>(tmp.n[3] >> 4);
1552 out[12] =
static_cast<std::uint8_t
>(((tmp.n[2] >> 48) & 0xF) | ((tmp.n[3] & 0xF) << 4));
1553 out[13] =
static_cast<std::uint8_t
>(tmp.n[2] >> 40);
1554 out[14] =
static_cast<std::uint8_t
>(tmp.n[2] >> 32);
1555 out[15] =
static_cast<std::uint8_t
>(tmp.n[2] >> 24);
1556 out[16] =
static_cast<std::uint8_t
>(tmp.n[2] >> 16);
1557 out[17] =
static_cast<std::uint8_t
>(tmp.n[2] >> 8);
1558 out[18] =
static_cast<std::uint8_t
>(tmp.n[2] );
1559 out[19] =
static_cast<std::uint8_t
>(tmp.n[1] >> 44);
1560 out[20] =
static_cast<std::uint8_t
>(tmp.n[1] >> 36);
1561 out[21] =
static_cast<std::uint8_t
>(tmp.n[1] >> 28);
1562 out[22] =
static_cast<std::uint8_t
>(tmp.n[1] >> 20);
1563 out[23] =
static_cast<std::uint8_t
>(tmp.n[1] >> 12);
1564 out[24] =
static_cast<std::uint8_t
>(tmp.n[1] >> 4);
1565 out[25] =
static_cast<std::uint8_t
>(((tmp.n[0] >> 48) & 0xF) | ((tmp.n[1] & 0xF) << 4));
1566 out[26] =
static_cast<std::uint8_t
>(tmp.n[0] >> 40);
1567 out[27] =
static_cast<std::uint8_t
>(tmp.n[0] >> 32);
1568 out[28] =
static_cast<std::uint8_t
>(tmp.n[0] >> 24);
1569 out[29] =
static_cast<std::uint8_t
>(tmp.n[0] >> 16);
1570 out[30] =
static_cast<std::uint8_t
>(tmp.n[0] >> 8);
1571 out[31] =
static_cast<std::uint8_t
>(tmp.n[0] );
1577SECP256K1_FE52_FORCE_INLINE
1580 std::uint64_t L0 = n[0] | (n[1] << 52);
1581 std::uint64_t L1 = (n[1] >> 12) | (n[2] << 40);
1582 std::uint64_t L2 = (n[2] >> 24) | (n[3] << 28);
1583 std::uint64_t L3 = (n[3] >> 36) | (n[4] << 16);
1586#if defined(__GNUC__) || defined(__clang__)
1587 L3 = __builtin_bswap64(L3); L2 = __builtin_bswap64(L2);
1588 L1 = __builtin_bswap64(L1); L0 = __builtin_bswap64(L0);
1589#elif defined(_MSC_VER)
1590 L3 = _byteswap_uint64(L3); L2 = _byteswap_uint64(L2);
1591 L1 = _byteswap_uint64(L1); L0 = _byteswap_uint64(L0);
1594 auto bswap64 = [](std::uint64_t v) -> std::uint64_t {
1595 v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);
1596 v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16);
1597 return (v >> 32) | (v << 32);
1599 L3 = bswap64(L3); L2 = bswap64(L2);
1600 L1 = bswap64(L1); L0 = bswap64(L0);
1602 std::memcpy(out, &L3, 8);
1603 std::memcpy(out + 8, &L2, 8);
1604 std::memcpy(out + 16, &L1, 8);
1605 std::memcpy(out + 24, &L0, 8);
1612SECP256K1_FE52_FORCE_INLINE
1615 r.
n[0] = L[0] &
M52;
1616 r.n[1] = (L[0] >> 52) | ((L[1] & 0xFFFFFFFFFFULL) << 12);
1617 r.n[2] = (L[1] >> 40) | ((L[2] & 0xFFFFFFFULL) << 24);
1618 r.n[3] = (L[2] >> 28) | ((L[3] & 0xFFFFULL) << 36);
1619 r.n[4] = L[3] >> 16;
// Builds a field element from 32 bytes: the bytes are gathered into 64-bit words,
// the value is compared against the prime P, P is subtracted via two's-complement
// addition, and the result is converted with from_4x64_limbs.
// NOTE(review): the signature, the declaration of L, the store of each assembled
// limb into L, the initialisation of ge_p and the guard around the subtraction
// are not visible in this excerpt — the notes below hedge accordingly.
1626SECP256K1_FE52_FORCE_INLINE
// Assemble four 64-bit words, eight input bytes each; within a word the earlier
// byte is more significant.
1630 for (
int i = 0; i < 4; ++i) {
1631 std::uint64_t limb = 0;
1632 for (
int j = 0; j < 8; ++j) {
1633 limb = (limb << 8) | static_cast<std::uint64_t>(bytes[i * 8 + j]);
// P as four 64-bit words, least significant first. The value equals
// 2^256 - 2^32 - 977 (the secp256k1 field prime).
1639 static constexpr std::uint64_t P[4] = {
1640 0xFFFFFFFEFFFFFC2FULL, 0xFFFFFFFFFFFFFFFFULL,
1641 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
// Compare L against P from the most significant word down; the first differing
// word decides. ge_p is cleared when L < P and left untouched when L > P or all
// words are equal (presumably it starts out true — declaration not visible).
1647 for (
int i = 3; i >= 0; --i) {
1648 if (L[i] < P[i]) { ge_p =
false;
break; }
1649 if (L[i] > P[i]) {
break; }
// L -= P modulo 2^256, computed as L + ~P + 1 with the carry taken from the
// upper half of a 128-bit accumulator.
// NOTE(review): presumably executed only when ge_p is true — guard not visible.
1653 unsigned __int128 acc =
static_cast<unsigned __int128
>(L[0]) + (~P[0]) + 1;
1654 L[0] =
static_cast<std::uint64_t
>(acc);
1655 acc =
static_cast<unsigned __int128
>(L[1]) + (~P[1]) + (acc >> 64);
1656 L[1] =
static_cast<std::uint64_t
>(acc);
1657 acc =
static_cast<unsigned __int128
>(L[2]) + (~P[2]) + (acc >> 64);
1658 L[2] =
static_cast<std::uint64_t
>(acc);
// Top word: the carry out of bit 255 is deliberately discarded (64-bit wraparound).
1659 L[3] = L[3] + (~P[3]) +
static_cast<std::uint64_t
>(acc >> 64);
// Convert the 4x64 words into the FieldElement52 limb layout.
1661 return from_4x64_limbs(L);
// Convenience overload: forwards bytes.data() to the pointer-taking from_bytes.
// NOTE(review): the signature is not visible in this excerpt; presumably bytes is
// a const std::array<std::uint8_t, 32>& — confirm.
1664SECP256K1_FE52_FORCE_INLINE
1666 return from_bytes(bytes.data());
// Works on a normalized copy of *this and passes that copy's limbs, together with
// r.n as the output, to fe52_inverse_safegcd_var; *this itself is not modified by
// the visible statements.
// NOTE(review): the signature, the declaration of r and the return statement are
// not visible in this excerpt; presumably this is
// FieldElement52::inverse_safegcd() const noexcept — confirm.
// NOTE(review): the "_var" suffix suggests a variable-time routine — confirm
// before using this with secret inputs.
1673SECP256K1_FE52_FORCE_INLINE
1681 FieldElement52 tmp = *
this;
// Normalize the copy in place before handing its limbs to the inversion helper.
1682 fe52_normalize_inline(tmp.n);
1684 fe52_inverse_safegcd_var(tmp.n, r.n);
1690#if defined(__GNUC__)
1691#pragma GCC diagnostic pop
1695#undef SECP256K1_FE52_FORCE_INLINE
std::array< std::uint64_t, 4 > limbs_type
static FieldElement from_limbs_raw(const limbs_type &limbs) noexcept
secp256k1::fast::FieldElement FieldElement
constexpr std::uint32_t P0
constexpr std::uint32_t P4
constexpr std::uint32_t P2
constexpr std::uint32_t P1
constexpr std::uint32_t P3
constexpr std::uint64_t M48
constexpr std::uint64_t M52
void add_assign(const FieldElement52 &rhs) noexcept
void half_assign() noexcept
static FieldElement52 from_fe(const FieldElement &fe) noexcept
void store_b32_prenorm(std::uint8_t *out) const noexcept
void mul_assign(const FieldElement52 &rhs) noexcept
FieldElement52 inverse_safegcd() const noexcept
static FieldElement52 from_bytes(const std::array< std::uint8_t, 32 > &bytes) noexcept
FieldElement to_fe() const noexcept
void mul_int_assign(std::uint32_t a) noexcept
void negate_assign(unsigned magnitude) noexcept
bool normalizes_to_zero() const noexcept
void normalize() noexcept
static FieldElement52 from_4x64_limbs(const std::uint64_t *limbs) noexcept
static FieldElement52 zero() noexcept
void conditional_negate_assign(std::int32_t sign_mask) noexcept
FieldElement52 operator*(const FieldElement52 &rhs) const noexcept
FieldElement52 half() const noexcept
void to_bytes_into(std::uint8_t *out) const noexcept
void square_inplace() noexcept
bool normalizes_to_zero_var() const noexcept
FieldElement52 square() const noexcept
void normalize_weak() noexcept
FieldElement52 negate(unsigned magnitude) const noexcept
static FieldElement52 one() noexcept
FieldElement52 operator+(const FieldElement52 &rhs) const noexcept