UltrafastSecp256k1 3.50.0
Ultra high-performance secp256k1 elliptic curve cryptography library
Loading...
Searching...
No Matches
field_optimal.hpp
Go to the documentation of this file.
1#ifndef SECP256K1_FIELD_OPTIMAL_HPP
2#define SECP256K1_FIELD_OPTIMAL_HPP
3#pragma once
4
5// ============================================================================
6// Compile-Time Optimal Field Element Selection
7// ============================================================================
8//
9// Selects the best FieldElement representation per platform based on
10// measured benchmarks (Release builds, Feb 2026).
11//
12// KEY PRINCIPLE: Only activate alternative representations where they
13// actually beat the native FieldElement (4x64 + platform asm).
14// Where hand-tuned asm already exists and is faster, stay native.
15//
16// Decision matrix (measured, all verified Feb 2026):
17//
// Platform            | Native asm?     | Optimal        | Mul Winner (verified)
// --------------------+-----------------+----------------+----------------------------------
// x86-64 (BMI2/ADX)   | YES (GAS/MASM)  | FieldElement52 | 5x52 21ns vs native 44ns (+2.1x)
// ARM64 (RK3588)      | YES (MUL/UMULH) | FieldElement26 | 10x26 73ns vs native 85ns (+1.16x)
// RISC-V 64 (RVV)     | YES (RV asm)    | FieldElement52 | 5x52 142ns vs native 173ns (+1.22x)
// STM32 (Cortex-M4)   | YES (Comba)     | FieldElement   | native 15.4us vs 10x26 18.1us
// ESP32-S3 (LX7)      | NO              | FieldElement   | native 6.0us vs 10x26 8.6us
// ESP32-PICO (LX6)    | NO              | FieldElement26 | 10x26 6.4us vs native 7.1us (+1.11x)
// MSVC x64            | varies          | FieldElement   | no __int128
// ARM32 generic       | NO              | FieldElement26 | 10x26 expected
28//
29// USAGE:
30// #include "secp256k1/field_optimal.hpp"
31// using FE = secp256k1::fast::OptimalFieldElement;
32//
33// FE a = secp256k1::fast::to_optimal(some_4x64_element);
34// FE c = a * b; // uses best mul for this platform
35// FieldElement result = secp256k1::fast::from_optimal(c);
36//
37// API contract (all representations provide):
38// operator*, square(), operator+, add_assign(), negate(), half(),
39// normalize(), normalize_weak(), is_zero(), operator==,
40// from_fe()/to_fe() for FieldElement <-> optimal conversion.
41// ============================================================================
42
43#include "secp256k1/config.hpp"
44#include "secp256k1/field.hpp"
45
46// -- Step 1: Platforms where native asm already beats alternatives ------------
47//
48// On these platforms, hand-tuned assembly in FieldElement is already the
49// fastest option. Do NOT override with 5x52 or 10x26 -- it would be slower.
50//
51// SECP256K1_OPTIMAL_TIER_52 -> 5x52 limbs (64-bit + __int128)
52// SECP256K1_OPTIMAL_TIER_26 -> 10x26 limbs (32-bit, native 32x32->64)
53// SECP256K1_OPTIMAL_TIER_64 -> 4x64 limbs (native asm / fallback)
54//
55
56#if defined(SECP256K1_PLATFORM_STM32)
57 // -- STM32 Cortex-M with Comba asm: native FieldElement wins --
58 // Measured: native Mul 15.4us vs 10x26 Mul 18.1us (-18%)
59 // Native 32-bit Comba (esp32_mul_mod) is already optimal.
60 #define SECP256K1_OPTIMAL_TIER_64 1
61
62#elif defined(SECP256K1_HAS_ARM64_ASM) || defined(__aarch64__) || defined(_M_ARM64)
63 // -- ARM64 (MUL/UMULH asm): 10x26 wins everything --
64 // Verified on RK3588 (Cortex-A55/A76), Feb 2026:
65 // 10x26 Mul 73ns vs native 85ns vs 5x52 100ns -> 10x26 best
66 // 10x26 Sqr 53ns vs native 66ns vs 5x52 72ns -> 10x26 best
67 // 10x26 Add 7ns vs native 19ns vs 5x52 11ns -> 10x26 best
68 // Despite having 64-bit MUL, the 10x26 two-accumulator algorithm
69 // wins because it avoids __int128 overhead on AArch64 compilers.
70 #define SECP256K1_OPTIMAL_TIER_26 1
71 #define SECP256K1_OPTIMAL_ARM64 1
72 #include "secp256k1/field_26.hpp"
73
74// -- Step 2: Platforms where 5x52 wins ---------------------------------------
75
76#elif !defined(SECP256K1_NO_INT128) && \
77 (defined(__SIZEOF_INT128__) || \
78 (defined(__GNUC__) && !defined(__i386__) && !defined(__arm__) && !defined(__xtensa__)))
79 // -- 64-bit with __int128 (x86-64, RISC-V 64): 5x52 wins --
80 // Verified Feb 2026:
81 // x86-64: Mul 21ns vs native 44ns (+2.1x), Sqr 15ns vs 40ns (+2.7x)
82 // RISC-V: Mul 142ns vs native 173ns (+1.2x), Sqr 102ns vs 160ns (+1.6x)
83 // 5x52 lazy-reduction with __int128 beats native asm.
84 #define SECP256K1_OPTIMAL_TIER_52 1
85 #include "secp256k1/field_52.hpp"
86
87#elif defined(SECP256K1_32BIT)
88 // 32-bit platform -- per-MCU selection
89 #if defined(CONFIG_IDF_TARGET_ESP32S3) || defined(SECP256K1_FORCE_TIER_64)
90 // -- ESP32-S3 (LX7): native 4x64 emulated mul wins --
91 // Verified: Mul 5,818ns vs 10x26 8,639ns (+1.49x)
92 // Sqr 4,799ns vs 10x26 5,229ns (+1.09x)
93 #define SECP256K1_OPTIMAL_TIER_64 1
94 #else
95 // -- ESP32-PICO (LX6), generic ARM32, RISC-V 32 --
96 // Verified PICO: Mul 6,405ns vs native 7,120ns (+1.11x)
97 // Sqr 3,972ns vs native 6,366ns (+1.60x)
98 // Add 408ns vs native 980ns (+2.40x)
99 #define SECP256K1_OPTIMAL_TIER_26 1
100 #include "secp256k1/field_26.hpp"
101 #endif
102
103#else
104 // 64-bit without __int128 (MSVC x64, or explicit SECP256K1_NO_INT128)
105 #define SECP256K1_OPTIMAL_TIER_64 1
106#endif
107
108
109namespace secp256k1::fast {
110
111// -- The type alias ----------------------------------------------------------
112#if defined(SECP256K1_OPTIMAL_TIER_52)
113 using OptimalFieldElement = FieldElement52;
114#elif defined(SECP256K1_OPTIMAL_TIER_26)
115 using OptimalFieldElement = FieldElement26;
116#else
118#endif
119
120// -- Compile-time tag --------------------------------------------------------
/// Identifies which limb layout was selected at compile time:
/// FE64 = 4x64, FE52 = 5x52, FE26 = 10x26.
enum class FieldTier : std::uint8_t { FE64, FE52, FE26 };

// kOptimalTier is the machine-readable tag and kOptimalTierName the
// human-readable label for the selected tier. Every branch must define both,
// so that code reading either constant compiles on all platforms.
#if defined(SECP256K1_OPTIMAL_TIER_52)
    inline constexpr FieldTier kOptimalTier = FieldTier::FE52;
    inline constexpr const char* kOptimalTierName = "5x52 (64-bit, __int128)";
#elif defined(SECP256K1_OPTIMAL_TIER_26)
    inline constexpr FieldTier kOptimalTier = FieldTier::FE26;
    #if defined(SECP256K1_OPTIMAL_ARM64)
        inline constexpr const char* kOptimalTierName = "10x26 (ARM64 -- wins all field ops)";
    #else
        inline constexpr const char* kOptimalTierName = "10x26 (32-bit native)";
    #endif
#else
    // 4x64 tier: the tag was previously left undefined in this branch.
    inline constexpr FieldTier kOptimalTier = FieldTier::FE64;
    #if defined(SECP256K1_PLATFORM_STM32)
        inline constexpr const char* kOptimalTierName = "4x64 (STM32 Comba -- already optimal)";
    #elif defined(CONFIG_IDF_TARGET_ESP32S3)
        inline constexpr const char* kOptimalTierName = "4x64 (ESP32-S3 -- native wins mul)";
    #else
        inline constexpr const char* kOptimalTierName = "4x64 (baseline)";
    #endif
#endif
143
144// -- Conversion helpers ------------------------------------------------------
145// Zero-cost when OptimalFieldElement == FieldElement (identity).
146// Otherwise, bit-rearrangement (cheap compared to any mul/sqr).
147
148inline OptimalFieldElement to_optimal(const FieldElement& fe) noexcept {
149#if defined(SECP256K1_OPTIMAL_TIER_52)
150 return FieldElement52::from_fe(fe);
151#elif defined(SECP256K1_OPTIMAL_TIER_26)
152 return FieldElement26::from_fe(fe);
153#else
154 return fe;
155#endif
156}
157
158inline FieldElement from_optimal(const OptimalFieldElement& ofe) noexcept {
159#if defined(SECP256K1_OPTIMAL_TIER_52)
160 return ofe.to_fe();
161#elif defined(SECP256K1_OPTIMAL_TIER_26)
162 return ofe.to_fe();
163#else
164 return ofe;
165#endif
166}
167
168} // namespace secp256k1::fast
169
170#endif // SECP256K1_FIELD_OPTIMAL_HPP
constexpr const char * kOptimalTierName
FieldElement from_optimal(const OptimalFieldElement &ofe) noexcept
OptimalFieldElement to_optimal(const FieldElement &fe) noexcept
constexpr FieldTier kOptimalTier
FieldElement OptimalFieldElement
static FieldElement26 from_fe(const FieldElement &fe) noexcept
static FieldElement52 from_fe(const FieldElement &fe) noexcept