UltrafastSecp256k1 3.50.0
Ultra high-performance secp256k1 elliptic curve cryptography library
Loading...
Searching...
No Matches
point.hpp
Go to the documentation of this file.
1#ifndef C870F4A3_192C_4B96_9AE6_497D1885C5D9
2#define C870F4A3_192C_4B96_9AE6_497D1885C5D9
3
4#include <array>
5#include <cstddef>
6#include <cstdint>
7#include <string>
8#include <utility>
9#include <vector>
10#include "field.hpp"
11#include "scalar.hpp"
12
// On 5x52-capable platforms, Point stores FieldElement52 internally
// for zero-conversion-overhead point arithmetic.
// FE52 native storage: only on 64-bit platforms with __int128
// Excluded on Emscripten/WASM: wasm32 emulates __int128 via compiler intrinsics,
// which is correct but gives no speed benefit over 4x64 FieldElement.
// The 52-bit dual_scalar_mul_gen_point also builds huge static tables (8192 entries)
// that are unnecessary for WASM targets.
// ESP32/STM32 embedded targets are likewise opted out via their platform macros.
// NOTE: #ifndef guard lets a build system pre-define SECP256K1_FAST_52BIT
// (e.g. to 0) to force-disable FE52 storage even on capable platforms.
#if defined(__SIZEOF_INT128__) && !defined(SECP256K1_PLATFORM_ESP32) && !defined(SECP256K1_PLATFORM_STM32) && !defined(__EMSCRIPTEN__)
    #ifndef SECP256K1_FAST_52BIT
    #define SECP256K1_FAST_52BIT 1
    #endif
    #include "field_52.hpp"
#endif
26
27namespace secp256k1::fast {
28
// Platform-optimal default GLV window width for k*P (scalar_mul_with_plan).
//
// Larger window = fewer point additions in the wNAF loop, but larger precompute
// table (2^(w-2) entries, each requiring a mixed add + z-ratio tracking).
//
// Tradeoff by platform (BIP-352 pipeline benchmark, 10K ops, median):
//   w=4: table=4 entries,  ~33 adds per 128-bit GLV half-scalar
//   w=5: table=8 entries,  ~26 adds per 128-bit GLV half-scalar
//   w=6: table=16 entries, ~21 adds (diminishing returns, precompute dominates)
//
// On in-order / narrow OoO cores (RISC-V U74, ARM Cortex-A55) where point_add
// is expensive relative to precompute, w=5 saves more than it costs.
// On wide OoO x86-64 with fast MULX, the tradeoff is roughly neutral for
// single k*P but w=5 still wins the full pipeline.
//
// Override at call site: KPlan::from_scalar(k, 6) for batch-heavy workloads
// where precompute is amortized across many points.
#if defined(SECP256K1_GLV_WINDOW_WIDTH)
    // CMake override: -DSECP256K1_GLV_WINDOW_WIDTH=6
    // Or direct compiler flag: -DSECP256K1_GLV_WINDOW_WIDTH=6
    static_assert(SECP256K1_GLV_WINDOW_WIDTH >= 4 && SECP256K1_GLV_WINDOW_WIDTH <= 7,
                  "SECP256K1_GLV_WINDOW_WIDTH must be in [4,7]");
    inline constexpr uint8_t kDefaultGlvWindow = SECP256K1_GLV_WINDOW_WIDTH;
#elif defined(__riscv) || defined(__aarch64__) || defined(_M_ARM64)
    // Same value as x86-64 below; kept as a separate branch because the
    // rationale differs (see tradeoff notes above).
    inline constexpr uint8_t kDefaultGlvWindow = 5;
#elif defined(__x86_64__) || defined(_M_X64)
    inline constexpr uint8_t kDefaultGlvWindow = 5;
#else
    inline constexpr uint8_t kDefaultGlvWindow = 4;  // ESP32, WASM, unknown
#endif
59
// Fixed K x Variable Q optimization plan
// Caches all K-dependent work: GLV decomposition + wNAF computation
// Use this when you need to multiply many different points Q by the same scalar K
// Maximum wNAF buffer length for a 256-bit scalar:
//   256 bits + 1 extra digit + 3 padding = 260 entries.
// `inline` to guarantee a single definition across TUs (matches the
// `inline constexpr` convention used by kDefaultGlvWindow above).
inline constexpr std::size_t kWnafBufLen = 260;
65
66struct KPlan {
67 uint8_t window_width; // wNAF window size (see kDefaultGlvWindow)
68 Scalar k1; // Decomposed scalar k1
69 Scalar k2; // Decomposed scalar k2
70 // wNAF digits stored in fixed-size stack buffers — no heap allocation per plan
71 std::array<int32_t, kWnafBufLen> wnaf1{};
72 std::size_t wnaf1_len{0};
73 std::array<int32_t, kWnafBufLen> wnaf2{};
74 std::size_t wnaf2_len{0};
75 bool neg1; // Sign flag for k1
76 bool neg2; // Sign flag for k2
77
78 // Factory: Create plan from scalar K
79 // w: wNAF window width (default: platform-optimal kDefaultGlvWindow)
80 static KPlan from_scalar(const Scalar& k, uint8_t w = kDefaultGlvWindow);
81};
82
83class Point {
84public:
86
87 static Point generator();
88 static Point infinity();
89 static Point from_affine(const FieldElement& x, const FieldElement& y);
90
91 // Developer-friendly: Create from hex strings (64 hex chars each)
92 // Example: Point::from_hex("09af57f4...", "0947e4f9...")
93 static Point from_hex(const std::string& x_hex, const std::string& y_hex);
94
95 FieldElement x() const;
96 FieldElement y() const;
97 bool is_infinity() const noexcept { return infinity_; }
98 bool is_gen() const noexcept { return is_generator_; }
99
100 // Split x-coordinate helpers for split-keys database format
101 // x_first_half(): returns first 16 bytes of x-coordinate
102 // x_second_half(): returns last 16 bytes of x-coordinate
103 std::array<uint8_t, 16> x_first_half() const;
104 std::array<uint8_t, 16> x_second_half() const;
105
106 // Direct access to Jacobian coordinates (for batch processing)
107#if defined(SECP256K1_FAST_52BIT)
108 FieldElement X() const noexcept;
109 FieldElement Y() const noexcept;
110 FieldElement z() const noexcept;
111 // Direct access to 5x52 internals (for hot paths)
112 const FieldElement52& X52() const noexcept { return x_; }
113 const FieldElement52& Y52() const noexcept { return y_; }
114 const FieldElement52& Z52() const noexcept { return z_; }
115#else
116 const FieldElement& X() const noexcept { return x_; }
117 const FieldElement& Y() const noexcept { return y_; }
118 const FieldElement& z() const noexcept { return z_; }
119#endif
120
121 Point add(const Point& other) const;
122 Point dbl() const;
123 Point scalar_mul(const Scalar& scalar) const;
124
125 // Optimized: Q * K where K is precomputed constant
126 // This will use GLV decomposition and precomputed tables
128
129 // Optimized: Q * K with pre-decomposed K (k1, k2, signs)
130 // Use this when K decomposition is done once at startup
131 // Runtime only computes: Q*k1 + phi(Q)*k2
133 bool neg1, bool neg2) const;
134
135 // Optimized: Q * K with precomputed wNAF digits
136 // This is the fastest version - all K-related work is done at compile time
137 // Runtime only does: table generation + interleaved addition
138 Point scalar_mul_precomputed_wnaf(const std::vector<int32_t>& wnaf1,
139 const std::vector<int32_t>& wnaf2,
140 bool neg1, bool neg2) const;
141 // Raw-pointer overload — no heap access, used by KPlan hot path
142 Point scalar_mul_precomputed_wnaf(const int32_t* wnaf1, std::size_t len1,
143 const int32_t* wnaf2, std::size_t len2,
144 bool neg1, bool neg2) const;
145
146 // Fixed K x Variable Q: Use precomputed KPlan for maximum speed
147 // All K-dependent work (GLV + wNAF) is cached in the plan
148 // Runtime only: phi(Q), table generation, Shamir's trick
149 Point scalar_mul_with_plan(const KPlan& plan) const;
150
151 // Negation: returns the opposite point on the curve
152 Point negate() const; // -(x, y) = (x, -y)
153
154 // Fast increment/decrement by generator (optimized with precomputed -G)
155 Point next() const; // this + G (returns new Point)
156 Point prev() const; // this - G (returns new Point)
157
158 // In-place mutable versions (modify this object directly)
159 // In-place variants: modify this directly (same perf as immutable next/prev)
160 void next_inplace(); // this += G (modifies this)
161 void prev_inplace(); // this -= G (modifies this)
162 void add_inplace(const Point& other); // this += other (modifies this, no allocation)
163 void sub_inplace(const Point& other); // this -= other (modifies this, no allocation)
164 // Branchless mixed-add against affine point (z=1): avoids runtime checks
165 void add_mixed_inplace(const FieldElement& ax, const FieldElement& ay);
166 void sub_mixed_inplace(const FieldElement& ax, const FieldElement& ay);
167#if defined(SECP256K1_FAST_52BIT)
168 // FE52-native mixed-add: avoids FE52->FE->FE52 roundtrip in hot loops.
169 // Used by effective-affine Strauss MSM where precomp is stored as FE52.
170 void add_mixed52_inplace(const FieldElement52& ax, const FieldElement52& ay);
171#endif
172 void dbl_inplace(); // this = 2*this (modifies this, no allocation)
173 void negate_inplace(); // this = -this (modifies this, no allocation)
174 // Optimized repeated addition by a fixed affine point (z=1)
176
177 // Y-parity check (single inversion, no full serialization)
178 bool has_even_y() const;
179
180 // Combined: returns (x_bytes, y_is_odd) with a single field inversion
181 std::pair<std::array<uint8_t, 32>, bool> x_bytes_and_parity() const;
182
183 // Fast x-only: 32-byte big-endian x-coordinate (no Y recovery).
184 // Saves one multiply vs x_bytes_and_parity() by skipping Z^(-3)*Y.
185 // Use when only x is needed (e.g. BIP-352 SHA-256 input, BIP-340 x-only).
186 std::array<uint8_t, 32> x_only_bytes() const;
187
188 // Batch normalize: convert N Jacobian points to affine with ONE inversion
189 // via Montgomery's trick. Cost: 1 inversion + 3(N-1) multiplications.
190 // For N=2048: ~9.5 ns/point vs ~1000 ns/point individually.
191 // out_x, out_y: output affine coordinates (caller-owned, size >= n).
192 // Skips infinity points (leaves output zero-filled).
193 static void batch_normalize(const Point* points, size_t n,
194 FieldElement* out_x, FieldElement* out_y);
195
196 // Batch to_compressed: serialize N Jacobian points using ONE inversion.
197 // out: caller-owned array of 33-byte compressed pubkeys, size >= n.
198 static void batch_to_compressed(const Point* points, size_t n,
199 std::array<uint8_t, 33>* out);
200
201 // Batch x_only_bytes: extract N x-coordinates using ONE inversion.
202 // out: caller-owned array of 32-byte x coords, size >= n.
203 static void batch_x_only_bytes(const Point* points, size_t n,
204 std::array<uint8_t, 32>* out);
205
206 // Normalize: convert Jacobian -> affine (Z=1) with ONE field inversion.
207 // After this call, all serialization methods become O(1) byte copies.
208 // Called automatically by scalar_mul/generator_mul/dual_scalar_mul.
209 void normalize();
210 bool is_normalized() const noexcept { return z_one_; }
211
212 // Dual scalar multiplication: a*G + b*P (4-stream GLV Shamir)
213 // Much faster than separate generator_mul(a) + scalar_mul(b) + add
214 static Point dual_scalar_mul_gen_point(const Scalar& a, const Scalar& b, const Point& P);
215
216 std::array<std::uint8_t, 33> to_compressed() const;
217 std::array<std::uint8_t, 65> to_uncompressed() const;
218
219#if defined(SECP256K1_FAST_52BIT)
220 FieldElement x_raw() const noexcept;
221 FieldElement y_raw() const noexcept;
222 FieldElement z_raw() const noexcept;
223#else
224 const FieldElement& x_raw() const noexcept { return x_; }
225 const FieldElement& y_raw() const noexcept { return y_; }
226 const FieldElement& z_raw() const noexcept { return z_; }
227#endif
228
230#if defined(SECP256K1_FAST_52BIT)
231 // Zero-conversion factory: constructs Point directly from FE52 Jacobian coords
232 static Point from_jacobian52(const FieldElement52& x, const FieldElement52& y, const FieldElement52& z, bool infinity);
233 // Zero-conversion affine construction: (x, y, z=1) directly in FE52
234 static Point from_affine52(const FieldElement52& x, const FieldElement52& y);
235#endif
236
237private:
238 Point(const FieldElement& x, const FieldElement& y, const FieldElement& z, bool infinity);
239#if defined(SECP256K1_FAST_52BIT)
240 // Zero-conversion constructor: directly initializes FE52 members
241 Point(const FieldElement52& x, const FieldElement52& y, const FieldElement52& z, bool infinity, bool is_gen);
242
243 // Convert z_ (FE52) -> normalized FieldElement + check for zero.
244 // Returns true if z is nonzero (normal case); false if z is zero.
245 // On true: out_z_fe contains the normalized 4x64 FieldElement.
246 // Used by x(), y(), to_compressed(), to_uncompressed(), has_even_y(),
247 // x_bytes_and_parity() to avoid duplicating the defensive Z=0 guard.
248 bool z_fe_nonzero(FieldElement& out_z_fe) const noexcept;
249#endif
250
251#if defined(SECP256K1_FAST_52BIT)
255#else
256 FieldElement x_;
257 FieldElement y_;
258 FieldElement z_;
259#endif
260 bool infinity_;
261 bool is_generator_;
262 bool z_one_ = false; // true when Z == 1 (point is affine-normalized)
263};
264
// Self-test: Verify arithmetic correctness with known test vectors
// Returns true if all tests pass, false otherwise
// Run this after any code changes to ensure math is correct!
// Set verbose=true to see detailed output for each test
// (diagnostics are printed; with verbose=false only failures are reported —
//  TODO confirm against the implementation)
bool Selftest(bool verbose = false);
270
271} // namespace secp256k1::fast
272
273
274#endif /* C870F4A3_192C_4B96_9AE6_497D1885C5D9 */
static Point from_hex(const std::string &x_hex, const std::string &y_hex)
static Point from_affine(const FieldElement &x, const FieldElement &y)
static Point dual_scalar_mul_gen_point(const Scalar &a, const Scalar &b, const Point &P)
const FieldElement & z_raw() const noexcept
Definition point.hpp:226
static Point from_jacobian_coords(const FieldElement &x, const FieldElement &y, const FieldElement &z, bool infinity)
static void batch_x_only_bytes(const Point *points, size_t n, std::array< uint8_t, 32 > *out)
std::array< std::uint8_t, 65 > to_uncompressed() const
void add_affine_constant_inplace(const FieldElement &ax, const FieldElement &ay)
Point negate() const
const FieldElement & Y() const noexcept
Definition point.hpp:117
std::pair< std::array< uint8_t, 32 >, bool > x_bytes_and_parity() const
static void batch_to_compressed(const Point *points, size_t n, std::array< uint8_t, 33 > *out)
bool is_gen() const noexcept
Definition point.hpp:98
std::array< uint8_t, 32 > x_only_bytes() const
static void batch_normalize(const Point *points, size_t n, FieldElement *out_x, FieldElement *out_y)
Point scalar_mul(const Scalar &scalar) const
void sub_mixed_inplace(const FieldElement &ax, const FieldElement &ay)
void add_inplace(const Point &other)
bool is_infinity() const noexcept
Definition point.hpp:97
Point scalar_mul_with_plan(const KPlan &plan) const
bool is_normalized() const noexcept
Definition point.hpp:210
FieldElement x() const
FieldElement y() const
Point add(const Point &other) const
const FieldElement & z() const noexcept
Definition point.hpp:118
const FieldElement & y_raw() const noexcept
Definition point.hpp:225
void add_mixed_inplace(const FieldElement &ax, const FieldElement &ay)
bool has_even_y() const
static Point generator()
Point scalar_mul_precomputed_wnaf(const std::vector< int32_t > &wnaf1, const std::vector< int32_t > &wnaf2, bool neg1, bool neg2) const
Point scalar_mul_precomputed_wnaf(const int32_t *wnaf1, std::size_t len1, const int32_t *wnaf2, std::size_t len2, bool neg1, bool neg2) const
const FieldElement & x_raw() const noexcept
Definition point.hpp:224
Point scalar_mul_predecomposed(const Scalar &k1, const Scalar &k2, bool neg1, bool neg2) const
void sub_inplace(const Point &other)
Point scalar_mul_precomputed_k(const Scalar &k) const
const FieldElement & X() const noexcept
Definition point.hpp:116
static Point infinity()
std::array< uint8_t, 16 > x_first_half() const
std::array< std::uint8_t, 33 > to_compressed() const
std::array< uint8_t, 16 > x_second_half() const
bool Selftest(bool verbose)
constexpr std::size_t kWnafBufLen
Definition point.hpp:64
constexpr uint8_t kDefaultGlvWindow
Definition point.hpp:57
std::array< int32_t, kWnafBufLen > wnaf2
Definition point.hpp:73
static KPlan from_scalar(const Scalar &k, uint8_t w=kDefaultGlvWindow)
std::array< int32_t, kWnafBufLen > wnaf1
Definition point.hpp:71
std::size_t wnaf1_len
Definition point.hpp:72
std::size_t wnaf2_len
Definition point.hpp:74