51 static constexpr int BATCH = 62;
78 "REDUCE_INTERVAL too large for reduce_to_canonical iteration bound");
84 :
l{ x.
data[0], x.data[1], x.data[2], x.data[3], 0 }
95 bool is_zero() const noexcept {
return (
l[0] |
l[1] |
l[2] |
l[3] |
l[4]) == 0; }
100 for (
int i = 0; i <
N; ++i) {
102 c = (c && v == 0) ? 1 : 0;
134 u64 p_inv_mod_2k)
noexcept;
140 return (0ULL - r_inv) & ((1ULL <<
BATCH) - 1);
144 static constexpr int N = 5;
150 for (
int i = 0; i <
N; ++i) {
151 __uint128_t s = (__uint128_t)
l[i] +
b.l[i] + c;
159 for (
int i = 0; i <
N; ++i) {
160 __uint128_t s = (__uint128_t)
l[i] - (__uint128_t)
b.l[i] - borrow;
162 borrow = ((
i64)(s >> 64) < 0) ? 1 : 0;
168 if (a_top != b_top) {
169 return a_top > b_top;
171 for (
int i =
N - 2; i >= 0; --i) {
172 if (
l[i] !=
b.l[i]) {
173 return l[i] >
b.l[i];
187 for (
int i = 0; i < 4; ++i) {
188 c += (__int128)
a * (__int128)(
u64)x.l[i] + (__int128)
b * (__int128)(
u64)y.l[i];
192 c += (__int128)
a * (__int128)(
i64)x.l[4] + (__int128)
b * (__int128)(
i64)y.l[4];
194 out[5] = (
u64)(c >> 64);
206 for (
int i = 0; i < 4; ++i) {
207 r.
l[i] = (t[i] >> 62) | (t[i + 1] << 2);
209 r.
l[4] = (t[4] >> 62) | (t[5] << 2);
227 i64 u = 1, v = 0, q = 0, r = 1;
228 for (
int i = 0; i <
BATCH; ++i) {
231 u64 nf = g_lo, ng = (g_lo - f_lo) >> 1;
232 i64 nu = q << 1, nv = r << 1, nq = q - u, nr = r - v;
241 g_lo = (g_lo + f_lo) >> 1;
255 return { u, v, q, r };
264 u64 p_inv_mod_2k)
noexcept
266 constexpr u64 MASK_BATCH = (1ULL <<
BATCH) - 1;
278 u64 k = ((0ULL - t[0]) * p_inv_mod_2k) & MASK_BATCH;
281 for (
int i = 0; i < 5; ++i) {
282 __uint128_t prod = (__uint128_t)k * (
u64)p.l[i] + carry;
284 carry = (
u64)(prod >> 64);
288 for (
int i = 0; i < 6; ++i) {
289 __uint128_t s = (__uint128_t)t[i] + kp[i] + c;
296 apply_corrected_row(m.u, d, m.v, e, nd);
297 apply_corrected_row(m.q, d, m.r, e, ne);
309using State = Wasm9x29;
324template <
class S = State>
330 S P(p), f = P, g(
a), d, e = S::one();
333 for (
int i = 0; i < S::NUM_ITERATIONS; ++i) {
334 DivstepMatrix m = S::compute_divstep_matrix(delta, f.low_64(), g.low_64());
335 S::apply_divstep_matrix(m, f, g, d, e, P, p_inv_mod_2k);
339 if ((i + 1) % S::REDUCE_INTERVAL == 0) {
340 d.reduce_to_canonical(P);
341 e.reduce_to_canonical(P);
344 d.reduce_to_canonical(P);
345 if (f.is_negative()) {
347 d.reduce_to_canonical(P);
349 return d.to_uint256();
363#if defined(__SIZEOF_INT128__) || defined(__wasm__)
364 T::modulus_3 < (1ULL << 63);
static DivstepMatrix compute_divstep_matrix(i64 &delta, u64 f_lo, u64 g_lo) noexcept
bool is_zero() const noexcept
void sub_inplace(const Native5x64 &b) noexcept
static constexpr u64 p_inv_mod_2k_from_montgomery_r_inv(u64 r_inv) noexcept
Native5x64(const uint256_t &x) noexcept
void reduce_to_canonical(const Native5x64 &p) noexcept
static Native5x64 one() noexcept
void add_inplace(const Native5x64 &b) noexcept
static constexpr int REDUCE_INTERVAL
bool ge(const Native5x64 &b) const noexcept
u64 low_64() const noexcept
static constexpr int NUM_ITERATIONS
bool is_negative() const noexcept
uint256_t to_uint256() const noexcept
static void apply_divstep_matrix(const DivstepMatrix &m, Native5x64 &f, Native5x64 &g, Native5x64 &d, Native5x64 &e, const Native5x64 &p, u64 p_inv_mod_2k) noexcept
static constexpr int REDUCE_TO_CANONICAL_MAX_ITERS
static constexpr int BATCH
constexpr bool supported_v
constexpr u64 p_inv_mod_2k_from_montgomery_r_inv(u64 r_inv) noexcept
uint256_t invert_vartime(const uint256_t &a, const uint256_t &p, u64 p_inv_mod_2k) noexcept
Variable-time safegcd inverse (Bernstein-Yang TCHES 2019, Pornin 2020 §4).
static Native5x64 arithmetic_shift_by_batch(const u64 t[6]) noexcept
static void signed_linear_combination(i64 a, const Native5x64 &x, i64 b, const Native5x64 &y, u64 out[6]) noexcept