14#include <gtest/gtest.h>
23constexpr size_t LIMB_BITS_U64 = 64;
24constexpr size_t NUM_LIMBS_U64 = 4;
25constexpr size_t NUM_LIMBS_U32 = 8;
26constexpr size_t MAX_BITS = 256;
43uint32_t reference_packed_digit(
const uint64_t* scalar_data,
size_t bit_offset,
size_t window_bits)
45 auto bit_at = [&](int64_t i) -> uint64_t {
46 if (i < 0 ||
static_cast<size_t>(i) >= MAX_BITS) {
49 return (scalar_data[
static_cast<size_t>(i) / LIMB_BITS_U64] >> (
static_cast<size_t>(i) % LIMB_BITS_U64)) &
53 for (
size_t k = 0; k <= window_bits; ++k) {
54 const int64_t bit_idx =
static_cast<int64_t
>(bit_offset) +
static_cast<int64_t
>(k) - 1;
55 raw |=
static_cast<uint32_t
>(bit_at(bit_idx)) << k;
57 const uint32_t neg = (raw >> window_bits) & 1U;
58 const uint32_t val_mask = (uint32_t{ 1 } << window_bits) - 1;
59 const uint32_t encode = (raw + 1) >> 1;
60 const uint32_t bucket = ((encode - neg) ^ (uint32_t{ 0 } - neg)) & val_mask;
61 return (neg << 31) | bucket;
70 for (
size_t i = 0; i < NUM_LIMBS_U64; ++i) {
71 out[i] =
engine.get_random_uint64();
80 return reinterpret_cast<const uint32_t*
>(s.data());
87uint32_t production_scalar_path(
const uint64_t* scalar_data,
size_t bit_offset,
size_t window_bits)
97 sp.slice_localised_to_one_u64,
111 const cnst::SimdU32x4 lo_mask_v{ sp.lo_mask, sp.lo_mask, sp.lo_mask, sp.lo_mask };
112 const cnst::SimdU32x4 hi_mask_v{ sp.hi_mask, sp.hi_mask, sp.hi_mask, sp.hi_mask };
114 const uint32_t val_mask_scalar = (uint32_t{ 1 } << window_bits) - 1;
115 const cnst::SimdU32x4 val_mask{ val_mask_scalar, val_mask_scalar, val_mask_scalar, val_mask_scalar };
117 const uint32_t* s0 = as_u32(scalars[0]);
118 const uint32_t* s1 = as_u32(scalars[1]);
119 const uint32_t* s2 = as_u32(scalars[2]);
120 const uint32_t* s3 = as_u32(scalars[3]);
122 const uint32_t wb_u32 =
static_cast<uint32_t
>(window_bits);
124 case cnst::ConstantineSlicePath::Localised:
126 out, s0, s1, s2, s3, sp.lo_limb, sp.lo_off, lo_mask_v, one_v, val_mask, wb_u32);
128 case cnst::ConstantineSlicePath::Bottom:
130 out, s0, s1, s2, s3, sp.hi_limb, sp.lo_bits, hi_mask_v, one_v, val_mask, wb_u32);
132 case cnst::ConstantineSlicePath::Boundary:
157TEST(PippengerConstantine, ScalarMatchesReferenceOracleAllWindowBits)
159 constexpr size_t TRIALS_PER_SHAPE = 32;
164 for (
size_t window_bits = 1; window_bits <= 19; ++window_bits) {
165 for (
size_t bit_offset = 0; bit_offset <= 255; ++bit_offset) {
166 for (
size_t t = 0; t < TRIALS_PER_SHAPE; ++t) {
167 const auto s = random_scalar_limbs();
168 const uint32_t got = production_scalar_path(s.data(), bit_offset, window_bits);
169 const uint32_t want = reference_packed_digit(s.data(), bit_offset, window_bits);
170 ASSERT_EQ(got, want) <<
"window_bits=" << window_bits <<
" bit_offset=" << bit_offset <<
" trial=" << t;
182TEST(PippengerConstantine, SimdX4MatchesScalarPathLanewise)
184 constexpr size_t TRIALS_PER_SHAPE = 16;
185 bool saw_localised =
false;
186 bool saw_bottom =
false;
187 bool saw_boundary =
false;
192 for (
size_t window_bits = 1; window_bits <= 19; ++window_bits) {
193 for (
size_t bit_offset = 0; bit_offset <= 255; ++bit_offset) {
196 case cnst::ConstantineSlicePath::Localised:
197 saw_localised =
true;
199 case cnst::ConstantineSlicePath::Bottom:
202 case cnst::ConstantineSlicePath::Boundary:
206 for (
size_t t = 0; t < TRIALS_PER_SHAPE; ++t) {
208 random_scalar_limbs(), random_scalar_limbs(), random_scalar_limbs(), random_scalar_limbs()
210 alignas(16) std::array<uint32_t, 4> got_simd{};
211 production_simd_path(scalars.data(), bit_offset, window_bits, got_simd.data());
212 for (
size_t lane = 0; lane < 4; ++lane) {
213 const uint32_t want = production_scalar_path(scalars[lane].
data(), bit_offset, window_bits);
214 ASSERT_EQ(got_simd[lane], want)
215 <<
"window_bits=" << window_bits <<
" bit_offset=" << bit_offset <<
" lane=" << lane;
222 EXPECT_TRUE(saw_localised);
223 EXPECT_TRUE(saw_bottom);
224 EXPECT_TRUE(saw_boundary);
234TEST(PippengerConstantine, RoundTripIdentityMatchesScalarMod2N)
236 constexpr size_t TOTAL_BITS = 254;
237 constexpr size_t TRIALS = 64;
240 for (
size_t window_bits = 1; window_bits <= 19; ++window_bits) {
241 for (
size_t t = 0; t < TRIALS; ++t) {
242 const auto s = random_scalar_limbs();
252 size_t bit_offset = 0;
253 size_t bits_remaining = TOTAL_BITS + 2;
254 while (bits_remaining > 0) {
255 const size_t wb = std::min(window_bits, bits_remaining);
256 const uint32_t packed = production_scalar_path(s.data(), bit_offset, wb);
257 const uint32_t neg = packed >> 31;
258 const uint32_t bucket = packed & ((uint32_t{ 1 } << wb) - 1);
259 const int32_t signed_val = (neg != 0U) ? -
static_cast<int32_t
>(bucket) :
static_cast<int32_t
>(bucket);
260 signed_digits.emplace_back(signed_val, bit_offset);
262 bits_remaining -= wb;
268 for (
const auto& [v, off] : signed_digits) {
278 EXPECT_EQ(acc, scalar_val) <<
"window_bits=" << window_bits <<
" trial=" << t;
287TEST(PippengerConstantine, EdgeCases)
293 for (
size_t wb = 1; wb <= 19; ++wb) {
294 for (
size_t off = 0; off <= 255; ++off) {
295 EXPECT_EQ(production_scalar_path(zero.data(), off, wb), uint32_t{ 0 })
296 <<
"zero scalar wb=" << wb <<
" off=" << off;
303 EXPECT_TRUE(sp_bottom.is_bottom_window);
311 auto top_aligned = random_scalar_limbs();
312 constexpr size_t window_bits = 12;
313 for (
size_t bit_offset = 240; bit_offset <= 255; ++bit_offset) {
314 const uint32_t got = production_scalar_path(top_aligned.data(), bit_offset, window_bits);
315 const uint32_t want = reference_packed_digit(top_aligned.data(), bit_offset, window_bits);
316 EXPECT_EQ(got, want) <<
"top window bit_offset=" << bit_offset;
324 EXPECT_TRUE(sp_local.slice_localised_to_one_u64);
330 EXPECT_FALSE(sp_boundary.slice_localised_to_one_u64);
344TEST(PippengerConstantine, NamedSliceShapes)
360 {
"bottom_wb12", 0, 12, cnst::ConstantineSlicePath::Bottom,
false },
361 {
"bottom_wb2", 0, 2, cnst::ConstantineSlicePath::Bottom,
false },
362 {
"bottom_wb19", 0, 19, cnst::ConstantineSlicePath::Bottom,
false },
364 {
"local_lo_u32", 10, 12, cnst::ConstantineSlicePath::Localised,
true },
367 {
"local_u64_boundary_u32", 31, 12, cnst::ConstantineSlicePath::Boundary,
true },
369 {
"boundary_u64_at_63", 60, 12, cnst::ConstantineSlicePath::Boundary,
false },
370 {
"boundary_u64_at_127", 124, 12, cnst::ConstantineSlicePath::Boundary,
false },
371 {
"boundary_u64_at_191", 188, 12, cnst::ConstantineSlicePath::Boundary,
false },
373 {
"boundary_u32_at_31", 30, 4, cnst::ConstantineSlicePath::Boundary,
true },
376 {
"top_clamped_wb12", 246, 12, cnst::ConstantineSlicePath::Boundary,
false },
378 {
"top_wb1_final", 254, 1, cnst::ConstantineSlicePath::Localised,
true },
380 {
"local_mid_u64", 80, 12, cnst::ConstantineSlicePath::Localised,
true },
383 auto s = random_scalar_limbs();
384 for (
const auto& c : cases) {
388 EXPECT_EQ(sp_u64.slice_localised_to_one_u64, c.u64_localised) <<
"case=" << c.name;
391 const uint32_t got = production_scalar_path(s.data(), c.bit_offset, c.window_bits);
392 const uint32_t want = reference_packed_digit(s.data(), c.bit_offset, c.window_bits);
393 EXPECT_EQ(got, want) <<
"case=" << c.name;
408TEST(PippengerConstantine, ParamClassifierU64U32Consistency)
410 for (
size_t wb = 1; wb <= 19; ++wb) {
411 for (
size_t bit_offset = 0; bit_offset <= 255; ++bit_offset) {
417 const bool u64_says_bottom = (sp_u64.lo_mask == 0);
418 EXPECT_EQ(u64_says_bottom, sp_u32.is_bottom_window)
419 <<
"bottom classification disagrees at bit_offset=" << bit_offset <<
" wb=" << wb;
424 if (!sp_u32.is_bottom_window) {
425 const size_t u64_lookback = sp_u64.lo_limb * 64 + sp_u64.lo_off;
426 const size_t u32_lookback = sp_u32.lo_limb * 32 + sp_u32.lo_off;
427 EXPECT_EQ(u64_lookback, u32_lookback)
428 <<
"lookback bit disagrees at bit_offset=" << bit_offset <<
" wb=" << wb;
429 EXPECT_EQ(u64_lookback, bit_offset - 1)
430 <<
"lookback bit ≠ bit_offset-1 at bit_offset=" << bit_offset <<
" wb=" << wb;
437 if (sp_u64.slice_localised_to_one_u64 && bit_offset > 0) {
439 <<
"u64-localised but u32 classifier says Bottom at bit_offset=" << bit_offset <<
" wb=" << wb;
uint32_t get_constantine_packed_digit(const uint64_t *scalar_data, uint32_t lo_limb, uint32_t hi_limb, uint32_t lo_off, uint32_t lo_bits, uint32_t lo_mask, uint32_t hi_mask, bool slice_localised_to_one_u64, size_t window_bits) noexcept
Read (window_bits+1) bits from scalar_data (uint64 limbs) using precomputed slice params and apply Co...
ConstantineSlicePath classify_slice_path_u32(const ConstantineSliceParamsU32 &sp) noexcept
void store_constantine_packed_digits_x4_bottom(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t hi_limb, uint32_t lo_bits, SimdU32x4 hi_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
void store_constantine_packed_digits_x4_boundary(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t lo_limb, uint32_t hi_limb, uint32_t lo_off, uint32_t lo_bits, SimdU32x4 lo_mask_v, SimdU32x4 hi_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
uint32_t __attribute__((vector_size(16))) SimdU32x4
ConstantineSliceParams compute_constantine_slice_params(size_t bit_offset, size_t window_bits, size_t num_uint64_limbs) noexcept
void store_constantine_packed_digits_x4_localised(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t lo_limb, uint32_t lo_off, SimdU32x4 lo_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
ConstantineSliceParamsU32 compute_constantine_slice_params_u32(size_t bit_offset, size_t window_bits, size_t num_u32_limbs) noexcept
field< Bn254FrParams > fr
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
TEST(PippengerConstantine, ScalarMatchesReferenceOracleAllWindowBits)