45template <
typename Run>
double median_ns(Run&& run,
size_t iters)
48 for (
size_t i = 0; i < iters; ++i) {
49 const auto t0 = std::chrono::steady_clock::now();
51 const auto t1 = std::chrono::steady_clock::now();
52 samples[i] =
static_cast<double>(std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count());
54 std::sort(samples.begin(), samples.end());
55 return samples[samples.size() / 2];
60size_t pick_iters(
size_t n)
83void print_matrix_header(
const std::vector<size_t>& ns)
93void print_matrix_row(
const char* label,
const std::vector<double>& ns_per_run,
const std::vector<bool>& mask)
96 for (
size_t i = 0; i < ns_per_run.size(); ++i) {
111 std::printf(
"\n=== MIN_JACOBIAN_SIZE crossover sweep (single-threaded straus_msm vs jac_fast, ns) ===\n\n");
112 std::printf(
"%-8s %12s %12s %10s\n",
"N",
"straus",
"jac_st",
"delta_%");
114 size_t crossover = 0;
115 constexpr size_t REPEATS = 3;
116 for (
size_t n = 32; n <= 64; n += 2) {
119 const size_t iters = pick_iters(n);
123 for (
size_t r = 0; r < REPEATS; ++r) {
124 straus_samples[r] = median_ns(
126 volatile auto v = Element::straus_msm(points, scalars_view);
130 jac_samples[r] = median_ns(
133 bb::scalar_multiplication::round_parallel_detail::pippenger_round_parallel_jacobian_fast<Curve>(
134 scalars_view, points, SIZE_MAX);
139 std::sort(straus_samples.begin(), straus_samples.end());
140 std::sort(jac_samples.begin(), jac_samples.end());
141 const double straus = straus_samples[REPEATS / 2];
142 const double jac = jac_samples[REPEATS / 2];
143 const double delta_pct = 100.0 * (jac - straus) / straus;
144 std::printf(
"%-8zu %12.0f %12.0f %+10.2f\n", n, straus, jac, delta_pct);
145 if (crossover == 0 && jac < straus) {
149 if (crossover != 0) {
150 std::printf(
"\nFirst N where jac_fast_st_always beats straus_msm: %zu\n", crossover);
152 std::printf(
"\nstraus_msm wins across the entire 32..64 sweep.\n");
159 constexpr size_t MAX_N = 1U << 14;
163 auto srs = bb::srs::get_crs_factory<Curve>()->get_crs(MAX_N);
167 std::vector<Fr> scalars(MAX_N);
168 for (
auto& s : scalars) {
173 (void)&run_crossover_sweep;
180 const std::vector<size_t> ns = {
181 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96,
182 128, 192, 256, 384, 512, 768, 1024, 2048, 4096, 8192, 12288, 16384,
188 std::vector<bool> straus_mask(ns.size());
189 std::vector<bool> internal_mask(ns.size());
190 for (
size_t i = 0; i < ns.size(); ++i) {
191 straus_mask[i] = (ns[i] < 256);
192 internal_mask[i] = (ns[i] >= 64);
194 std::vector<bool> all_mask(ns.size(),
true);
202 for (
size_t col = 0; col < ns.size(); ++col) {
203 const size_t n = ns[col];
209 const size_t iters = pick_iters(n);
211 row_jac_mt[col] = median_ns(
214 bb::scalar_multiplication::round_parallel_detail::pippenger_round_parallel_jacobian_fast<Curve>(
215 scalars_view, points, 1);
220 row_jac_st[col] = median_ns(
223 bb::scalar_multiplication::round_parallel_detail::pippenger_round_parallel_jacobian_fast<Curve>(
224 scalars_view, points, SIZE_MAX);
229 row_threaded[col] = median_ns(
231 volatile auto r = bb::scalar_multiplication::trivial_msm_threaded<Curve>(poly_scalars, points);
236 if (straus_mask[col]) {
237 row_straus[col] = median_ns(
239 volatile auto r = Element::straus_msm(points, scalars_view);
245 if (internal_mask[col]) {
246 row_internal[col] = median_ns(
249 bb::scalar_multiplication::pippenger_round_parallel<Curve>(mut_poly_scalars, points);
256 std::printf(
"\n=== small-MSM crossover matrix (median wall-clock ns per run, BN254) ===\n\n");
257 print_matrix_header(ns);
258 print_matrix_row(
"jac_fast_mt_always", row_jac_mt, all_mask);
259 print_matrix_row(
"jac_fast_st_always", row_jac_st, all_mask);
260 print_matrix_row(
"small_mul_threaded", row_threaded, all_mask);
261 print_matrix_row(
"straus_msm", row_straus, straus_mask);
262 print_matrix_row(
"pippenger_internal", row_internal, internal_mask);
266 for (
size_t i = 0; i < ns.size(); ++i) {
273 {
"jac_st", row_jac_st[i],
true },
274 {
"threaded", row_threaded[i],
true },
275 {
"straus", row_straus[i], straus_mask[i] },
276 {
"internal", row_internal[i], internal_mask[i] } } };
277 const Cand* best =
nullptr;
278 for (
const Cand& cand : c) {
279 if (cand.active && (best ==
nullptr || cand.v < best->v)) {
283 std::printf(
" N=%-6zu best=%-12s (%.0f ns)\n", ns[i], best->name, best->v);
typename Group::element Element
typename Group::affine_element AffineElement
RNG & get_debug_randomness(bool reset, std::uint_fast64_t seed)
std::filesystem::path bb_crs_path()
void init_file_crs_factory(const std::filesystem::path &path)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept