14#include <benchmark/benchmark.h>
22using namespace benchmark;
30class PippengerBench :
public benchmark::Fixture {
32 static constexpr size_t MAX_POINTS = 1 << 22;
34 std::vector<Fr> scalars;
37 void SetUp([[maybe_unused]] const ::benchmark::State& state)
override
40 srs = bb::srs::get_crs_factory<Curve>()->get_crs(MAX_POINTS);
42 scalars.resize(MAX_POINTS);
43 for (
auto& x : scalars) {
51BENCHMARK_DEFINE_F(PippengerBench, PippengerUnsafe)(benchmark::State& state)
53 const size_t num_points =
static_cast<size_t>(state.range(0));
58 for (
auto _ : state) {
60 bb::scalar_multiplication::pippenger_unsafe<Curve>(poly_scalars, points);
66BENCHMARK_DEFINE_F(PippengerBench, PippengerRoundParallel)(benchmark::State& state)
68 const size_t num_threads =
static_cast<size_t>(state.range(0));
69 const size_t num_points =
static_cast<size_t>(state.range(1));
77 for (
auto _ : state) {
79 bb::scalar_multiplication::pippenger_round_parallel<Curve>(poly_scalars, points);
85BENCHMARK_DEFINE_F(PippengerBench, PippengerUnsafeThreads)(benchmark::State& state)
87 const size_t num_threads =
static_cast<size_t>(state.range(0));
88 const size_t num_points =
static_cast<size_t>(state.range(1));
96 for (
auto _ : state) {
98 bb::scalar_multiplication::pippenger_unsafe<Curve>(poly_scalars, points);
106BENCHMARK_DEFINE_F(PippengerBench, BatchMSM)(benchmark::State& state)
108 const size_t num_polys =
static_cast<size_t>(state.range(0));
109 const size_t poly_size =
static_cast<size_t>(state.range(1));
115 for (
size_t i = 0; i < num_polys; ++i) {
116 all_scalars[i].resize(poly_size);
117 for (
auto& s : all_scalars[i]) {
123 for (
auto _ : state) {
151struct BatchScenario {
162 for (
size_t m = 0; m < k; ++m) {
164 for (
size_t i = 0; i < n; ++i) {
165 const bool zero = zero_density > 0.0 && (
static_cast<double>(
engine.get_random_uint32() & 0xFFFFFU) /
166 static_cast<double>(0x100000U)) < zero_density;
167 out[m][i] = zero ?
Fr::zero() : scalar_pool[(m * n + i) % scalar_pool.size()];
174BENCHMARK_DEFINE_F(PippengerBench, BatchedChonk)(benchmark::State& state)
176 const size_t scenario_idx =
static_cast<size_t>(state.range(0));
196 {
"Translator_K10_2_17", 10, 1U << 17, 0.0 },
197 {
"MegaOink_K17_2_17", 17, 1U << 17, 0.0 },
198 {
"DatabusOnly_K10_2_14_mostly0", 10, 1U << 14, 0.75 },
208 {
"ECCVM_dense_K60_2_15", 60, 1U << 15, 0.0 },
209 {
"ECCVM_transcript_K25_2_15", 25, 1U << 15, 0.875 },
211 const auto& sc = scenarios[scenario_idx];
212 state.SetLabel(sc.name);
214 auto all_scalars = build_batch_scalars(sc.k, sc.n, sc.zero_density,
engine, scalars);
217 for (
size_t m = 0; m < sc.k; ++m) {
221 for (
auto _ : state) {
227BENCHMARK_DEFINE_F(PippengerBench, PerMsmChonk)(benchmark::State& state)
229 const size_t scenario_idx =
static_cast<size_t>(state.range(0));
249 {
"Translator_K10_2_17", 10, 1U << 17, 0.0 },
250 {
"MegaOink_K17_2_17", 17, 1U << 17, 0.0 },
251 {
"DatabusOnly_K10_2_14_mostly0", 10, 1U << 14, 0.75 },
261 {
"ECCVM_dense_K60_2_15", 60, 1U << 15, 0.0 },
262 {
"ECCVM_transcript_K25_2_15", 25, 1U << 15, 0.875 },
264 const auto& sc = scenarios[scenario_idx];
265 state.SetLabel(sc.name);
267 auto all_scalars = build_batch_scalars(sc.k, sc.n, sc.zero_density,
engine, scalars);
270 for (
auto _ : state) {
272 for (
size_t m = 0; m < sc.k; ++m) {
274 (void)bb::scalar_multiplication::pippenger_round_parallel<Curve>(sp, points);
289BENCHMARK_DEFINE_F(PippengerBench, BatchMSM_1656)(benchmark::State& state)
291 const size_t num_threads =
static_cast<size_t>(state.range(0));
292 const size_t msm_size =
static_cast<size_t>(state.range(1));
294 std::vector<Fr> msm_scalars(msm_size);
295 for (
auto& s : msm_scalars) {
307 for (
auto _ : state) {
// Scalar-distribution profile for the PippengerSparsity benchmark.
// The benchmark obtains it by casting state.range(0), so the numeric values
// (Dense80 = 0, DupHeavy = 1) must match the first argument list passed at
// registration. DupHeavy additionally turns on the dedup hint handed to
// pippenger_round_parallel in that benchmark.
// NOTE(review): the exact distributions are defined in build_sparsity_scalars;
// the names suggest "mostly dense" vs. "many duplicates" - confirm there.
333enum class SparsityProfile : uint8_t { Dense80 = 0, DupHeavy = 1 };
337 return static_cast<double>(
engine.get_random_uint32()) /
static_cast<double>(std::numeric_limits<uint32_t>::max());
342 std::vector<Fr> out(n);
343 if (profile == SparsityProfile::Dense80) {
344 for (
size_t i = 0; i < n; ++i) {
350 for (
size_t i = 0; i < n; ++i) {
351 const double r = uniform01(
engine);
354 }
else if (r < 0.45) {
356 }
else if (r < 0.50) {
367BENCHMARK_DEFINE_F(PippengerBench, PippengerSparsity)(benchmark::State& state)
369 const auto profile =
static_cast<SparsityProfile
>(state.range(0));
370 const size_t num_points =
static_cast<size_t>(state.range(1));
371 const bool dedup_hint = (profile == SparsityProfile::DupHeavy);
372 state.SetLabel(profile == SparsityProfile::Dense80 ?
"Dense80" :
"DupHeavy");
386 std::vector<Fr> msm_scalars = build_sparsity_scalars(profile, num_points, case_engine);
390 for (
auto _ : state) {
392 (void)bb::scalar_multiplication::pippenger_round_parallel<Curve>(poly_scalars, points, dedup_hint);
// Registers the PippengerUnsafe fixture benchmark, reported in milliseconds.
// The single argument is read in the benchmark body as num_points
// (state.range(0)) and is swept between 2^14 and 2^20; the intermediate
// values are chosen by Google Benchmark's range multiplier.
399BENCHMARK_REGISTER_F(PippengerBench, PippengerUnsafe)
400 ->Unit(benchmark::kMillisecond)
402 ->Range(1 << 14, 1 << 20);
// Registers the PippengerSparsity fixture benchmark, reported in milliseconds.
// Runs the cartesian product of:
//   arg 0 - sparsity profile, cast to SparsityProfile (0 = Dense80, 1 = DupHeavy)
//   arg 1 - num_points, 2^15 through 2^19
405BENCHMARK_REGISTER_F(PippengerBench, PippengerSparsity)
406 ->Unit(benchmark::kMillisecond)
407 ->ArgsProduct({ { 0, 1 }, { 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19 } });
// Registers the BatchMSM fixture benchmark, reported in milliseconds.
// Each Args pair is { num_polys, poly_size } as read from state.range(0) and
// state.range(1) in the benchmark body: 32 polynomials of 2^19 and of 2^21
// scalars respectively.
411BENCHMARK_REGISTER_F(PippengerBench, BatchMSM)
412 ->Unit(benchmark::kMillisecond)
413 ->Args({ 32, 1 << 19 })
414 ->Args({ 32, 1 << 21 });
// Registers the BatchMSM_1656 fixture benchmark, reported in milliseconds.
// Each Args pair is { num_threads, msm_size } as read from state.range(0) and
// state.range(1) in the benchmark body: 256 threads with MSM sizes of 2^16
// and 2^20 scalars.
417BENCHMARK_REGISTER_F(PippengerBench, BatchMSM_1656)
418 ->Unit(benchmark::kMillisecond)
419 ->Args({ 256, 1 << 16 })
420 ->Args({ 256, 1 << 20 });
// Registers the BatchedChonk fixture benchmark, reported in milliseconds.
// The single argument takes every value 0..4 and is used in the benchmark body
// as scenario_idx, the index into its local `scenarios` table.
// NOTE(review): confirm the table holds exactly five entries so that every
// index is valid and no scenario is left unexercised.
424BENCHMARK_REGISTER_F(PippengerBench, BatchedChonk)->Unit(benchmark::kMillisecond)->DenseRange(0, 4, 1);
// Registers the PerMsmChonk fixture benchmark, reported in milliseconds.
// The single argument takes every value 0..4 and is used in the benchmark body
// as scenario_idx, the index into its local `scenarios` table; the body then
// issues one pippenger_round_parallel call per MSM in the selected scenario.
// NOTE(review): confirm the table holds exactly five entries so that every
// index is valid and no scenario is left unexercised.
425BENCHMARK_REGISTER_F(PippengerBench, PerMsmChonk)->Unit(benchmark::kMillisecond)->DenseRange(0, 4, 1);
429BENCHMARK_REGISTER_F(PippengerBench, PippengerRoundParallel)
430 ->Unit(benchmark::kMillisecond)
431 ->ArgsProduct({ { 1, 4, 8, 12, 16, 32, 64, 128 },
451BENCHMARK_REGISTER_F(PippengerBench, PippengerUnsafeThreads)
452 ->Unit(benchmark::kMillisecond)
453 ->ArgsProduct({ { 1, 4, 8, 12, 16, 32, 64, 128 },
typename Group::affine_element AffineElement
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< const AffineElement > points, std::span< PolynomialSpan< ScalarField > > scalars, bool handle_edge_cases=true, std::span< const uint8_t > dedup_hints={}) noexcept
#define GOOGLE_BB_BENCH_REPORTER(state)
RNG & get_debug_randomness(bool reset, std::uint_fast64_t seed)
std::filesystem::path bb_crs_path()
void init_file_crs_factory(const std::filesystem::path &path)
void set_parallel_for_concurrency(size_t num_cores)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
static constexpr field zero()