Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
pippenger_fallbacks.hpp
Go to the documentation of this file.
1#pragma once
2
3// Implementation fragment included from scalar_multiplication_fast.cpp inside
4// bb::scalar_multiplication.
5
6// Trivial-N fallback. For small n the Pippenger scaffolding (digit extraction, bucket
7// scratch allocation, parallel_for dispatch, GLV split, etc.) costs many times more
8// than running a Straus-style simultaneous double-and-add in Jacobian. Delegates to
9// `Element::straus_msm`, which on endomorphism curves builds a per-point WNAF lookup
10// table and amortises ~128 doublings across all N inputs (vs N×128 for naive
11// per-point operator*). Robust to all edge cases (zero scalars, points at infinity)
12// so this also covers `handle_edge_cases=true` for trivially small N. The single
13// Jacobian→affine inversion at the caller boundary (when `MSM_fast<>::msm` constructs an
14// `AffineElement` from the returned `Element`) is the only inversion paid.
15template <typename Curve>
16typename Curve::Element trivial_msm(PolynomialSpan<const typename Curve::ScalarField> scalars_span,
18{
19 using Element = typename Curve::Element;
20 using AffineElement = typename Curve::AffineElement;
21 using ScalarField = typename Curve::ScalarField;
22
23 const size_t n = scalars_span.size();
24 if (n == 0) {
25 return Curve::Group::point_at_infinity;
26 }
27 BB_ASSERT_GTE(all_points.size(), scalars_span.start_index + n);
28 std::span<const AffineElement> points_view(&all_points[scalars_span.start_index], n);
29 std::span<const ScalarField> scalars_view(scalars_span.span.data(), n);
30 return Element::straus_msm(points_view, scalars_view);
31}
32
41template <typename Curve>
42typename Curve::Element trivial_msm_threaded(PolynomialSpan<const typename Curve::ScalarField> scalars_span,
44{
45 using Element = typename Curve::Element;
46 using AffineElement = typename Curve::AffineElement;
47 using ScalarField = typename Curve::ScalarField;
48 const size_t n = scalars_span.size();
49 if (n == 0) {
50 return Curve::Group::point_at_infinity;
51 }
52 BB_ASSERT_GTE(all_points.size(), scalars_span.start_index + n);
53
54 // Strip zero-scalar entries before dispatching to straus_msm. straus_msm has
55 // non-trivial per-scalar fixed cost (per-window bias decode + bucket scatter), and
56 // when this function fires from the n_active-based fallback in
57 // pippenger_round_parallel the input span often contains many zeros (the
58 // dispatch fired precisely because n_active << n). Compacting once up front saves
59 // straus_msm one pass over the dead entries on every worker slice.
60 std::vector<ScalarField> compact_scalars;
61 std::vector<AffineElement> compact_points;
62 compact_scalars.reserve(n);
63 compact_points.reserve(n);
64 const ScalarField* src_scalars = scalars_span.span.data();
65 const AffineElement* src_points = all_points.data() + scalars_span.start_index;
66 for (size_t i = 0; i < n; ++i) {
67 if (!src_scalars[i].is_zero()) {
68 compact_scalars.push_back(src_scalars[i]);
69 compact_points.push_back(src_points[i]);
70 }
71 }
72 const size_t n_active = compact_scalars.size();
73 if (n_active == 0) {
74 return Curve::Group::point_at_infinity;
75 }
76
77 // Cap at `bb::get_num_cpus()` rather than `bb::get_num_cpus()`:
78 // 1. Want one task per OS worker, not lmul-oversubscribed — straus_msm slices
79 // have non-trivial fixed cost so dynamic-claim averaging isn't worth the
80 // extra dispatch tax at the trivial-MSM_fast sizes this function handles.
81 // 2. `bb::get_num_cpus() <= 1` is the chonk-batch-verifier serial gate; the
82 // `<= 1` early-return below preserves that contract regardless of pool.
83 const size_t max_threads = bb::get_num_cpus();
84 const size_t num_threads = std::min(n_active, max_threads);
85 if (num_threads <= 1) {
86 std::span<const AffineElement> pts(compact_points.data(), n_active);
87 std::span<const ScalarField> scs(compact_scalars.data(), n_active);
88 return Element::straus_msm(pts, scs);
89 }
90
91 // Each worker runs `Element::straus_msm` over its slice. Note that straus_msm
92 // accepts Montgomery-form scalars (it converts internally), so callers must pass
93 // Montgomery-form scalars on entry to this function.
94 std::vector<Element> partials(num_threads, Curve::Group::point_at_infinity);
95 bb::parallel_for(num_threads, [&](size_t tid) {
96 const size_t lo = (tid * n_active) / num_threads;
97 const size_t hi = ((tid + 1) * n_active) / num_threads;
98 const size_t slice_n = hi - lo;
99 if (slice_n == 0) {
100 return;
101 }
102 std::span<const AffineElement> pts(compact_points.data() + lo, slice_n);
103 std::span<const ScalarField> scs(compact_scalars.data() + lo, slice_n);
104 partials[tid] = Element::straus_msm(pts, scs);
105 });
106 Element total_result = partials[0];
107 for (size_t t = 1; t < num_threads; ++t) {
108 total_result += partials[t];
109 }
110 return total_result;
111}
#define BB_ASSERT_GTE(left, right,...)
Definition assert.hpp:128
typename Group::element Element
Definition bn254.hpp:21
typename Group::affine_element AffineElement
Definition bn254.hpp:22
bb::fr ScalarField
Definition bn254.hpp:18
size_t get_num_cpus()
Definition thread.cpp:33
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
Definition thread.cpp:111
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
Curve::Element trivial_msm_threaded(PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
Multi-threaded straus_msm driver for very-small MSMs.
Curve::Element trivial_msm(PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
Curve::Element Element