Barretenberg: src/barretenberg/ecc/scalar_multiplication/pippenger_fallbacks.hpp Source File

#pragma once


// Implementation fragment included from scalar_multiplication_fast.cpp inside

// bb::scalar_multiplication.


// Trivial-N fallback. For small n the Pippenger scaffolding (digit extraction, bucket

// scratch allocation, parallel_for dispatch, GLV split, etc.) costs many times more

// than running a Straus-style simultaneous double-and-add in Jacobian. Delegates to

// `Element::straus_msm`, which on endomorphism curves builds a per-point WNAF lookup

// table and amortises ~128 doublings across all N inputs (vs N×128 for naive

// per-point operator*). Robust to all edge cases (zero scalars, points at infinity)

// so this also covers `handle_edge_cases=true` for trivially small N. The single

// Jacobian→affine inversion at the caller boundary (when `MSM_fast<>::msm` constructs an

// `AffineElement` from the returned `Element`) is the only inversion paid.

template <typename Curve>
typename Curve::Element trivial_msm(PolynomialSpan<const typename Curve::ScalarField> scalars_span,
                                    std::span<const typename Curve::AffineElement> all_points) noexcept
{
    using Element = typename Curve::Element;
    using AffineElement = typename Curve::AffineElement;
    using ScalarField = typename Curve::ScalarField;

    const size_t n = scalars_span.size();
    if (n == 0) {
        // Empty input: return the identity without calling straus_msm.
        return Curve::Group::point_at_infinity;
    }

    // Scalar i is paired with point (start_index + i), so the point table has to
    // extend at least n entries past start_index.
    BB_ASSERT_GTE(all_points.size(), scalars_span.start_index + n);

    // Window of exactly n points lined up with the n scalars.
    std::span<const AffineElement> point_window = all_points.subspan(scalars_span.start_index, n);

    const ScalarField* const first_scalar = scalars_span.span.data();
    std::span<const ScalarField> scalar_window(first_scalar, n);

    return Element::straus_msm(point_window, scalar_window);
}


// Multi-threaded `Element::straus_msm` driver for very small MSMs.
//
// Drops zero scalars (and their points), splits the surviving entries into at most
// `bb::get_num_cpus()` contiguous slices, runs `Element::straus_msm` on each slice via
// `bb::parallel_for`, and sums the per-slice results. Returns the point at infinity
// when the input is empty or every scalar is zero.
template <typename Curve>
typename Curve::Element trivial_msm_threaded(PolynomialSpan<const typename Curve::ScalarField> scalars_span,
                                             std::span<const typename Curve::AffineElement> all_points) noexcept
{
    using Element = typename Curve::Element;
    using AffineElement = typename Curve::AffineElement;
    using ScalarField = typename Curve::ScalarField;

    const size_t n = scalars_span.size();
    if (n == 0) {
        return Curve::Group::point_at_infinity;
    }
    BB_ASSERT_GTE(all_points.size(), scalars_span.start_index + n);

    // Compact away zero scalars before handing anything to straus_msm. straus_msm
    // pays a non-trivial fixed cost per scalar (per-window bias decode + bucket
    // scatter), and when this function is reached through the n_active-based fallback
    // in pippenger_round_parallel the input is often mostly zeros (that fallback
    // fires precisely because n_active << n). One compaction pass here spares every
    // worker slice from walking the dead entries.
    const ScalarField* const scalar_in = scalars_span.span.data();
    const AffineElement* const point_in = all_points.data() + scalars_span.start_index;

    std::vector<ScalarField> live_scalars;
    std::vector<AffineElement> live_points;
    live_scalars.reserve(n);
    live_points.reserve(n);
    for (size_t idx = 0; idx < n; ++idx) {
        const ScalarField& scalar = scalar_in[idx];
        if (scalar.is_zero()) {
            continue;
        }
        live_scalars.push_back(scalar);
        live_points.push_back(point_in[idx]);
    }

    if (live_scalars.empty()) {
        return Curve::Group::point_at_infinity;
    }
    const size_t live_count = live_scalars.size();

    // Views over the compacted storage; the serial path uses them whole, the
    // parallel path slices them per worker.
    std::span<const AffineElement> live_points_view(live_points.data(), live_count);
    std::span<const ScalarField> live_scalars_view(live_scalars.data(), live_count);

    // The worker count is capped at `bb::get_num_cpus()`:
    //   1. We want one task per OS worker, not lmul-oversubscribed: straus_msm slices
    //      have non-trivial fixed cost, so dynamic-claim averaging isn't worth the
    //      extra dispatch tax at the trivial-MSM_fast sizes this function handles.
    //   2. `bb::get_num_cpus() <= 1` is the chonk-batch-verifier serial gate; the
    //      `<= 1` early return below preserves that contract regardless of pool.
    const size_t worker_count = std::min<size_t>(live_count, bb::get_num_cpus());
    if (worker_count <= 1) {
        return Element::straus_msm(live_points_view, live_scalars_view);
    }

    // One straus_msm call per worker over its contiguous slice. straus_msm accepts
    // Montgomery-form scalars (it converts internally), so callers must pass
    // Montgomery-form scalars on entry to this function.
    std::vector<Element> partials(worker_count, Curve::Group::point_at_infinity);
    bb::parallel_for(worker_count, [&](size_t worker) {
        // Balanced split: worker w covers [w * live_count / W, (w + 1) * live_count / W).
        const size_t begin = (worker * live_count) / worker_count;
        const size_t end = ((worker + 1) * live_count) / worker_count;
        const size_t length = end - begin;
        if (length != 0) {
            std::span<const AffineElement> slice_points = live_points_view.subspan(begin, length);
            std::span<const ScalarField> slice_scalars = live_scalars_view.subspan(begin, length);
            partials[worker] = Element::straus_msm(slice_points, slice_scalars);
        }
    });

    // Fold the per-worker results in worker order, seeded with worker 0's result.
    Element total_result = partials.front();
    for (size_t worker = 1; worker < worker_count; ++worker) {
        total_result += partials[worker];
    }
    return total_result;
}


BB_ASSERT_GTE
#define BB_ASSERT_GTE(left, right,...)
Definition assert.hpp:128

bb::curve::BN254::Element
typename Group::element Element
Definition bn254.hpp:21

bb::curve::BN254::AffineElement
typename Group::affine_element AffineElement
Definition bn254.hpp:22

bb::curve::BN254::ScalarField
bb::fr ScalarField
Definition bn254.hpp:18

bb::get_num_cpus
size_t get_num_cpus()
Definition thread.cpp:33

bb::parallel_for
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
Definition thread.cpp:111

std::get
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13

trivial_msm_threaded
Curve::Element trivial_msm_threaded(PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
Multi-threaded straus_msm driver for very-small MSMs.
Definition pippenger_fallbacks.hpp:42

trivial_msm
Curve::Element trivial_msm(PolynomialSpan< const typename Curve::ScalarField > scalars_span, std::span< const typename Curve::AffineElement > all_points) noexcept
Definition pippenger_fallbacks.hpp:16

Element
Curve::Element Element
Definition small_msm_matrix.bench.cpp:40