24 std::span<typename Curve::AffineElement>
doubled;
37template <
typename Curve>
49 const size_t K = scalar_arrays.size();
51 out_results.assign(K, Curve::Group::point_at_infinity);
53 auto hint_for = [&](
size_t m)
noexcept ->
bool {
return m < dedup_hints.size() && dedup_hints[m] != 0; };
59 const size_t n = std::min(scalar_arrays[0].size(), point_arrays[0].size());
63 PolynomialSpan<const ScalarField> sp(0, std::span<const ScalarField>(scalar_arrays[0].
data(), n));
64 out_results[0] = pippenger_round_parallel<Curve>(sp, point_arrays[0], hint_for(0));
68 std::vector<size_t> n_input(K);
69 for (
size_t m = 0; m < K; ++m) {
70 n_input[m] = std::min(scalar_arrays[m].size(), point_arrays[m].size());
79 auto find_or_create_group = [&](
const AffineElement* base_ptr,
size_t n) ->
size_t {
80 for (
size_t g = 0;
g < glv_groups.size(); ++
g) {
81 if (glv_groups[g].base_ptr == base_ptr) {
87 g.base_ptr = base_ptr;
90 return glv_groups.size() - 1;
93 std::vector<size_t> msm_to_group(K, std::numeric_limits<size_t>::max());
94 for (
size_t m = 0; m < K; ++m) {
95 if (n_input[m] == 0) {
98 const size_t g = find_or_create_group(point_arrays[m].
data(), n_input[m]);
99 glv_groups[
g].member_msms.push_back(m);
103 std::vector<bool> group_uses_glv(glv_groups.size(),
false);
104 for (
size_t g = 0;
g < glv_groups.size(); ++
g) {
109 group_uses_glv[
g] = glv_groups[
g].group_max_n <= round_parallel_detail::GLV_SMALL_N_THRESHOLD;
123 BB_BENCH_NAME(
"MSM_fast::pippenger_round_parallel_batched/glv_double_points");
125 const AffineElement* min_base =
nullptr;
126 for (
size_t g = 0;
g < glv_groups.size(); ++
g) {
127 glv_groups[
g].doubled = {};
128 if (!group_uses_glv[g]) {
132 min_base = glv_groups[
g].base_ptr;
136 if (min_base !=
nullptr) {
137 const auto min_addr =
reinterpret_cast<uintptr_t
>(min_base);
138 size_t max_extent_units = 0;
139 for (
size_t g = 0;
g < glv_groups.size(); ++
g) {
140 if (!group_uses_glv[g]) {
143 const auto base_addr =
reinterpret_cast<uintptr_t
>(glv_groups[
g].base_ptr);
144 const uintptr_t offset_bytes =
base_addr - min_addr;
147 "GLV group base_ptr not aligned to AffineElement boundary "
148 "(point spans must be subranges of a contiguous AffineElement array)");
149 const size_t offset_units = offset_bytes /
sizeof(AffineElement);
150 const size_t end_units = offset_units + glv_groups[
g].group_max_n;
151 max_extent_units =
std::max(max_extent_units, end_units);
155 2 * max_extent_units);
156 AffineElement*
const master_buf = master_doubled_owner.get();
157 const BaseField beta = BaseField::cube_root_of_unity();
159 for (
size_t i : chunk.range(max_extent_units)) {
160 master_buf[2 * i] = min_base[i];
161 master_buf[(2 * i) + 1].x = min_base[i].x * beta;
162 master_buf[(2 * i) + 1].y = -min_base[i].y;
166 for (
size_t g = 0;
g < glv_groups.size(); ++
g) {
167 if (!group_uses_glv[g]) {
170 const auto base_addr =
reinterpret_cast<uintptr_t
>(glv_groups[
g].base_ptr);
171 const size_t offset_units = (
base_addr - min_addr) /
sizeof(AffineElement);
172 glv_groups[
g].doubled =
183 size_t shared_arena_bytes = 0;
184 for (
size_t m = 0; m < K; ++m) {
185 if (n_input[m] == 0) {
188 const size_t g = msm_to_group[m];
190 g != std::numeric_limits<size_t>::max() && group_uses_glv[
g] && !glv_groups[
g].doubled.empty();
194 const bool dedup_active_m = hint_for(m);
195 const size_t bytes = compute_arena_bytes_for_msm<Curve>(n_input[m], ext_glv, dedup_active_m);
196 shared_arena_bytes =
std::max(shared_arena_bytes, bytes);
200 if (shared_arena_bytes > 0) {
209 for (
size_t m = 0; m < K; ++m) {
210 const size_t n = n_input[m];
214 PolynomialSpan<const ScalarField> sp(0, std::span<const ScalarField>(scalar_arrays[m].
data(), n));
216 const size_t g = msm_to_group[m];
217 std::span<const AffineElement> external_glv;
218 if (g != std::numeric_limits<size_t>::max() && group_uses_glv[g]) {
222 external_glv = std::span<const AffineElement>(glv_groups[g].doubled.data(), 2 * n);
225 out_results[m] = pippenger_round_parallel<Curve>(sp, point_arrays[m], hint_for(m), external_glv, shared_arena);
230template <
typename Curve>
234 bool handle_edge_cases,
238 const size_t k = scalars.size();
247 point_subspans.reserve(k);
248 scalar_subspans.reserve(k);
249 for (
size_t i = 0; i < k; ++i) {
250 const size_t start_i = scalars[i].start_index;
251 BB_ASSERT_LTE(start_i, points.size(),
"scalars[m].start_index exceeds shared points span");
252 point_subspans.push_back(points.subspan(start_i, points.size() - start_i));
253 scalar_subspans.push_back(scalars[i].span);
256 auto hint_for = [&](
size_t m)
noexcept ->
bool {
return m < dedup_hints.size() && dedup_hints[m] != 0; };
258 if (handle_edge_cases) {
259 std::vector<AffineElement> results(k);
260 for (
size_t i = 0; i < k; ++i) {
261 const size_t n = std::min(point_subspans[i].size(), scalar_subspans[i].size());
263 std::span<const ScalarField>(scalar_subspans[i].
data(), n));
265 AffineElement(pippenger_fast<Curve>(scalar_span, point_subspans[i], handle_edge_cases, hint_for(i)));
271 pippenger_round_parallel_batched<Curve>(scalar_subspans, point_subspans, per_msm_jac, dedup_hints);
273 std::vector<AffineElement> results(k);
274 for (
size_t i = 0; i < k; ++i) {
#define BB_ASSERT_EQ(actual, expected,...)
#define BB_ASSERT_LTE(left, right,...)
#define BB_BENCH_NAME(name)
typename Group::affine_element AffineElement
typename Curve::AffineElement AffineElement
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
constexpr void g(state_array &state, size_t a, size_t b, size_t c, size_t d, uint32_t x, uint32_t y)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
std::vector< size_t > member_msms
const Curve::AffineElement * base_ptr
std::span< typename Curve::AffineElement > doubled