14#include <gtest/gtest.h>
// Test helper: simulates the arena layout for an MSM over `n_input` points and checks each
// simulated allocation against the capacity reported by compute_arena_bytes_for_msm<Curve>.
// NOTE(review): this excerpt omits interior lines (braces, return statements, some operands),
// so the comments below describe only the statements that are visible.
26bool pippenger_bn254_arena_layout_fits_for_test(
size_t n_input,
27 bool external_glv_provided =
false,
28 bool dedup_active =
false,
29 size_t effective_num_bits_for_test = 0) noexcept
35 namespace rpd = scalar_multiplication::round_parallel_detail;
37 constexpr size_t FULL_NUM_BITS = ScalarField::modulus.get_msb() + 1;
// GLV is used when the caller supplies it or when n_input is at or below
// GLV_SMALL_N_THRESHOLD. With GLV the working point count doubles and the scalar
// width becomes 128 bits; otherwise the full modulus bit length is used.
42 const bool use_glv = external_glv_provided || (n_input <= rpd::GLV_SMALL_N_THRESHOLD);
43 const bool inline_glv_double = use_glv && !external_glv_provided;
44 const size_t n = use_glv ? 2 * n_input : n_input;
45 const size_t NUM_BITS = use_glv ?
size_t{ 128 } : FULL_NUM_BITS;
46 const size_t arena_capacity =
47 scalar_multiplication::compute_arena_bytes_for_msm<Curve>(n_input, external_glv_provided, dedup_active);
48 if (arena_capacity == 0) {
// A requested width of 0 or above NUM_BITS selects the fallback operand, which is not
// visible in this excerpt (presumably NUM_BITS); otherwise the requested width is used.
52 const size_t actual_num_bits = (effective_num_bits_for_test == 0 || effective_num_bits_for_test > NUM_BITS)
54 : effective_num_bits_for_test;
55 const size_t num_logical_threads_for_c =
57 const size_t window_bits = rpd::choose_window_bits(n, actual_num_bits, n_input, num_logical_threads_for_c);
58 const auto sched = rpd::build_var_window_schedule(actual_num_bits, window_bits);
59 const size_t num_buckets = (
size_t{ 1 } << (window_bits - 1)) + 1;
61 using rpd::BATCH_CAPACITY;
67 const size_t max_threads_for_min_batch =
std::max<size_t>(1, n / MIN_BATCH_CAPACITY);
68 const size_t num_threads = std::min(desired_threads, max_threads_for_min_batch);
70 const size_t worker_total = num_threads;
// B_eff is the largest bucket count over the default value and every window of the schedule.
72 size_t B_eff = num_buckets;
73 for (
size_t w = 0; w < sched.num_windows; ++w) {
74 B_eff =
std::max(B_eff,
static_cast<size_t>(sched.num_buckets[w]));
76 const size_t dense_stride_est =
78 const size_t bucket_partials_per_window_max = (B_eff > 0) ? (B_eff - 1 + num_threads - 1) : 0;
// Per-window byte estimates. The histogram slot is sized as the larger of its two
// candidate footprints (hist_h vs hist_o).
79 const size_t hist_h_bytes_pw_shared = (
size_t{ 4 } * num_threads * B_eff);
80 const size_t hist_o_bytes_pw_shared =
81 (
sizeof(rpd::ChunkOutput<Curve>) * num_threads) + (
size_t{ 96 } * num_threads);
82 const size_t hist_slot_bytes_pw_shared =
std::max(hist_h_bytes_pw_shared, hist_o_bytes_pw_shared);
83 const size_t dense_slot_bytes_pw_shared = (
size_t{ 65 } * bucket_partials_per_window_max);
84 const size_t per_window_bytes_shared =
85 hist_slot_bytes_pw_shared + dense_slot_bytes_pw_shared + (
size_t{ 8 } * (B_eff + 1)) +
86 (
size_t{ 8 } * (num_threads + 1)) + (
size_t{ 8 } * (num_threads + 1)) + (
size_t{ 8 } * num_threads) +
87 (
size_t{ 8 } * num_threads) + (
size_t{ 8 } * num_threads) + (
size_t{ 16 } * worker_total) +
88 (
size_t{ 8 } * num_threads) + (
size_t{ 87 } * worker_total * dense_stride_est);
89 const size_t capacity_lo = n;
90 const size_t per_window_bytes_lo = (
size_t{ 4 } * capacity_lo) + per_window_bytes_shared;
92 const size_t global_max_chunk_len = (n + num_threads - 1) / num_threads;
93 const size_t global_max_overflow_per_window =
95 const size_t chunk_capacity =
std::max(SUBCHUNK_ENTRIES_CAP, 2 * global_max_overflow_per_window);
97 const size_t phase_a_cluster_members_cap = std::min(rpd::DEDUP_MAX_MEMBERS, n);
98 const size_t phase_a_cluster_offsets_cap = (rpd::DEDUP_MAX_CLUSTERS / num_threads) + 2;
// Prologue estimate: n bytes, plus 32 bytes per point under GLV, plus 64 bytes per point when
// the doubled inputs are built inline, plus 1024 bytes per profile thread.
100 const size_t phase_one_prologue_bytes = n + (use_glv ?
size_t{ 32 } * n :
size_t{ 0 }) +
101 (inline_glv_double ?
size_t{ 64 } * n :
size_t{ 0 }) +
102 (profile_threads *
size_t{ 1024 });
104 const rpd::PerWorkerArenaLayout<Curve> budget_layout(
105 SUBCHUNK_ENTRIES_CAP,
106 global_max_overflow_per_window,
108 phase_a_cluster_members_cap,
109 phase_a_cluster_offsets_cap,
112 const size_t worker_union_bytes_for_budget = budget_layout.per_worker_union_bytes;
113 const size_t fixed_overhead = (worker_union_bytes_for_budget * worker_total) +
114 (
size_t{ 96 } * rpd::VAR_WINDOW_MAX_WINDOWS) + (
size_t{ 8 } * (num_threads + 1)) +
115 phase_one_prologue_bytes;
// Budget left after fixed overhead, clamped at zero so the subtraction cannot wrap.
116 const size_t available_budget =
117 (
BATCH_MEM_BUDGET > fixed_overhead) ? (BATCH_MEM_BUDGET - fixed_overhead) :
size_t{ 0 };
118 const size_t windows_per_batch = (per_window_bytes_lo == 0 || available_budget == 0)
121 static_cast<size_t>(sched.num_windows));
// align_up rounds `off` up to a multiple of `align`; the mask form assumes `align` is a power
// of two. layout_add aligns the running offset and then appends `bytes`.
123 auto align_up = [](
size_t off,
size_t align) ->
size_t {
return (off + align - 1) & ~(align - 1); };
124 auto layout_add = [&](
size_t& off,
size_t bytes,
size_t align) { off = align_up(off, align) + bytes; };
// bump_fits simulates one bump allocation: it pads `cursor` so that
// (base_misalign + base_offset + cursor) is aligned, tests the padded end against `bound`,
// and advances `cursor` past the allocation. The failing branch is not visible here.
125 auto bump_fits = [&](
size_t count,
131 size_t base_misalign) {
132 const size_t cur_addr_mod = (base_misalign + base_offset +
cursor) & (align - 1);
133 const size_t align_delta = (cur_addr_mod == 0) ?
size_t{ 0 } : (align - cur_addr_mod);
134 const size_t aligned_local =
cursor + align_delta;
135 const size_t bytes = count * size;
136 if (aligned_local + bytes > bound) {
139 cursor = aligned_local + bytes;
// Repeat the whole simulation for every base misalignment below alignof(AffineElement).
143 for (
size_t base_misalign = 0; base_misalign <
alignof(AffineElement); ++base_misalign) {
144 size_t arena_cursor = 0;
145 if (!bump_fits(n,
sizeof(uint8_t),
alignof(uint8_t), arena_cursor, arena_capacity, 0, base_misalign)) {
148 if (!bump_fits(profile_threads,
159 n,
sizeof(ScalarField),
alignof(ScalarField), arena_cursor, arena_capacity, 0, base_misalign)) {
162 if (inline_glv_double &&
164 n,
sizeof(AffineElement),
alignof(AffineElement), arena_cursor, arena_capacity, 0, base_misalign)) {
168 const size_t bytes_P_prefix = arena_cursor;
170 const rpd::PerWorkerArenaLayout<Curve> worker_layout(chunk_capacity,
171 global_max_overflow_per_window,
173 phase_a_cluster_members_cap,
174 phase_a_cluster_offsets_cap,
177 constexpr size_t WORKER_SLAB_ALIGN = rpd::PerWorkerArenaLayout<Curve>::WORKER_SLAB_ALIGN;
178 const size_t per_worker_bytes = worker_layout.per_worker_bytes;
180 size_t bytes_P_extra_layout = 0;
181 layout_add(bytes_P_extra_layout,
sizeof(
Element) * rpd::VAR_WINDOW_MAX_WINDOWS,
alignof(
Element));
183 layout_add(bytes_P_extra_layout,
sizeof(uint32_t) * n,
alignof(uint32_t));
184 layout_add(bytes_P_extra_layout,
sizeof(AffineElement) * rpd::DEDUP_MAX_CLUSTERS,
alignof(AffineElement));
// bytes_P is padded so that (base_misalign + bytes_P) is a multiple of WORKER_SLAB_ALIGN;
// bytes_W then covers one slab of per_worker_bytes for each worker.
186 const size_t bytes_P_min = align_up(bytes_P_prefix,
alignof(
Element)) + bytes_P_extra_layout;
187 const size_t bytes_P = align_up(bytes_P_min + base_misalign, WORKER_SLAB_ALIGN) - base_misalign;
188 const size_t bytes_W = per_worker_bytes * worker_total;
189 if (bytes_P + bytes_W > arena_capacity) {
// The remainder after P and W is the S zone; its allocations are simulated from cursor 0
// relative to zone_S_base.
192 const size_t bytes_S_total = arena_capacity - bytes_P - bytes_W;
193 size_t zone_S_cursor = 0;
194 const size_t zone_S_base = bytes_P + bytes_W;
196 const size_t schedule_total = windows_per_batch * capacity_lo;
197 if (!bump_fits(schedule_total,
206 const size_t hist_h_bytes_total =
size_t{ 4 } * windows_per_batch * num_threads * B_eff;
207 size_t o_layout_cur = 0;
208 o_layout_cur = align_up(o_layout_cur,
alignof(rpd::ChunkOutput<Curve>));
209 o_layout_cur +=
sizeof(rpd::ChunkOutput<Curve>) * windows_per_batch * num_threads;
210 o_layout_cur = align_up(o_layout_cur,
alignof(
Element));
211 o_layout_cur +=
sizeof(
Element) * num_threads * windows_per_batch;
// Slot sizes are expressed in AffineElement-sized cells, rounded up.
212 const size_t hist_slot_cells =
213 (
std::max(hist_h_bytes_total, o_layout_cur) +
sizeof(AffineElement) - 1) /
sizeof(AffineElement);
214 const size_t dense_slot_cells =
215 ((
size_t{ 65 } * windows_per_batch * bucket_partials_per_window_max) +
sizeof(AffineElement) - 1) /
216 sizeof(AffineElement);
217 if (!bump_fits(hist_slot_cells,
218 sizeof(AffineElement),
219 alignof(AffineElement),
224 !bump_fits(dense_slot_cells,
225 sizeof(AffineElement),
226 alignof(AffineElement),
231 !bump_fits(windows_per_batch * (B_eff + 1),
238 !bump_fits(windows_per_batch * (num_threads + 1),
245 !bump_fits(windows_per_batch * (num_threads + 1),
252 !bump_fits(windows_per_batch * num_threads,
259 !bump_fits((num_threads * windows_per_batch) + 1,
266 !bump_fits(num_threads + 1,
273 !bump_fits(windows_per_batch * num_threads,
280 !bump_fits(windows_per_batch * num_threads,
// Tail of a diagnostic message; the enclosing assertion is not visible in this excerpt.
313 "test_batch_multi_scalar_mul can exceed num_points; "
314 "raise num_points or lower kMaxBatchMSMs / kMaxBatchPointsPerMSM");
// Scalar pool read by many of the tests below (presumably a static fixture member).
317 static inline std::vector<ScalarField>
scalars{};
// Reference MSM accumulation (presumably the body of naive_msm; the signature is not visible).
// The input is cut into num_threads contiguous ranges using a ceiling division, each range
// accumulates input_points[i] * input_scalars[i] starting from infinity, and the per-range
// results are stored in expected_accs before a final pass over them.
321 size_t total_points = input_scalars.size();
323 std::vector<Element> expected_accs(num_threads);
324 size_t range_per_thread = (total_points + num_threads - 1) / num_threads;
327 expected_thread_acc.self_set_infinity();
328 size_t start = thread_idx * range_per_thread;
// `end` is clamped to total_points; `skip` flags ranges that start past the end of the input.
329 size_t end = ((thread_idx + 1) * range_per_thread > total_points) ? total_points
330 : (thread_idx + 1) * range_per_thread;
331 bool skip = start >= total_points;
333 for (
size_t i = start; i < end; ++i) {
334 expected_thread_acc += input_points[i] * input_scalars[i];
337 expected_accs[thread_idx] = expected_thread_acc;
341 expected_acc.self_set_infinity();
342 for (
auto& acc : expected_accs) {
// Fragment: allocates num_pts affine points, iterates over them, and ends by comparing
// `result` with `expected`.
// NOTE(review): signatures are not visible; these lines may belong to more than one fixture
// method (presumably including test_pippenger_low_memory) — confirm against the full file.
350 std::vector<AffineElement> points(num_pts);
351 for (
size_t i = 0; i < num_pts; ++i) {
362 for (
size_t i = start; i < end; ++i) {
380 EXPECT_EQ(result, expected);
// Batch MSM check (presumably test_batch_multi_scalar_mul; signature not visible).
// Each MSM k copies num_pts scalars from the shared pool starting at vector_offset, uses the
// generators at the same offset as its points, and records a naive_msm reference result.
388 std::vector<AffineElement> expected(num_msms);
391 std::vector<size_t> start_indices(num_msms);
394 size_t vector_offset = 0;
395 for (
size_t k = 0; k < num_msms; ++k) {
// Strict less-than: the test aborts unless the slice ends before num_points.
398 ASSERT_LT(vector_offset + num_pts,
num_points);
400 batch_scalars_copies[k].resize(num_pts);
401 for (
size_t i = 0; i < num_pts; ++i) {
402 batch_scalars_copies[k][i] =
scalars[vector_offset + i];
405 start_indices[k] = vector_offset;
407 vector_offset += num_pts;
409 std::span<const AffineElement> batch_points(&
generators[start_indices[k]], num_pts);
410 expected[k] =
naive_msm(batch_scalars_copies[k], batch_points);
413 std::vector<AffineElement> result =
416 EXPECT_EQ(result, expected);
// Batch of 10 MSMs with 33 points each (presumably test_batch_multi_scalar_mul_sparse).
// Indices 13..22 take pool scalars at fixture_offset + i + 13. The bodies of the loops over
// 0..12 and 23..num_pts-1 are not visible here (presumably zero scalars — confirm).
421 const size_t num_msms = 10;
422 std::vector<AffineElement> expected(num_msms);
427 for (
size_t k = 0; k < num_msms; ++k) {
428 const size_t num_pts = 33;
429 auto& test_scalars = batch_scalars[k];
431 test_scalars.resize(num_pts);
433 size_t fixture_offset = k * num_pts;
435 std::span<const AffineElement> batch_points(&
generators[fixture_offset], num_pts);
436 for (
size_t i = 0; i < 13; ++i) {
439 for (
size_t i = 13; i < 23; ++i) {
440 test_scalars[i] =
scalars[fixture_offset + i + 13];
442 for (
size_t i = 23; i < num_pts; ++i) {
447 expected[k] =
naive_msm(batch_scalars[k], batch_points);
450 std::vector<AffineElement> result =
453 EXPECT_EQ(result, expected);
// Four MSMs of 2^13 points each (presumably test_batch_multi_scalar_mul_large_dense).
// Every MSM uses the same generator prefix as its points; results are compared one by one
// so that a failure message names the mismatching MSM.
461 constexpr size_t num_msms = 4;
462 constexpr size_t per_msm_n = 1 << 13;
464 std::vector<AffineElement> expected(num_msms);
468 for (
size_t k = 0; k < num_msms; ++k) {
469 batch_scalars[k].resize(per_msm_n);
470 for (
size_t i = 0; i < per_msm_n; ++i) {
476 std::span<const AffineElement> pts(&
generators[0], per_msm_n);
478 expected[k] =
naive_msm(batch_scalars[k], pts);
481 std::vector<AffineElement> result =
484 for (
size_t k = 0; k < num_msms; ++k) {
485 EXPECT_EQ(result[k], expected[k]) <<
"MSM " << k <<
" mismatched";
// Batch of MSMs with unequal sizes (presumably test_batch_multi_scalar_mul_ragged).
// For MSMs 1 and 3, every index that is not a multiple of 4 gets a zero scalar; the
// assignment for the remaining entries is not visible in this excerpt.
493 const std::vector<size_t> sizes = { 16384, 4096, 8192, 1024, 12000 };
494 const size_t num_msms = sizes.size();
496 std::vector<AffineElement> expected(num_msms);
500 for (
size_t k = 0; k < num_msms; ++k) {
501 const size_t n = sizes[k];
502 batch_scalars[k].resize(n);
503 for (
size_t i = 0; i < n; ++i) {
504 if ((k == 1 || k == 3) && (i % 4 != 0)) {
505 batch_scalars[k][i] = ScalarField::zero();
510 std::span<const AffineElement> pts(&
generators[0], n);
512 expected[k] =
naive_msm(batch_scalars[k], pts);
515 std::vector<AffineElement> result =
518 for (
size_t k = 0; k < num_msms; ++k) {
519 EXPECT_EQ(result[k], expected[k]) <<
"MSM " << k <<
" (n=" << sizes[k] <<
") mismatched";
// MSM over the tail of the fixture starting at index 1234 (presumably test_msm; the call that
// produces `result` and `expected` is not visible).
525 const size_t start_index = 1234;
526 const size_t num_pts =
num_points - start_index;
534 EXPECT_EQ(result, expected);
// All-zero scalars over the fixture tail starting at index 1234: the result is expected to be
// the affine point at infinity (presumably test_msm_all_zeroes).
539 const size_t start_index = 1234;
540 const size_t num_pts =
num_points - start_index;
541 std::vector<ScalarField> test_scalars(num_pts, ScalarField::zero());
546 EXPECT_EQ(result, Group::affine_point_at_infinity);
// Empty scalar and point vectors: the result is expected to be the affine point at infinity
// (presumably test_msm_empty_polynomial).
551 std::vector<ScalarField> test_scalars;
552 std::vector<AffineElement> input_points;
556 EXPECT_EQ(result, Group::affine_point_at_infinity);
// Keeps a copy of 100 scalars and, after the (not visible) MSM call, verifies element by
// element that the caller's scalars were not modified
// (presumably test_scalars_unchanged_after_msm).
561 const size_t num_pts = 100;
562 std::vector<ScalarField> test_scalars(num_pts);
563 std::vector<ScalarField> scalars_copy(num_pts);
565 for (
size_t i = 0; i < num_pts; ++i) {
567 scalars_copy[i] = test_scalars[i];
570 std::span<const AffineElement> points(&
generators[0], num_pts);
575 for (
size_t i = 0; i < num_pts; ++i) {
576 EXPECT_EQ(test_scalars[i], scalars_copy[i]) <<
"Scalar at index " << i <<
" was modified";
// Batch variant of the scalar-preservation check: 3 MSMs of 100 scalars each are filled from
// the shared pool at offset k * num_pts, copied, and compared after the (not visible) batch call
// (presumably test_scalars_unchanged_after_batch_multi_scalar_mul).
582 const size_t num_msms = 3;
583 const size_t num_pts = 100;
589 for (
size_t k = 0; k < num_msms; ++k) {
590 batch_scalars[k].resize(num_pts);
591 scalars_copies[k].resize(num_pts);
593 for (
size_t i = 0; i < num_pts; ++i) {
594 batch_scalars[k][i] =
scalars[k * num_pts + i];
595 scalars_copies[k][i] = batch_scalars[k][i];
603 for (
size_t k = 0; k < num_msms; ++k) {
604 for (
size_t i = 0; i < num_pts; ++i) {
605 EXPECT_EQ(batch_scalars[k][i], scalars_copies[k][i])
606 <<
"Scalar at MSM " << k <<
", index " << i <<
" was modified";
// Scalar-preservation check at a size just above the GLV threshold (threshold + 257 points),
// i.e. outside the range where use of GLV is decided by n alone.
// The skip below is presumably guarded by a WASM-only condition that is not visible here.
614 GTEST_SKIP() <<
"WASM GLV threshold exceeds the fixture size; non-GLV restoration is native-only here.";
616 namespace rpd = scalar_multiplication::round_parallel_detail;
617 const size_t num_pts = rpd::GLV_SMALL_N_THRESHOLD + 257;
620 std::vector<ScalarField> test_scalars(num_pts);
621 std::vector<ScalarField> scalars_copy(num_pts);
622 for (
size_t i = 0; i < num_pts; ++i) {
624 scalars_copy[i] = test_scalars[i];
627 std::span<const AffineElement> points(&
generators[0], num_pts);
631 for (
size_t i = 0; i < num_pts; ++i) {
632 EXPECT_EQ(test_scalars[i], scalars_copy[i]) <<
"non-GLV scalar at index " << i <<
" was modified";
// Five scalars all equal to one: the reference value is the plain sum of the five points
// (presumably test_scalar_one; the MSM call and final comparison are not visible).
639 const size_t num_pts = 5;
640 std::vector<ScalarField> test_scalars(num_pts, ScalarField::one());
641 std::span<const AffineElement> points(&
generators[0], num_pts);
647 expected.self_set_infinity();
648 for (
size_t i = 0; i < num_pts; ++i) {
649 expected += points[i];
// Five scalars all equal to minus one: the reference value subtracts each point from infinity
// (presumably test_scalar_minus_one; the MSM call and final comparison are not visible).
657 const size_t num_pts = 5;
658 std::vector<ScalarField> test_scalars(num_pts, -ScalarField::one());
659 std::span<const AffineElement> points(&
generators[0], num_pts);
665 expected.self_set_infinity();
666 for (
size_t i = 0; i < num_pts; ++i) {
667 expected -= points[i];
// Single-term MSM using the first pool scalar and the first generator
// (presumably test_single_point).
675 std::vector<ScalarField> test_scalars = {
scalars[0] };
676 std::span<const AffineElement> points(&
generators[0], 1);
682 EXPECT_EQ(result, expected);
// Runs the MSM at a list of sizes that includes values one below, at, and one above 16 and 128
// (presumably test_size_thresholds). Each failure message reports the size.
687 std::vector<size_t> test_sizes = { 1, 2, 15, 16, 17, 50, 127, 128, 129, 256, 512 };
689 for (
size_t num_pts : test_sizes) {
692 std::vector<ScalarField> test_scalars(num_pts);
693 for (
size_t i = 0; i < num_pts; ++i) {
697 std::span<const AffineElement> points(&
generators[0], num_pts);
703 EXPECT_EQ(result, expected) <<
"Failed for size " << num_pts;
// 32 copies of one base point with individual scalars; the scalars are summed into scalar_sum
// (presumably so the reference is base_point * scalar_sum — the line computing `expected` is
// not visible). Presumably test_duplicate_points.
710 const size_t num_pts = 32;
713 std::vector<AffineElement> points(num_pts, base_point);
714 std::vector<ScalarField> test_scalars(num_pts);
717 for (
size_t i = 0; i < num_pts; ++i) {
719 scalar_sum += test_scalars[i];
728 EXPECT_EQ(result, expected);
// 100 scalars of which some are set to zero; the condition selecting the zeroed indices is not
// visible in this excerpt (presumably test_mixed_zero_scalars).
733 const size_t num_pts = 100;
734 std::vector<ScalarField> test_scalars(num_pts);
736 expected.self_set_infinity();
738 for (
size_t i = 0; i < num_pts; ++i) {
740 test_scalars[i] = ScalarField::zero();
747 std::span<const AffineElement> points(&
generators[0], num_pts);
// Calls the free function scalar_multiplication::pippenger<Curve> on 200 scalars and the first
// 200 generators (presumably test_pippenger_free_function; the comparison is not visible).
756 const size_t num_pts = 200;
757 std::vector<ScalarField> test_scalars(num_pts);
758 for (
size_t i = 0; i < num_pts; ++i) {
762 std::span<const AffineElement> points(&
generators[0], num_pts);
765 auto result = scalar_multiplication::pippenger<Curve>(scalar_span, points);
// Same shape as the pippenger<Curve> free-function check, but calls pippenger_unsafe<Curve>
// (presumably test_pippenger_unsafe_free_function; the comparison is not visible).
773 const size_t num_pts = 200;
774 std::vector<ScalarField> test_scalars(num_pts);
775 for (
size_t i = 0; i < num_pts; ++i) {
779 std::span<const AffineElement> points(&
generators[0], num_pts);
782 auto result = scalar_multiplication::pippenger_unsafe<Curve>(scalar_span, points);
// Offset-span MSM check (presumably test_offset_span(n_total, start_index, n_used, seed)).
// Generates n_total random scalars and start_index + n_used random points, runs
// pippenger_unsafe on a scalar span that begins at start_index, and compares against a direct
// sum over the n_used terms at indices start_index + i.
798 std::vector<ScalarField> test_scalars(n_total);
799 std::vector<AffineElement> input_points(start_index + n_used);
800 for (
size_t i = 0; i < n_total; ++i) {
801 test_scalars[i] = ScalarField::random_element(&rng);
803 for (
size_t i = 0; i < input_points.size(); ++i) {
804 input_points[i] =
AffineElement(Element::random_element(&rng));
808 start_index, std::span<const ScalarField>{ test_scalars.
data() + start_index, n_used }
811 Element actual = scalar_multiplication::pippenger_unsafe<Curve>(scalar_span, input_points);
814 expected.self_set_infinity();
815 for (
size_t i = 0; i < n_used; ++i) {
816 expected += input_points[start_index + i] * test_scalars[start_index + i];
819 <<
"Offset MSM mismatch at n_total=" << n_total <<
" start_index=" << start_index <<
" n_used=" << n_used;
// MSM over freshly generated points with random scalars (presumably test_large_n_non_glv; the
// value of num_pts and the MSM call are not visible in this excerpt).
830 std::vector<AffineElement> points(num_pts);
831 std::vector<ScalarField> test_scalars(num_pts);
832 for (
size_t i = 0; i < num_pts; ++i) {
834 test_scalars[i] = ScalarField::random_element(&rng);
840 EXPECT_EQ(result, expected);
// 100000 points that all share the scalar 7 (presumably test_msm_single_digit_mega_run).
857 const size_t num_pts = 100000;
859 std::vector<AffineElement> points(num_pts);
860 for (
size_t i = 0; i < num_pts; ++i) {
863 std::vector<ScalarField> uniform_scalars(num_pts,
ScalarField(7));
869 EXPECT_EQ(result, expected);
// 50000 points sharing one scalar value `val`, passed to a call whose trailing flags are
// (false, true). Presumably test_msm_dedup_cap_and_carry.
// NOTE(review): the callee and the meaning of the two flags are not visible — confirm.
891 const size_t num_pts = 50000;
895 std::vector<ScalarField> uniform_scalars(num_pts, val);
900 points, scalar_span,
false,
true);
904 EXPECT_EQ(result, expected);
// 12000 clusters of 3 entries each (36000 points), presumably
// test_msm_dedup_many_small_clusters_cap.
// NOTE(review): `scalars` declared here is a local that shadows the shared pool of the same name.
917 constexpr size_t NUM_CLUSTERS = 12000;
918 constexpr size_t CLUSTER_SIZE = 3;
919 const size_t num_pts = NUM_CLUSTERS * CLUSTER_SIZE;
921 std::vector<ScalarField>
scalars;
// Fourth constructor argument is 1 << 8; presumably the most significant 64-bit limb, which
// would place the set bit at position 200 — confirm the uint256_t limb order.
923 const uint256_t high_bit(0, 0, 0, uint64_t{ 1 } << (200 - 192));
924 for (
size_t i = 0; i < NUM_CLUSTERS; ++i) {
926 for (
size_t j = 0; j < CLUSTER_SIZE; ++j) {
937 EXPECT_EQ(result, expected);
// Helper fragment (name not visible): runs pippenger_round_parallel on a point span covering
// [0, start_index + n) and compares against a direct sum of
// point_subspan[start_index + i] * scalar_subspan[i]. `label` tags failure messages.
979 ASSERT_LE(start_index + n,
num_points) << label;
982 std::span<const AffineElement> point_subspan(&
generators[0], start_index + n);
985 Element actual = scalar_multiplication::pippenger_round_parallel<Curve>(scalar_span, point_subspan);
988 expected.self_set_infinity();
989 for (
size_t i = 0; i < n; ++i) {
990 expected += point_subspan[start_index + i] * scalar_subspan[i];
994 << label <<
" (n=" << n <<
", start_index=" << start_index <<
")";
// Empty input to pippenger_round_parallel must yield the point at infinity; a sweep over
// sizes follows (its list is cut off in this excerpt).
// Presumably test_pippenger_internal_single_thread.
1005 std::span<const AffineElement> empty_points;
1008 Element r = scalar_multiplication::pippenger_round_parallel<Curve>(empty_span, empty_points);
1009 EXPECT_TRUE(r.is_point_at_infinity());
1015 for (
size_t n : {
size_t{ 1 },
// Sweep starting one above kThreshold (loop body not visible); presumably
// test_pippenger_internal_single_thread_at_dispatch_threshold_plus_one.
1042 for (
size_t n : { kThreshold + 1,
size_t{ 32 },
size_t{ 33 },
size_t{ 50 },
size_t{ 100 } }) {
// For each thread count, probes sizes one below, at, and one above threads * kThreshold
// (presumably test_pippenger_internal_dispatch_threshold_per_thread_count).
1053 for (
size_t threads : {
size_t{ 2 },
size_t{ 4 },
size_t{ 8 },
size_t{ 16 } }) {
1056 const size_t boundary = threads * kThreshold;
1057 for (
size_t n : { boundary - 1, boundary, boundary + 1 }) {
// Saves the first 1024 pool scalars, overwrites them with zero, and expects
// pippenger_round_parallel to return the point at infinity at several sizes.
// The final loop runs over the saved range (presumably restoring the pool; its body is not
// visible). Presumably test_pippenger_internal_all_zero_scalars.
1084 std::vector<ScalarField> saved(
scalars.begin(),
scalars.begin() + 1024);
1085 for (
size_t i = 0; i < saved.size(); ++i) {
1086 scalars[i] = ScalarField::zero();
1088 for (
size_t n : {
size_t{ 1 },
size_t{ 24 },
size_t{ 100 },
size_t{ 1000 } }) {
1090 std::span<const AffineElement> pts(&
generators[0], n);
1092 Element r = scalar_multiplication::pippenger_round_parallel<Curve>(sp, pts);
1093 EXPECT_TRUE(r.is_point_at_infinity()) <<
"all-zero n=" << n;
1096 for (
size_t i = 0; i < saved.size(); ++i) {
// Saves the first 1024 pool scalars and zeroes every even index, then sweeps several sizes.
// The check inside the sweep and the body of the final loop are not visible
// (the final loop presumably restores the pool).
1106 std::vector<ScalarField> saved(
scalars.begin(),
scalars.begin() + 1024);
1108 for (
size_t i = 0; i < 1024; i += 2) {
1109 scalars[i] = ScalarField::zero();
1111 for (
size_t n : {
size_t{ 24 },
size_t{ 100 },
size_t{ 1024 } }) {
1115 for (
size_t i = 0; i < saved.size(); ++i) {
// Saves the first 256 pool scalars, then fills that range first with one and later with
// minus one. The checks between the fills and the body of the final loop are not visible
// (the final loop presumably restores the pool).
1126 std::vector<ScalarField> saved(
scalars.begin(),
scalars.begin() + 256);
1129 for (
auto& s : saved) {
1132 for (
size_t i = 0; i < 256; ++i) {
1133 scalars[i] = ScalarField::one();
1138 for (
size_t i = 0; i < 256; ++i) {
1139 scalars[i] = -ScalarField::one();
1144 for (
size_t i = 0; i < saved.size(); ++i) {
// Sweeps thread counts and sizes for trivial_msm_threaded<Curve> and compares each result with
// a direct sum of pts[i] * sub[i].
// NOTE(review): how `threads` is applied (e.g. a concurrency override) is not visible here.
1154 for (
size_t threads : {
size_t{ 1 },
size_t{ 2 },
size_t{ 4 },
size_t{ 8 } }) {
1156 for (
size_t n : {
size_t{ 1 },
size_t{ 2 },
size_t{ 8 },
size_t{ 32 },
size_t{ 80 },
size_t{ 160 } }) {
1158 std::span<const AffineElement> pts(&
generators[0], n);
1160 Element actual = scalar_multiplication::trivial_msm_threaded<Curve>(sp, pts);
1162 expected.self_set_infinity();
1163 for (
size_t i = 0; i < n; ++i) {
1164 expected += pts[i] * sub[i];
1167 <<
"trivial_msm_threaded threads=" << threads <<
" n=" << n;
// Two definitions of glv_threshold (2^16 and 2^13); presumably alternative preprocessor
// branches whose directives are not visible in this excerpt. The test is skipped when the
// threshold cannot be exercised with the available precomputed points.
1180 constexpr size_t glv_threshold =
size_t{ 1 } << 16;
1182 constexpr size_t glv_threshold =
size_t{ 1 } << 13;
1185 GTEST_SKIP() <<
"GLV threshold " << glv_threshold <<
" not exercisable with " <<
num_points
1186 <<
" precomputed points";
// Runs pippenger_round_parallel with a caller-supplied arena whose base is deliberately
// misaligned, and compares against a direct sum.
1210 for (
size_t n : { kThreshold + 1,
size_t{ 50 },
size_t{ 100 },
size_t{ 256 } }) {
1212 std::span<const AffineElement> point_subspan(&
generators[0], n);
1215 constexpr size_t kArenaCapacity =
size_t{ 64 } * 1024 * 1024;
// Round the raw buffer address up to a 32-byte boundary, then step 16 bytes past it so the
// arena base is 16 mod 32; the ASSERT_EQ below verifies that.
1218 const auto base =
reinterpret_cast<uintptr_t
>(raw.data());
1219 const uintptr_t aligned32 = (base + 31) & ~uintptr_t{ 31 };
1220 std::byte* misaligned = raw.data() + (aligned32 - base) + 16;
1221 ASSERT_EQ(
reinterpret_cast<uintptr_t
>(misaligned) % 32,
size_t{ 16 });
1224 Element actual = scalar_multiplication::pippenger_round_parallel<Curve>(
1225 scalar_span, point_subspan,
false, {}, external_arena);
1228 expected.self_set_infinity();
1229 for (
size_t i = 0; i < n; ++i) {
1230 expected += point_subspan[i] * scalar_subspan[i];
// MSM whose inputs include points at infinity at the first, middle and last index.
// The two `sizes` definitions are presumably alternative preprocessor branches (directives not
// visible in this excerpt).
1258 const std::vector<size_t> sizes = { 64 };
1261 const std::vector<size_t> sizes = { 64, 3000 };
1263 for (
size_t n : sizes) {
1264 std::vector<AffineElement> points(n);
1265 std::vector<ScalarField> test_scalars(n);
1266 for (
size_t i = 0; i < n; ++i) {
1268 test_scalars[i] = ScalarField::random_element(&rng);
1270 for (
size_t idx : {
size_t{ 0 }, n / 2, n - 1 }) {
1271 points[idx].self_set_infinity();
1277 EXPECT_EQ(result, expected) <<
"point-at-infinity inputs (n=" << n <<
")";
// Builds `pairs` pairs of adjacent entries that share one scalar `s`, where the second point
// of each pair is `neg_r` (presumably the negation of the first — its definition is not
// visible), followed by 3 trailing entries with random scalars.
// The two `pair_counts` definitions are presumably alternative preprocessor branches.
1289 const std::vector<size_t> pair_counts = { 40 };
1291 const std::vector<size_t> pair_counts = { 40, 800 };
1293 for (
size_t pairs : pair_counts) {
1294 const size_t n = (2 * pairs) + 3;
1295 std::vector<AffineElement> points(n);
1296 std::vector<ScalarField> test_scalars(n);
1297 for (
size_t p = 0; p < pairs; ++p) {
1302 const ScalarField s = ScalarField::random_element(&rng);
1304 points[(2 * p) + 1] = neg_r;
1305 test_scalars[2 * p] = s;
1306 test_scalars[(2 * p) + 1] = s;
1308 for (
size_t i = 2 * pairs; i < n; ++i) {
1310 test_scalars[i] = ScalarField::random_element(&rng);
1316 EXPECT_EQ(result, expected) <<
"inverse-pair bucket collisions (pairs=" << pairs <<
")";
// Passes a caller-built doubled point table to pippenger_round_parallel.
// Entry 2*i is the original point; entry 2*i+1 has x multiplied by the base field's cube root
// of unity and y negated. Sizes cover values below and above GLV_SMALL_N_THRESHOLD.
1329 const BaseField beta = BaseField::cube_root_of_unity();
1330 namespace rpd = scalar_multiplication::round_parallel_detail;
1331 const std::vector<size_t> sizes = { 50, 1000, rpd::GLV_SMALL_N_THRESHOLD + 64 };
1332 for (
size_t n : sizes) {
1334 std::span<const AffineElement> points(&
generators[0], n);
1335 std::vector<ScalarField> test_scalars(n);
1336 for (
size_t i = 0; i < n; ++i) {
1339 std::vector<AffineElement> doubled(2 * n);
1340 for (
size_t i = 0; i < n; ++i) {
1341 doubled[2 * i] = points[i];
1342 doubled[(2 * i) + 1].x = points[i].x * beta;
1343 doubled[(2 * i) + 1].y = -points[i].y;
1347 scalar_multiplication::pippenger_round_parallel<Curve>(scalar_span,
1350 std::span<const AffineElement>(doubled));
1352 EXPECT_EQ(
AffineElement(result), expected) <<
"external_glv_doubled (n=" << n <<
")";
// Builds a list of probe scalars (starting with -1, then values derived from selected bit
// positions, small deltas, and 16 multiplicative-hash style 64-bit limbs) and writes them into
// the first n entries of the shared scalar pool after saving the originals.
// NOTE(review): the construction of most probes and the MSM calls are not visible here.
1376 namespace rpd = scalar_multiplication::round_parallel_detail;
1380 constexpr size_t n = 600;
1381 static_assert(n <= 256 + 4096,
"keep n below the native GLV threshold");
1383 const uint256_t r = ScalarField::modulus;
1386 std::vector<ScalarField> probes;
1387 probes.push_back(-ScalarField::one());
1391 for (
size_t bit : {
size_t{ 126 },
size_t{ 127 },
size_t{ 128 },
size_t{ 129 },
size_t{ 253 } }) {
1398 for (uint64_t delta = 0; delta < 8; ++delta) {
1404 for (uint64_t i = 1; i <= 16; ++i) {
1405 const uint64_t h0 = i * uint64_t{ 0x9E3779B97F4A7C15ULL };
1406 const uint64_t h1 = (i + 1) * uint64_t{ 0xC2B2AE3D27D4EB4FULL };
1407 const uint64_t h2 = (i + 2) * uint64_t{ 0x165667B19E3779F9ULL };
1408 const uint64_t h3 = (i + 3) * uint64_t{ 0xD6E8FEB86659FD93ULL };
1419 const BaseField beta = BaseField::cube_root_of_unity();
1422 phi.x = base.x * beta;
// Splits the scalar's non-Montgomery (reduced) form into its two endomorphism halves.
1425 const ScalarField canonical = s.from_montgomery_form_reduced();
1426 const auto split = ScalarField::split_into_endomorphism_scalars(canonical);
1432 <<
"GLV split half exceeded 128-bit storage or mis-signed";
1439 std::vector<ScalarField> saved(
scalars.begin(),
scalars.begin() + n);
1440 for (
size_t p = 0; p < probes.size(); ++p) {
1441 for (
size_t i = 0; i < n; ++i) {
// Mixed fill: odd indices get -1, even indices walk the probe list from the back.
1448 for (
size_t i = 0; i < n; ++i) {
1449 scalars[i] = (i & 1) ? -ScalarField::one() : probes[probes.size() - 1 - (i % probes.size())];
1453 for (
size_t i = 0; i < saved.size(); ++i) {
// Exercises sizes just above the GLV threshold and at 2^14, each with scalar bit widths of
// 1, 64 and 120. The skip is presumably guarded by a WASM-only condition that is not
// visible here; the body of the final loop over `saved` is also not visible.
1477 GTEST_SKIP() <<
"non-GLV mid-band (2^13<n<2^17) does not exist on wasm";
1480 namespace rpd = scalar_multiplication::round_parallel_detail;
1481 const size_t glv_threshold = rpd::GLV_SMALL_N_THRESHOLD;
1484 const std::vector<size_t> sizes = { glv_threshold + 1,
size_t{ 1 } << 14 };
1486 const std::vector<size_t> bit_widths = { 1, 64, 120 };
1488 size_t max_size = 0;
1489 for (
size_t s : sizes) {
1496 for (
size_t n : sizes) {
1500 for (
size_t bits : bit_widths) {
1502 for (
size_t i = 0; i < n; ++i) {
1506 rng.get_random_uint64(),
1507 rng.get_random_uint64(),
1508 rng.get_random_uint64());
1510 if (i == 0 && bits >= 1) {
1518 for (
size_t i = 0; i < saved.size(); ++i) {
// Input layout: `cluster` entries sharing s_big, then 600 entries sharing s_med, then 400
// entries with individual random scalars. The same input is evaluated with the last flag set
// to true and to false, and the two results must agree with each other and with `expected`.
// The two `cluster_sizes` definitions are presumably alternative preprocessor branches.
// NOTE(review): the callee is not visible; the last flag presumably enables deduplication.
1547 const std::vector<size_t> cluster_sizes = { 2100 };
1551 const std::vector<size_t> cluster_sizes = { 2100, 5000 };
1553 for (
size_t cluster : cluster_sizes) {
1560 const size_t singles = 400;
1561 const size_t medium = 600;
1562 const size_t n = cluster + medium + singles;
1565 std::vector<ScalarField> test_scalars(n);
1566 std::vector<AffineElement> points(n);
1568 const ScalarField s_big = ScalarField::random_element(&rng);
1569 const ScalarField s_med = ScalarField::random_element(&rng);
// Aborts the test if the two cluster scalars happen to be equal.
1570 ASSERT_NE(s_big, s_med);
1572 for (
size_t i = 0; i < cluster; ++i) {
1574 test_scalars[i] = s_big;
1576 for (
size_t i = 0; i < medium; ++i) {
1577 points[cluster + i] =
generators[cluster + i];
1578 test_scalars[cluster + i] = s_med;
1580 for (
size_t i = 0; i < singles; ++i) {
1581 points[cluster + medium + i] =
generators[cluster + medium + i];
1582 test_scalars[cluster + medium + i] = ScalarField::random_element(&rng);
1589 points, scalar_span,
false,
true);
1591 EXPECT_EQ(deduped, expected) <<
"dedup large-cluster carry/caps (cluster=" << cluster <<
", n=" << n <<
")";
1597 points, scalar_span2,
false,
false);
1598 EXPECT_EQ(deduped, undeduped) <<
"dedup vs no-dedup divergence (cluster=" << cluster <<
")";
// Batch with mixed sizes, including empty MSMs. MSMs with k % 4 == 2 use all-zero scalars,
// the others read the shared pool at `offset + i`; even-indexed MSMs get hint value 1 in
// dedup_hints. After the (not visible) batch call, each result is compared with naive_msm and
// each scalar vector with its saved copy.
1619 const std::vector<size_t> sizes = { 0, 1, 5, 0, 4096, 33, 0, 16384, 64, 2 };
1620 const size_t num_msms = sizes.size();
1625 std::vector<AffineElement> expected(num_msms);
1626 std::vector<uint8_t> dedup_hints(num_msms, 0);
1629 for (
size_t k = 0; k < num_msms; ++k) {
1630 const size_t n = sizes[k];
1632 batch_scalars[k].resize(n);
1633 scalar_copies[k].resize(n);
1634 const bool all_zero = (k % 4 == 2);
1635 for (
size_t i = 0; i < n; ++i) {
1636 batch_scalars[k][i] = all_zero ? ScalarField::zero() :
scalars[
offset + i];
1637 scalar_copies[k][i] = batch_scalars[k][i];
1639 dedup_hints[k] =
static_cast<uint8_t
>((k % 2 == 0) ? 1 : 0);
1642 expected[k] =
naive_msm(batch_scalars[k], pts);
// ASSERT (not EXPECT) so the indexed comparisons below never read past a short result.
1649 ASSERT_EQ(result.size(), num_msms);
1650 for (
size_t k = 0; k < num_msms; ++k) {
1651 EXPECT_EQ(result[k], expected[k]) <<
"batch MSM " << k <<
" (n=" << sizes[k]
1652 <<
", handle_edge_cases=" << handle_edge_cases <<
") mismatched";
1653 EXPECT_EQ(batch_scalars[k], scalar_copies[k]) <<
"batch MSM " << k <<
" scalars were not restored";
// Curve list for the typed test suite: BN254 and Grumpkin.
1660using CurveTypes = ::testing::Types<bb::curve::BN254, bb::curve::Grumpkin>;
// Checks that the arena layout predicate holds for large BN254 inputs.
// n is derived as schedule_slots / windows_per_batch for each candidate batch count, and every
// effective bit width from 1 to 254 is tried for thread counts 4 and 32.
// NOTE(review): in the visible lines `threads` only appears in the failure message; how it is
// applied (presumably a concurrency override on a missing line) is not shown.
1663TEST(ScalarMultiplicationArenaTest, LargeBn254RecursionVkShapeFitsComputedArena)
// 26454272 bytes expressed as a count of uint32_t slots.
1672 constexpr size_t schedule_slots =
size_t{ 26454272 } /
sizeof(uint32_t);
1673 constexpr std::array<size_t, 8> candidate_window_batches{ 1, 2, 4, 8, 13, 16, 26, 32 };
1674 for (
const size_t threads : {
size_t{ 4 },
size_t{ 32 } }) {
1676 for (
const size_t windows_per_batch : candidate_window_batches) {
1677 const size_t n = schedule_slots / windows_per_batch;
1678 for (
size_t effective_num_bits = 1; effective_num_bits <= 254; ++effective_num_bits) {
1679 EXPECT_TRUE(pippenger_bn254_arena_layout_fits_for_test(
1680 n,
false,
false, effective_num_bits))
1681 <<
"threads=" << threads <<
" windows_per_batch=" << windows_per_batch <<
" n=" << n
1682 <<
" effective_num_bits=" << effective_num_bits;
// Checks the arena layout predicate over a grid of thread counts, input sizes, external-GLV
// and dedup flags, and bit budgets, followed by a sweep over 32 pseudo-random sizes.
// NOTE(review): in the visible lines `threads` only appears in the failure messages; how it is
// applied (presumably a concurrency override on a missing line) is not shown.
1698TEST(ScalarMultiplicationArenaTest, ArenaLayoutFitsAcrossDispatchSpace)
1702 constexpr std::array<size_t, 6> thread_counts{ 1, 3, 8, 16, 32, 64 };
// Sizes cluster around powers of two (one below, at, one above) plus a few small values.
1705 constexpr std::array<size_t, 21> ns{ 4, 5, 23, 24, 25, 31, 32, 33, 63, 64, 65,
1706 255, 256, 257, 4095, 4096, 4097, 8191, 8192, 8193, 262144 };
// A budget of 0 takes the predicate's fallback bit width instead of an explicit one.
1707 constexpr std::array<size_t, 4> bit_budgets{ 0, 1, 128, 254 };
1709 for (
const size_t threads : thread_counts) {
1711 for (
const size_t n : ns) {
1712 for (
const bool ext_glv : {
false,
true }) {
1713 for (
const bool dedup : {
false,
true }) {
1714 for (
const size_t bits : bit_budgets) {
1715 EXPECT_TRUE(pippenger_bn254_arena_layout_fits_for_test(n, ext_glv, dedup, bits))
1716 <<
"threads=" << threads <<
" n=" << n <<
" ext_glv=" << ext_glv <<
" dedup=" << dedup
1717 <<
" bits=" << bits;
1727 for (
const size_t threads : {
size_t{ 4 },
size_t{ 8 },
size_t{ 16 },
size_t{ 32 } }) {
// Deterministic pseudo-random sizes: 4 + (i * 2654435761 mod 2^20) for i = 1..32.
1729 for (
size_t i = 1; i <= 32; ++i) {
1730 const size_t n = 4 + ((i *
size_t{ 2654435761ULL }) % (
size_t{ 1 } << 20));
1731 for (
const bool dedup : {
false,
true }) {
1732 EXPECT_TRUE(pippenger_bn254_arena_layout_fits_for_test(n,
false, dedup, 254))
1733 <<
"random n=" << n <<
" threads=" << threads <<
" dedup=" << dedup;
1745 this->test_pippenger_low_memory();
1749 this->test_batch_multi_scalar_mul();
1753 this->test_batch_multi_scalar_mul_sparse();
1757 this->test_batch_multi_scalar_mul_large_dense();
1761 this->test_batch_multi_scalar_mul_ragged();
1769 this->test_msm_all_zeroes();
1773 this->test_msm_empty_polynomial();
1777 this->test_scalars_unchanged_after_msm();
1781 this->test_scalars_unchanged_after_batch_multi_scalar_mul();
1785 this->test_scalars_unchanged_after_large_non_glv_msm();
1789 this->test_scalar_one();
1793 this->test_scalar_minus_one();
1797 this->test_single_point();
1801 this->test_size_thresholds();
1805 this->test_duplicate_points();
1809 this->test_mixed_zero_scalars();
1813 this->test_pippenger_free_function();
1817 this->test_pippenger_unsafe_free_function();
1821 this->test_offset_span(4096, 7, 512, 0x5eedu + 33);
1822 this->test_offset_span(8192, 4097, 2048, 0x5eedu + 34);
1827 GTEST_SKIP() <<
"Large synthetic MSM coverage is native-only; WASM coverage comes from integration flows.";
1829 this->test_large_n_non_glv();
1834 GTEST_SKIP() <<
"Large synthetic MSM coverage is native-only; WASM coverage comes from integration flows.";
1836 this->test_msm_single_digit_mega_run();
1841 GTEST_SKIP() <<
"Large synthetic MSM coverage is native-only; WASM coverage comes from integration flows.";
1843 this->test_msm_dedup_cap_and_carry();
1848 GTEST_SKIP() <<
"Large synthetic MSM coverage is native-only; WASM coverage comes from integration flows.";
1850 this->test_msm_dedup_many_small_clusters_cap();
1856 this->test_pippenger_internal_single_thread();
1860 this->test_pippenger_internal_single_thread_at_dispatch_threshold_plus_one();
1864 this->test_pippenger_internal_dispatch_threshold_per_thread_count();
1868 this->test_pippenger_internal_offset_span_dispatch();
1872 this->test_pippenger_internal_all_zero_scalars();
1876 this->test_pippenger_internal_mixed_zero_scalars();
1880 this->test_pippenger_internal_extreme_scalars();
1884 this->test_trivial_msm_threaded_per_worker_paths();
1888 this->test_pippenger_internal_glv_boundary();
1892 this->test_pippenger_internal_misaligned_external_arena();
1896 this->test_handle_edge_cases_point_at_infinity();
1900 this->test_handle_edge_cases_inverse_pairs();
1905 GTEST_SKIP() <<
"external_glv_doubled direct coverage is native-only; WASM coverage comes from batch flows.";
1907 this->test_external_glv_doubled_matches_naive();
1911 this->test_glv_extreme_magnitude_scalars();
1915 this->test_effective_num_bits_band_small_scalars();
1919 this->test_dedup_large_cluster_carry_and_caps();
1924 GTEST_SKIP() <<
"Large ragged batch coverage is native-only; WASM coverage comes from integration flows.";
1926 this->test_batch_driver_shared_path();
1953 std::vector<AffineElement> pts(n);
1955 for (
size_t i = s; i < e; ++i) {
1956 pts[i] = Group::one * Curve::ScalarField::random_element(&
engine);
1969 return ScalarField::random_element(&
engine);
1972 ScalarField canonical = r.from_montgomery_form_reduced();
1973 auto& d = canonical.data;
1974 size_t bits_remaining = bits;
1975 for (
size_t l = 0; l < 4; ++l) {
1977 const uint64_t mask = (take == 64) ? ~uint64_t{ 0 }
1978 : (take == 0) ? uint64_t{ 0 }
1979 : ((uint64_t{ 1 } << take) - 1);
1981 if (bits_remaining > take) {
1982 bits_remaining -= take;
1994 EXPECT_EQ(actual, expected);
1997 static constexpr size_t kN = 131072;
2002 constexpr size_t large_count = 16;
2004 std::vector<ScalarField> ss(
kN);
2005 for (
size_t i = 0; i <
kN - large_count; ++i) {
2008 for (
size_t i =
kN - large_count; i <
kN; ++i) {
2009 ss[i] = ScalarField::random_element(&
engine);
2018 std::vector<ScalarField> ss(
kN);
2019 for (
size_t k = 0; k <
kN / 2; ++k) {
2022 for (
size_t k =
kN / 2; k <
kN; ++k) {
2032 std::vector<ScalarField> ss(
kN);
2033 for (
size_t k = 0; k <
kN; ++k) {
2034 ss[k] = ScalarField::random_element(&
engine);
2042 std::vector<ScalarField> ss(
kN, ScalarField::zero());
2045 EXPECT_TRUE(actual.is_point_at_infinity());
2052 std::vector<ScalarField> ss(
kN, ScalarField::zero());
2053 for (
size_t k = 0; k <
kN / 2; ++k) {
2054 ss[k] = ScalarField::random_element(&
engine);
2063 std::vector<ScalarField> ss(
kN);
2064 for (
size_t k = 0; k <
kN; ++k) {
2065 ss[k] = ScalarField::random_element(&
engine);
2075 std::vector<ScalarField> ss(
kN);
2076 for (
size_t k = 0; k <
kN / 2; ++k) {
2079 for (
size_t k =
kN / 2; k <
kN; ++k) {
2080 ss[k] = ScalarField::random_element(&
engine);
2092 std::vector<ScalarField> ss(
kN);
2093 for (
size_t k = 0; k <
kN; ++k) {
2106 std::vector<ScalarField> ss(
kN);
2107 for (
size_t k = 0; k <
kN; ++k) {
2124 this->test_decaying();
2128 this->test_uniform_random();
2132 this->test_all_zero();
2136 this->test_half_zero();
2140 this->test_all_large();
2144 this->test_mid_distribution();
2148 this->test_below_192();
2152 this->test_force_split_bitwise_identity();
2157TEST(ScalarMultiplication, SmallInputsExplicit)
2159 uint256_t x0(0x68df84429941826a, 0xeb08934ed806781c, 0xc14b6a2e4f796a73, 0x08dc1a9a11a3c8db);
2160 uint256_t y0(0x8ae5c31aa997f141, 0xe85f20c504f2c11b, 0x81a94193f3b1ce2b, 0x26f2c37372adb5b7);
2161 uint256_t x1(0x80f5a592d919d32f, 0x1362652b984e51ca, 0xa0b26666f770c2a1, 0x142c6e1964e5c3c5);
2162 uint256_t y1(0xb6c322ebb5ae4bc5, 0xf9fef6c7909c00f8, 0xb37ca1cc9af3b421, 0x1e331c7fa73d6a59);
2163 uint256_t s0(0xe48bf12a24272e08, 0xf8dd0182577f3567, 0xec8fd222b8a6becb, 0x102d76b945612c9b);
2164 uint256_t s1(0x098ae8d69f1e4e9e, 0xb5c8313c0f6040ed, 0xf78041e30cc46c44, 0x1d1e6e0c21892e13);
#define BB_BENCH_NAME(name)
RAII helper to scope a bb::set_parallel_for_concurrency change to one test.
ConcurrencyScope & operator=(const ConcurrencyScope &)=delete
ConcurrencyScope & operator=(ConcurrencyScope &&)=delete
ConcurrencyScope(size_t n)
ConcurrencyScope(const ConcurrencyScope &)=delete
ConcurrencyScope(ConcurrencyScope &&)=delete
void test_batch_driver_shared_path()
void test_offset_span(size_t n_total, size_t start_index, size_t n_used, uint64_t seed)
Validate that a non-zero start_index in the PolynomialSpan is honoured.
void test_pippenger_low_memory()
void run_batch_driver_paths(bool handle_edge_cases)
typename Curve::ScalarField ScalarField
void test_pippenger_internal_dispatch_threshold_per_thread_count()
void test_pippenger_internal_extreme_scalars()
void test_pippenger_internal_single_thread()
void test_dedup_large_cluster_carry_and_caps()
static std::vector< AffineElement > generators
static constexpr size_t kMaxBatchMSMs
void test_mixed_zero_scalars()
void test_batch_multi_scalar_mul_sparse()
void test_duplicate_points()
void test_batch_multi_scalar_mul_large_dense()
void test_scalars_unchanged_after_batch_multi_scalar_mul()
void test_large_n_non_glv()
Coverage at very large N (exercises the non-GLV path on WASM, where n_input > 2^16 disables the GLV d...
void test_msm_all_zeroes()
void test_msm_dedup_many_small_clusters_cap()
Stress-test dedup cap fallback across many small clusters.
void test_pippenger_unsafe_free_function()
void test_batch_multi_scalar_mul()
static constexpr size_t num_points
void test_msm_dedup_cap_and_carry()
Stress-test the dedup pass's worst-case caps and the split-cluster carry.
static void SetUpTestSuite()
void test_scalar_minus_one()
void test_pippenger_internal_all_zero_scalars()
void test_glv_extreme_magnitude_scalars()
void test_handle_edge_cases_inverse_pairs()
typename Curve::Element Element
void test_external_glv_doubled_matches_naive()
static std::vector< AffineElement > make_repeated_test_points(size_t num_pts)
static constexpr size_t kMaxBatchPointsPerMSM
void test_msm_empty_polynomial()
void test_scalars_unchanged_after_large_non_glv_msm()
void test_pippenger_internal_misaligned_external_arena()
void test_pippenger_free_function()
typename Curve::Group Group
void test_scalars_unchanged_after_msm()
void test_pippenger_internal_glv_boundary()
void test_handle_edge_cases_point_at_infinity()
void test_pippenger_internal_offset_span_dispatch()
static std::vector< ScalarField > scalars
void test_pippenger_internal_mixed_zero_scalars()
typename Curve::AffineElement AffineElement
void test_effective_num_bits_band_small_scalars()
static AffineElement naive_msm(std::span< ScalarField > input_scalars, std::span< const AffineElement > input_points)
void test_batch_multi_scalar_mul_ragged()
void test_pippenger_internal_single_thread_at_dispatch_threshold_plus_one()
void test_msm_single_digit_mega_run()
Force every Pippenger window to contain a single mega-run of one digit.
void test_trivial_msm_threaded_per_worker_paths()
void check_internal_against_naive(size_t n, size_t start_index, const char *label)
void test_size_thresholds()
typename Curve::ScalarField ScalarField
static void check_against_naive(std::span< ScalarField > scalars, std::span< const AffineElement > points)
static constexpr size_t kN
void test_uniform_random()
void test_force_split_bitwise_identity()
static AffineElement naive_msm(std::span< ScalarField > input_scalars, std::span< const AffineElement > input_points)
typename Curve::AffineElement AffineElement
void test_mid_distribution()
static ScalarField scalar_below_2pow(size_t bits)
typename Curve::Element Element
static std::vector< AffineElement > make_points(size_t n)
typename Curve::Group Group
typename Group::element Element
typename grumpkin::g1 Group
typename Group::affine_element AffineElement
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
group_elements::affine_element< Fq, Fr, Params > affine_element
virtual uint8_t get_random_uint8()=0
virtual uint16_t get_random_uint16()=0
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< const AffineElement > points, std::span< PolynomialSpan< ScalarField > > scalars, bool handle_edge_cases=true, std::span< const uint8_t > dedup_hints={}) noexcept
static AffineElement msm(std::span< const AffineElement > points, PolynomialSpan< const ScalarField > scalars, bool handle_edge_cases=false, bool dedup_hint=false) noexcept
::testing::Types< bb::curve::BN254, bb::curve::Grumpkin > VariableWindowCurveTypes
RNG & get_debug_randomness(bool reset, std::uint_fast64_t seed)
constexpr size_t BATCH_MEM_BUDGET
constexpr size_t MIN_BATCH_CAPACITY
constexpr size_t SUBCHUNK_ENTRIES_CAP
constexpr size_t GLV_SMALL_N_THRESHOLD
constexpr size_t MIN_PTS_PER_THREAD_FOR_PIPPENGER
size_t window_bits_tuning_oversub_factor(size_t n_input)
N-dependent oversubscription factor used ONLY for choose_window_bits' target_load formula (not for ac...
Entry point for Barretenberg command-line interface.
TYPED_TEST_SUITE(CommitmentKeyTest, Curves)
::testing::Types< curve::BN254, curve::Grumpkin > CurveTypes
TYPED_TEST(CommitmentKeyTest, CommitToZeroPoly)
TEST(BoomerangMegaCircuitBuilder, BasicCircuit)
void set_parallel_for_concurrency(size_t num_cores)
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
constexpr field invert() const noexcept