// Width in bits of one uint64_t limb. The reference bit reader divides a bit
// index by this to pick the limb and takes the remainder as the shift amount.
29constexpr size_t LIMB_BITS_U64 = 64;
// Limb counts for the two views of a scalar: 4 * 64 == 8 * 32 == MAX_BITS.
// NOTE(review): neither count is used in the lines visible here - presumably
// they are passed to the compute_constantine_slice_params* helpers; confirm.
30constexpr size_t NUM_LIMBS_U64 = 4;
31constexpr size_t NUM_LIMBS_U32 = 8;
// Bit indices at or above this value are treated as out of range by the
// reference bit reader (it never indexes scalar_data for them).
32constexpr size_t MAX_BITS = 256;
// Bytes copied from the fuzz input into each scalar lane (32 bytes == 256 bits).
33constexpr size_t SCALAR_BYTES = 32;
// Reference (bit-at-a-time) computation of the packed signed digit for one
// window of a scalar; the harness uses its result as the expected ("want")
// value for production_scalar.
//
// scalar_data: uint64_t limbs, least-significant limb first (bit i lives in
//              limb i / LIMB_BITS_U64 at position i % LIMB_BITS_U64). Only bit
//              indices below MAX_BITS are ever read.
// bit_offset:  index of the window's lowest bit; the bit just below it
//              (bit_offset - 1) is read as well.
// window_bits: window width. Shifts below are done in 32-bit arithmetic, so
//              this must stay well under 32 (the fuzz harness in this file
//              generates values in 1..19).
//
// Returns the sign flag in bit 31 OR-ed with the magnitude masked to the low
// window_bits bits.
35uint32_t reference_packed_digit(
const uint64_t* scalar_data,
size_t bit_offset,
size_t window_bits)
// Single-bit reader. The index is signed because the first bit requested is
// bit_offset - 1, which is -1 when bit_offset == 0.
37 auto bit_at = [&](int64_t i) -> uint64_t {
// Out-of-range guard: negative indices and indices >= MAX_BITS never touch
// scalar_data. NOTE(review): the value returned on this path is elided from
// this excerpt - presumably 0; confirm.
38 if (i < 0 ||
static_cast<size_t>(i) >= MAX_BITS) {
// In range: shift the owning limb right so the requested bit lands in bit 0.
// NOTE(review): the right-hand operand of the final '&' is elided from this
// excerpt - presumably a single-bit mask; confirm.
41 return (scalar_data[
static_cast<size_t>(i) / LIMB_BITS_U64] >> (
static_cast<size_t>(i) % LIMB_BITS_U64)) &
// Gather window_bits + 1 bits (k runs 0..window_bits inclusive) into raw, with
// bit k of raw taken from scalar bit (bit_offset + k - 1).
// NOTE(review): the declaration of raw is elided from this excerpt -
// presumably a zero-initialised uint32_t; confirm.
45 for (
size_t k = 0; k <= window_bits; ++k) {
46 const int64_t bit_idx =
static_cast<int64_t
>(bit_offset) +
static_cast<int64_t
>(k) - 1;
47 raw |=
static_cast<uint32_t
>(bit_at(bit_idx)) << k;
// The top gathered bit (position window_bits) selects the sign.
49 const uint32_t neg = (raw >> window_bits) & 1U;
// Mask that keeps the low window_bits bits.
50 const uint32_t val_mask = (uint32_t{ 1 } << window_bits) - 1;
// Halve with rounding up: (raw + 1) / 2.
51 const uint32_t encode = (raw + 1) >> 1;
// Conditional negate without a branch: with neg == 0 this is encode itself;
// with neg == 1 it is ~(encode - 1), i.e. the two's-complement negation of
// encode. Either way the result is truncated to window_bits bits.
52 const uint32_t bucket = ((encode - neg) ^ (uint32_t{ 0 } - neg)) & val_mask;
// Pack: sign in bit 31, masked magnitude in the low bits.
53 return (neg << 31) | bucket;
56uint32_t production_scalar(
const uint64_t* scalar_data,
size_t bit_offset,
size_t window_bits)
66 sp.slice_localised_to_one_u64,
73 std::array<uint32_t, 4>& out)
76 const cnst::SimdU32x4 lo_mask_v{ sp.lo_mask, sp.lo_mask, sp.lo_mask, sp.lo_mask };
77 const cnst::SimdU32x4 hi_mask_v{ sp.hi_mask, sp.hi_mask, sp.hi_mask, sp.hi_mask };
79 const uint32_t val_mask_scalar = (uint32_t{ 1 } << window_bits) - 1;
80 const cnst::SimdU32x4 val_mask{ val_mask_scalar, val_mask_scalar, val_mask_scalar, val_mask_scalar };
81 const auto* s0 =
reinterpret_cast<const uint32_t*
>(scalars[0].data());
82 const auto* s1 =
reinterpret_cast<const uint32_t*
>(scalars[1].data());
83 const auto* s2 =
reinterpret_cast<const uint32_t*
>(scalars[2].data());
84 const auto* s3 =
reinterpret_cast<const uint32_t*
>(scalars[3].data());
85 const auto wb_u32 =
static_cast<uint32_t
>(window_bits);
88 case cnst::ConstantineSlicePath::Localised:
90 out.data(), s0, s1, s2, s3, sp.lo_limb, sp.lo_off, lo_mask_v, one_v, val_mask, wb_u32);
92 case cnst::ConstantineSlicePath::Bottom:
94 out.data(), s0, s1, s2, s3, sp.hi_limb, sp.lo_bits, hi_mask_v, one_v, val_mask, wb_u32);
96 case cnst::ConstantineSlicePath::Boundary:
121 constexpr size_t MIN_INPUT = 2 + (SCALAR_BYTES * 4);
129 const size_t window_bits = 1 + (buf[0] % 19);
133 const size_t bit_offset = buf[1] & 0xff;
136 for (
size_t lane = 0; lane < 4; ++lane) {
137 std::memcpy(scalars[lane].
data(), buf.data() + 2 + (lane * SCALAR_BYTES), SCALAR_BYTES);
141 for (
size_t lane = 0; lane < 4; ++lane) {
142 const uint32_t got = production_scalar(scalars[lane].
data(), bit_offset, window_bits);
143 const uint32_t want = reference_packed_digit(scalars[lane].
data(), bit_offset, window_bits);
150 alignas(16) std::array<uint32_t, 4> simd_out{};
151 production_simd(scalars, bit_offset, window_bits, simd_out);
152 for (
size_t lane = 0; lane < 4; ++lane) {
153 const uint32_t want = production_scalar(scalars[lane].
data(), bit_offset, window_bits);
154 if (simd_out[lane] != want) {
uint32_t get_constantine_packed_digit(const uint64_t *scalar_data, uint32_t lo_limb, uint32_t hi_limb, uint32_t lo_off, uint32_t lo_bits, uint32_t lo_mask, uint32_t hi_mask, bool slice_localised_to_one_u64, size_t window_bits) noexcept
Read (window_bits+1) bits from scalar_data (uint64 limbs) using precomputed slice params and apply Co...
ConstantineSlicePath classify_slice_path_u32(const ConstantineSliceParamsU32 &sp) noexcept
void store_constantine_packed_digits_x4_bottom(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t hi_limb, uint32_t lo_bits, SimdU32x4 hi_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
void store_constantine_packed_digits_x4_boundary(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t lo_limb, uint32_t hi_limb, uint32_t lo_off, uint32_t lo_bits, SimdU32x4 lo_mask_v, SimdU32x4 hi_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
uint32_t __attribute__((vector_size(16))) SimdU32x4
ConstantineSliceParams compute_constantine_slice_params(size_t bit_offset, size_t window_bits, size_t num_uint64_limbs) noexcept
void store_constantine_packed_digits_x4_localised(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t lo_limb, uint32_t lo_off, SimdU32x4 lo_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
ConstantineSliceParamsU32 compute_constantine_slice_params_u32(size_t bit_offset, size_t window_bits, size_t num_u32_limbs) noexcept
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)