33#ifdef __wasm_simd128__
34#include <wasm_simd128.h>
50 size_t bit_offset,
size_t window_bits,
size_t num_uint64_limbs)
noexcept
52 return compute_booth_slice_params(bit_offset, window_bits, num_uint64_limbs);
73 bool slice_localised_to_one_u64,
74 size_t window_bits)
noexcept
76 uint64_t raw_wide = 0;
77 if (slice_localised_to_one_u64) {
79 raw_wide = (scalar_data[lo_limb] >> lo_off) & lo_mask;
80 }
else if (lo_mask == 0) {
87 raw_wide = (scalar_data[hi_limb] & hi_mask) << lo_bits;
90 const uint64_t s_lo = scalar_data[lo_limb];
91 const uint64_t s_hi = scalar_data[hi_limb];
92 const uint64_t lo_part = (s_lo >> lo_off) & lo_mask;
93 const uint64_t hi_part = (s_hi & hi_mask) << lo_bits;
94 raw_wide = lo_part | hi_part;
97 const uint32_t raw =
static_cast<uint32_t
>(raw_wide);
111 const uint32_t neg = (raw >> window_bits) & uint32_t{ 1 };
112 const uint32_t neg_mask = uint32_t{ 0 } - neg;
113 const uint32_t val_mask = (uint32_t{ 1 } << window_bits) - 1;
114 const uint32_t encode = (raw + 1) >> 1;
115 const uint32_t bucket_idx = ((encode - neg) ^ neg_mask) & val_mask;
118 return (neg << 31) | bucket_idx;
159 size_t num_u32_limbs)
noexcept
161 constexpr size_t LIMB_BITS_U32 = 32;
163 if (bit_offset == 0) {
166 sp.
lo_off = LIMB_BITS_U32 - 1;
169 sp.
hi_mask = (uint32_t{ 1 } << window_bits) - 1;
173 const size_t lookback_bit = bit_offset - 1;
174 const size_t bits_to_read = window_bits + 1;
175 sp.
lo_limb =
static_cast<uint32_t
>(lookback_bit / LIMB_BITS_U32);
176 sp.
lo_off =
static_cast<uint32_t
>(lookback_bit & (LIMB_BITS_U32 - 1));
177 const uint32_t in_lo =
static_cast<uint32_t
>(LIMB_BITS_U32 - sp.
lo_off);
178 sp.
lo_bits = (in_lo < static_cast<uint32_t>(bits_to_read)) ? in_lo :
static_cast<uint32_t
>(bits_to_read);
179 const uint32_t hi_bits =
static_cast<uint32_t
>(bits_to_read) - sp.
lo_bits;
181 if (
static_cast<size_t>(sp.
lo_limb) + 1 >= num_u32_limbs) {
186 sp.
hi_mask = (uint32_t{ 1 } << hi_bits) - 1;
199 const uint32_t* p0,
const uint32_t* p1,
const uint32_t* p2,
const uint32_t* p3, uint32_t idx)
noexcept
201#ifdef __wasm_simd128__
202 v128_t v = wasm_i32x4_splat(0);
203 v = wasm_v128_load32_lane(p0 + idx, v, 0);
204 v = wasm_v128_load32_lane(p1 + idx, v, 1);
205 v = wasm_v128_load32_lane(p2 + idx, v, 2);
206 v = wasm_v128_load32_lane(p3 + idx, v, 3);
209 return SimdU32x4{ p0[idx], p1[idx], p2[idx], p3[idx] };
219#ifdef __wasm_simd128__
220 wasm_v128_store(dst,
reinterpret_cast<v128_t
>(v));
233 const uint32_t* scalar_data_0,
234 const uint32_t* scalar_data_1,
235 const uint32_t* scalar_data_2,
236 const uint32_t* scalar_data_3,
242 uint32_t window_bits)
noexcept
245 const SimdU32x4 raw = (lo >> lo_off) & lo_mask_v;
246 const SimdU32x4 neg = (raw >> window_bits) & one_v;
248 const SimdU32x4 encode = (raw + one_v) >> 1;
249 const SimdU32x4 bucket = ((encode - neg) ^ neg_mask) & val_mask;
250 const SimdU32x4 packed = (neg << 31) | bucket;
255 const uint32_t* scalar_data_0,
256 const uint32_t* scalar_data_1,
257 const uint32_t* scalar_data_2,
258 const uint32_t* scalar_data_3,
264 uint32_t window_bits)
noexcept
267 const SimdU32x4 raw = (hi & hi_mask_v) << lo_bits;
268 const SimdU32x4 neg = (raw >> window_bits) & one_v;
270 const SimdU32x4 encode = (raw + one_v) >> 1;
271 const SimdU32x4 bucket = ((encode - neg) ^ neg_mask) & val_mask;
272 const SimdU32x4 packed = (neg << 31) | bucket;
277 const uint32_t* scalar_data_0,
278 const uint32_t* scalar_data_1,
279 const uint32_t* scalar_data_2,
280 const uint32_t* scalar_data_3,
289 uint32_t window_bits)
noexcept
293 const SimdU32x4 lo_part = (lo >> lo_off) & lo_mask_v;
294 const SimdU32x4 hi_part = (hi & hi_mask_v) << lo_bits;
296 const SimdU32x4 neg = (raw >> window_bits) & one_v;
298 const SimdU32x4 encode = (raw + one_v) >> 1;
299 const SimdU32x4 bucket = ((encode - neg) ^ neg_mask) & val_mask;
300 const SimdU32x4 packed = (neg << 31) | bucket;
315 if (sp.is_bottom_window) {
318 if (sp.slice_localised_to_one_u32) {
__attribute__((section("__libfuzzer_extra_counters"))) uint8_t num_events
BoothSliceParams compute_booth_slice_params(size_t bit_offset, size_t window_bits, size_t num_uint64_limbs) noexcept
uint32_t get_constantine_packed_digit(const uint64_t *scalar_data, uint32_t lo_limb, uint32_t hi_limb, uint32_t lo_off, uint32_t lo_bits, uint32_t lo_mask, uint32_t hi_mask, bool slice_localised_to_one_u64, size_t window_bits) noexcept
Read (window_bits+1) bits from scalar_data (uint64 limbs) using precomputed slice params and apply Co...
ConstantineSlicePath classify_slice_path_u32(const ConstantineSliceParamsU32 &sp) noexcept
void store_constantine_packed_digits_x4_bottom(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t hi_limb, uint32_t lo_bits, SimdU32x4 hi_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
void store_constantine_packed_digits_x4_boundary(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t lo_limb, uint32_t hi_limb, uint32_t lo_off, uint32_t lo_bits, SimdU32x4 lo_mask_v, SimdU32x4 hi_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
SimdU32x4 gather_x4_u32(const uint32_t *p0, const uint32_t *p1, const uint32_t *p2, const uint32_t *p3, uint32_t idx) noexcept
uint32_t __attribute__((vector_size(16))) SimdU32x4
ConstantineSliceParams compute_constantine_slice_params(size_t bit_offset, size_t window_bits, size_t num_uint64_limbs) noexcept
void simd_u32x4_store(uint32_t *dst, SimdU32x4 v) noexcept
void store_constantine_packed_digits_x4_localised(uint32_t *dst, const uint32_t *scalar_data_0, const uint32_t *scalar_data_1, const uint32_t *scalar_data_2, const uint32_t *scalar_data_3, uint32_t lo_limb, uint32_t lo_off, SimdU32x4 lo_mask_v, SimdU32x4 one_v, SimdU32x4 val_mask, uint32_t window_bits) noexcept
ConstantineSliceParamsU32 compute_constantine_slice_params_u32(size_t bit_offset, size_t window_bits, size_t num_u32_limbs) noexcept
Per-window precomputed slice parameters for the carry-less signed-Booth window recoding....
bool slice_localised_to_one_u32