// Copyright 2025 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

// This file is autogenerated by fearless_simd_gen

use crate::{Bytes, Select, Simd, SimdCvtFloat, SimdCvtTruncate, SimdFrom, SimdInto};
/// A vector of four `f32` lanes paired with the `S: Simd` value its operations are
/// dispatched through.
///
/// `repr(C, align(16))` keeps `val` as the first field and aligns the struct to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct f32x4<S: Simd> {
    /// The four lane values.
    pub val: [f32; 4],
    /// The `Simd` value that the methods on this type delegate to.
    pub simd: S,
}
/// Builds an `f32x4` from an array of four lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[f32; 4], S> for f32x4<S> {
    /// Stores `val` unchanged as the lane array, alongside `simd`.
    #[inline(always)]
    fn simd_from(val: [f32; 4], simd: S) -> Self {
        // The array is `Copy`; moving it whole yields the same lanes as copying one by one.
        Self { val, simd }
    }
}
impl<S: Simd> From<f32x4<S>> for [f32; 4] {
    #[inline(always)]
    fn from(value: f32x4<S>) -> Self {
        value.val
    }
}
/// Lets an `f32x4` be read as its underlying `[f32; 4]`.
impl<S: Simd> core::ops::Deref for f32x4<S> {
    type Target = [f32; 4];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets the lanes of an `f32x4` be modified through its underlying `[f32; 4]`.
impl<S: Simd> core::ops::DerefMut for f32x4<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `f32x4` from a single `f32` by delegating to `splat_f32x4` on `simd`.
impl<S: Simd> SimdFrom<f32, S> for f32x4<S> {
    #[inline(always)]
    fn simd_from(value: f32, simd: S) -> Self {
        let splatted: Self = simd.splat_f32x4(value);
        splatted
    }
}
/// Chooses between two `f32x4` values under a `mask32x4` by delegating to `select_f32x4`.
///
/// How mask lanes map to the choice is defined by the `Simd` implementation.
impl<S: Simd> Select<f32x4<S>> for mask32x4<S> {
    #[inline(always)]
    fn select(self, if_true: f32x4<S>, if_false: f32x4<S>) -> f32x4<S> {
        let mask = self;
        mask.simd.select_f32x4(mask, if_true, if_false)
    }
}
/// Reinterprets the 16 bytes of an `f32x4` as a `u8x16` and back, in native memory order.
impl<S: Simd> Bytes for f32x4<S> {
    type Bytes = u8x16<S>;
    /// Returns the lanes' raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[f32; 4]` and `[u8; 16]` are both 16 bytes, and every bit pattern
        // is a valid `[u8; 16]`.
        let val = unsafe { core::mem::transmute::<[f32; 4], [u8; 16]>(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }
    /// Rebuilds the lanes from raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 16]` and `[f32; 4]` are both 16 bytes, and every bit pattern
        // is a valid `f32`.
        let val = unsafe { core::mem::transmute::<[u8; 16], [f32; 4]>(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> f32x4<S> {
    #[inline(always)]
    pub fn abs(self) -> f32x4<S> {
        self.simd.abs_f32x4(self)
    }
    #[inline(always)]
    pub fn neg(self) -> f32x4<S> {
        self.simd.neg_f32x4(self)
    }
    #[inline(always)]
    pub fn sqrt(self) -> f32x4<S> {
        self.simd.sqrt_f32x4(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.add_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.sub_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.mul_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn div(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.div_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn copysign(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.copysign_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_eq_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_lt_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_le_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_ge_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_gt_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.max_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.max_precise_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.min_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.min_precise_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn floor(self) -> f32x4<S> {
        self.simd.floor_f32x4(self)
    }
    #[inline(always)]
    pub fn fract(self) -> f32x4<S> {
        self.simd.fract_f32x4(self)
    }
    #[inline(always)]
    pub fn trunc(self) -> f32x4<S> {
        self.simd.trunc_f32x4(self)
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        self.simd.combine_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_f64(self) -> f64x2<S> {
        self.simd.reinterpret_f64_f32x4(self)
    }
    #[inline(always)]
    pub fn reinterpret_i32(self) -> i32x4<S> {
        self.simd.reinterpret_i32_f32x4(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x16<S> {
        self.simd.reinterpret_u8_f32x4(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x4<S> {
        self.simd.reinterpret_u32_f32x4(self)
    }
    #[inline(always)]
    pub fn cvt_u32(self) -> u32x4<S> {
        self.simd.cvt_u32_f32x4(self)
    }
    #[inline(always)]
    pub fn cvt_i32(self) -> i32x4<S> {
        self.simd.cvt_i32_f32x4(self)
    }
}
/// Core vector interface for `f32x4`: four `f32` lanes, `mask32x4` as the mask type,
/// and `f32x4` itself as the block type.
impl<S: Simd> crate::SimdBase<f32, S> for f32x4<S> {
    const N: usize = 4;
    type Mask = mask32x4<S>;
    type Block = f32x4<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the four lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[f32] {
        &self.val[..]
    }
    /// Mutably borrows the four lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [f32] {
        &mut self.val[..]
    }
    /// Copies exactly four lanes out of `slice`.
    ///
    /// # Panics
    ///
    /// Panics inside `copy_from_slice` if `slice.len() != 4`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[f32]) -> Self {
        let mut lanes = [0.0_f32; 4];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_f32x4` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: f32) -> Self {
        simd.splat_f32x4(val)
    }
    /// Returns `block` unchanged, because the block type is `f32x4` itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
impl<S: Simd> crate::SimdFloat<f32, S> for f32x4<S> {
    #[inline(always)]
    fn abs(self) -> f32x4<S> {
        self.simd.abs_f32x4(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f32x4<S> {
        self.simd.sqrt_f32x4(self)
    }
    #[inline(always)]
    fn copysign(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.copysign_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_eq_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_lt_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_le_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_ge_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_gt_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.zip_low_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.zip_high_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.unzip_low_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.unzip_high_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.max_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.max_precise_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.min_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd.min_precise_f32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn madd(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd
            .madd_f32x4(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn msub(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f32x4<S> {
        self.simd
            .msub_f32x4(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn floor(self) -> f32x4<S> {
        self.simd.floor_f32x4(self)
    }
    #[inline(always)]
    fn fract(self) -> f32x4<S> {
        self.simd.fract_f32x4(self)
    }
    #[inline(always)]
    fn trunc(self) -> f32x4<S> {
        self.simd.trunc_f32x4(self)
    }
}
/// Converts a `u32x4` into an `f32x4` by delegating to `cvt_f32_u32x4` on `x.simd`.
impl<S: Simd> SimdCvtFloat<u32x4<S>> for f32x4<S> {
    // Marked `#[inline(always)]` like every other delegating method in this file, so the
    // wrapper never becomes a separate call around the underlying `Simd` method.
    #[inline(always)]
    fn float_from(x: u32x4<S>) -> Self {
        x.simd.cvt_f32_u32x4(x)
    }
}
/// Converts an `i32x4` into an `f32x4` by delegating to `cvt_f32_i32x4` on `x.simd`.
impl<S: Simd> SimdCvtFloat<i32x4<S>> for f32x4<S> {
    // Marked `#[inline(always)]` like every other delegating method in this file, so the
    // wrapper never becomes a separate call around the underlying `Simd` method.
    #[inline(always)]
    fn float_from(x: i32x4<S>) -> Self {
        x.simd.cvt_f32_i32x4(x)
    }
}
/// A vector of sixteen `i8` lanes paired with the `S: Simd` value its operations are
/// dispatched through.
///
/// `repr(C, align(16))` keeps `val` as the first field and aligns the struct to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct i8x16<S: Simd> {
    /// The sixteen lane values.
    pub val: [i8; 16],
    /// The `Simd` value that the methods on this type delegate to.
    pub simd: S,
}
/// Builds an `i8x16` from an array of sixteen lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[i8; 16], S> for i8x16<S> {
    /// Stores `val` unchanged as the lane array, alongside `simd`.
    #[inline(always)]
    fn simd_from(val: [i8; 16], simd: S) -> Self {
        // The array is `Copy`; moving it whole yields the same lanes as copying one by one.
        Self { val, simd }
    }
}
impl<S: Simd> From<i8x16<S>> for [i8; 16] {
    #[inline(always)]
    fn from(value: i8x16<S>) -> Self {
        value.val
    }
}
/// Lets an `i8x16` be read as its underlying `[i8; 16]`.
impl<S: Simd> core::ops::Deref for i8x16<S> {
    type Target = [i8; 16];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets the lanes of an `i8x16` be modified through its underlying `[i8; 16]`.
impl<S: Simd> core::ops::DerefMut for i8x16<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `i8x16` from a single `i8` by delegating to `splat_i8x16` on `simd`.
impl<S: Simd> SimdFrom<i8, S> for i8x16<S> {
    #[inline(always)]
    fn simd_from(value: i8, simd: S) -> Self {
        let splatted: Self = simd.splat_i8x16(value);
        splatted
    }
}
/// Chooses between two `i8x16` values under a `mask8x16` by delegating to `select_i8x16`.
///
/// How mask lanes map to the choice is defined by the `Simd` implementation.
impl<S: Simd> Select<i8x16<S>> for mask8x16<S> {
    #[inline(always)]
    fn select(self, if_true: i8x16<S>, if_false: i8x16<S>) -> i8x16<S> {
        let mask = self;
        mask.simd.select_i8x16(mask, if_true, if_false)
    }
}
/// Reinterprets the 16 bytes of an `i8x16` as a `u8x16` and back.
impl<S: Simd> Bytes for i8x16<S> {
    type Bytes = u8x16<S>;
    /// Returns the lanes' raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[i8; 16]` and `[u8; 16]` have identical size, and every bit pattern
        // is a valid `u8`.
        let val = unsafe { core::mem::transmute::<[i8; 16], [u8; 16]>(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }
    /// Rebuilds the lanes from raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 16]` and `[i8; 16]` have identical size, and every bit pattern
        // is a valid `i8`.
        let val = unsafe { core::mem::transmute::<[u8; 16], [i8; 16]>(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> i8x16<S> {
    #[inline(always)]
    pub fn not(self) -> i8x16<S> {
        self.simd.not_i8x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.add_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.sub_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.mul_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.and_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.or_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.xor_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i8x16<S> {
        self.simd.shr_i8x16(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.shrv_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i8x16<S> {
        self.simd.shl_i8x16(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_eq_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_lt_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_le_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_ge_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_gt_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.min_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i8x16<S> {
        self.simd.max_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        self.simd.combine_i8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i8x16<S> {
        self.simd.neg_i8x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x16<S> {
        self.simd.reinterpret_u8_i8x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x4<S> {
        self.simd.reinterpret_u32_i8x16(self)
    }
}
/// Core vector interface for `i8x16`: sixteen `i8` lanes, `mask8x16` as the mask type,
/// and `i8x16` itself as the block type.
impl<S: Simd> crate::SimdBase<i8, S> for i8x16<S> {
    const N: usize = 16;
    type Mask = mask8x16<S>;
    type Block = i8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the sixteen lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i8] {
        &self.val[..]
    }
    /// Mutably borrows the sixteen lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i8] {
        &mut self.val[..]
    }
    /// Copies exactly sixteen lanes out of `slice`.
    ///
    /// # Panics
    ///
    /// Panics inside `copy_from_slice` if `slice.len() != 16`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i8]) -> Self {
        let mut lanes = [0_i8; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_i8x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i8) -> Self {
        simd.splat_i8x16(val)
    }
    /// Returns `block` unchanged, because the block type is `i8x16` itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
/// `SimdInt` implementation for `i8x16`.
///
/// Every method converts `rhs` with `simd_into(self.simd)` and forwards both operands
/// to the correspondingly named `*_i8x16` method on `self.simd`.
impl<S: Simd> crate::SimdInt<i8, S> for i8x16<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_i8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_i8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_i8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_i8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_i8x16(self, rhs)
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_i8x16(self, rhs)
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_i8x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_i8x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_i8x16(self, rhs)
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_i8x16(self, rhs)
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_i8x16(self, rhs)
    }
}
/// A vector of sixteen `u8` lanes paired with the `S: Simd` value its operations are
/// dispatched through.
///
/// `repr(C, align(16))` keeps `val` as the first field and aligns the struct to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct u8x16<S: Simd> {
    /// The sixteen lane values.
    pub val: [u8; 16],
    /// The `Simd` value that the methods on this type delegate to.
    pub simd: S,
}
/// Builds a `u8x16` from an array of sixteen lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[u8; 16], S> for u8x16<S> {
    /// Stores `val` unchanged as the lane array, alongside `simd`.
    #[inline(always)]
    fn simd_from(val: [u8; 16], simd: S) -> Self {
        // The array is `Copy`; moving it whole yields the same lanes as copying one by one.
        Self { val, simd }
    }
}
impl<S: Simd> From<u8x16<S>> for [u8; 16] {
    #[inline(always)]
    fn from(value: u8x16<S>) -> Self {
        value.val
    }
}
/// Lets a `u8x16` be read as its underlying `[u8; 16]`.
impl<S: Simd> core::ops::Deref for u8x16<S> {
    type Target = [u8; 16];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets the lanes of a `u8x16` be modified through its underlying `[u8; 16]`.
impl<S: Simd> core::ops::DerefMut for u8x16<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds a `u8x16` from a single `u8` by delegating to `splat_u8x16` on `simd`.
impl<S: Simd> SimdFrom<u8, S> for u8x16<S> {
    #[inline(always)]
    fn simd_from(value: u8, simd: S) -> Self {
        let splatted: Self = simd.splat_u8x16(value);
        splatted
    }
}
/// Chooses between two `u8x16` values under a `mask8x16` by delegating to `select_u8x16`.
///
/// How mask lanes map to the choice is defined by the `Simd` implementation.
impl<S: Simd> Select<u8x16<S>> for mask8x16<S> {
    #[inline(always)]
    fn select(self, if_true: u8x16<S>, if_false: u8x16<S>) -> u8x16<S> {
        let mask = self;
        mask.simd.select_u8x16(mask, if_true, if_false)
    }
}
/// `u8x16` is its own byte representation, so both conversions are the identity.
impl<S: Simd> Bytes for u8x16<S> {
    type Bytes = u8x16<S>;
    /// Returns `self` unchanged.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // `Self` and `Self::Bytes` are the same type; no `unsafe` reinterpretation of
        // `[u8; 16]` as `[u8; 16]` is needed.
        self
    }
    /// Returns `value` unchanged.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        value
    }
}
impl<S: Simd> u8x16<S> {
    #[inline(always)]
    pub fn not(self) -> u8x16<S> {
        self.simd.not_u8x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.add_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.sub_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.mul_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.and_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.or_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.xor_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u8x16<S> {
        self.simd.shr_u8x16(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.shrv_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u8x16<S> {
        self.simd.shl_u8x16(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_eq_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_lt_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_le_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_ge_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        self.simd.simd_gt_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.min_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u8x16<S> {
        self.simd.max_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.combine_u8x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x4<S> {
        self.simd.reinterpret_u32_u8x16(self)
    }
}
/// Core vector interface for `u8x16`: sixteen `u8` lanes, `mask8x16` as the mask type,
/// and `u8x16` itself as the block type.
impl<S: Simd> crate::SimdBase<u8, S> for u8x16<S> {
    const N: usize = 16;
    type Mask = mask8x16<S>;
    type Block = u8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the sixteen lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self.val[..]
    }
    /// Mutably borrows the sixteen lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u8] {
        &mut self.val[..]
    }
    /// Copies exactly sixteen lanes out of `slice`.
    ///
    /// # Panics
    ///
    /// Panics inside `copy_from_slice` if `slice.len() != 16`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u8]) -> Self {
        let mut lanes = [0_u8; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_u8x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u8) -> Self {
        simd.splat_u8x16(val)
    }
    /// Returns `block` unchanged, because the block type is `u8x16` itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
/// `SimdInt` implementation for `u8x16`.
///
/// Every method converts `rhs` with `simd_into(self.simd)` and forwards both operands
/// to the correspondingly named `*_u8x16` method on `self.simd`.
impl<S: Simd> crate::SimdInt<u8, S> for u8x16<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_u8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_u8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_u8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_u8x16(self, rhs)
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_u8x16(self, rhs)
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_u8x16(self, rhs)
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_u8x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_u8x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_u8x16(self, rhs)
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_u8x16(self, rhs)
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_u8x16(self, rhs)
    }
}
/// A sixteen-lane mask stored as `i8` values, paired with the `S: Simd` value its
/// operations are dispatched through.
///
/// `repr(C, align(16))` keeps `val` as the first field and aligns the struct to 16 bytes.
// NOTE(review): the per-lane encoding of true/false is not visible in this file section;
// confirm against the `Simd` implementations before relying on specific lane values.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct mask8x16<S: Simd> {
    /// The sixteen mask lanes.
    pub val: [i8; 16],
    /// The `Simd` value that the methods on this type delegate to.
    pub simd: S,
}
/// Builds a `mask8x16` from an array of sixteen `i8` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[i8; 16], S> for mask8x16<S> {
    /// Stores `val` unchanged as the lane array, alongside `simd`.
    #[inline(always)]
    fn simd_from(val: [i8; 16], simd: S) -> Self {
        // The array is `Copy`; moving it whole yields the same lanes as copying one by one.
        Self { val, simd }
    }
}
impl<S: Simd> From<mask8x16<S>> for [i8; 16] {
    #[inline(always)]
    fn from(value: mask8x16<S>) -> Self {
        value.val
    }
}
/// Lets a `mask8x16` be read as its underlying `[i8; 16]`.
impl<S: Simd> core::ops::Deref for mask8x16<S> {
    type Target = [i8; 16];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets the lanes of a `mask8x16` be modified through its underlying `[i8; 16]`.
impl<S: Simd> core::ops::DerefMut for mask8x16<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds a `mask8x16` from a single `i8` by delegating to `splat_mask8x16` on `simd`.
impl<S: Simd> SimdFrom<i8, S> for mask8x16<S> {
    #[inline(always)]
    fn simd_from(value: i8, simd: S) -> Self {
        let splatted: Self = simd.splat_mask8x16(value);
        splatted
    }
}
/// Chooses between two `mask8x16` values under a `mask8x16` by delegating to
/// `select_mask8x16`.
///
/// How mask lanes map to the choice is defined by the `Simd` implementation.
impl<S: Simd> Select<mask8x16<S>> for mask8x16<S> {
    #[inline(always)]
    fn select(self, if_true: mask8x16<S>, if_false: mask8x16<S>) -> mask8x16<S> {
        let mask = self;
        mask.simd.select_mask8x16(mask, if_true, if_false)
    }
}
/// Reinterprets the 16 bytes of a `mask8x16` as a `u8x16` and back.
impl<S: Simd> Bytes for mask8x16<S> {
    type Bytes = u8x16<S>;
    /// Returns the lanes' raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[i8; 16]` and `[u8; 16]` have identical size, and every bit pattern
        // is a valid `u8`.
        let val = unsafe { core::mem::transmute::<[i8; 16], [u8; 16]>(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }
    /// Rebuilds the lanes from raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 16]` and `[i8; 16]` have identical size, and every bit pattern
        // is a valid `i8`.
        let val = unsafe { core::mem::transmute::<[u8; 16], [i8; 16]>(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
/// Inherent convenience methods for `mask8x16`.
///
/// Each method hands `self` to the correspondingly named `*_mask8x16` method on
/// `self.simd`; the semantics of every operation are defined by that `Simd` implementation.
/// Arguments typed `impl SimdInto<Self, S>` are first converted with `simd_into(self.simd)`.
impl<S: Simd> mask8x16<S> {
    /// Delegates to `not_mask8x16`.
    #[inline(always)]
    pub fn not(self) -> Self {
        self.simd.not_mask8x16(self)
    }
    /// Converts `rhs`, then delegates to `and_mask8x16`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.and_mask8x16(self, rhs)
    }
    /// Converts `rhs`, then delegates to `or_mask8x16`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.or_mask8x16(self, rhs)
    }
    /// Converts `rhs`, then delegates to `xor_mask8x16`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.xor_mask8x16(self, rhs)
    }
    /// Converts `rhs`, then delegates to `simd_eq_mask8x16`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask8x16(self, rhs)
    }
    /// Converts `rhs`, then delegates to `combine_mask8x16`, producing a `mask8x32`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.combine_mask8x16(self, rhs)
    }
}
/// Core vector interface for `mask8x16`: sixteen `i8` lanes, with `mask8x16` itself as
/// both the mask type and the block type.
impl<S: Simd> crate::SimdBase<i8, S> for mask8x16<S> {
    const N: usize = 16;
    type Mask = mask8x16<S>;
    type Block = mask8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the sixteen lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i8] {
        &self.val[..]
    }
    /// Mutably borrows the sixteen lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i8] {
        &mut self.val[..]
    }
    /// Copies exactly sixteen lanes out of `slice`.
    ///
    /// # Panics
    ///
    /// Panics inside `copy_from_slice` if `slice.len() != 16`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i8]) -> Self {
        let mut lanes = [0_i8; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_mask8x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i8) -> Self {
        simd.splat_mask8x16(val)
    }
    /// Returns `block` unchanged, because the block type is `mask8x16` itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
/// `SimdMask` implementation for `mask8x16`; comparison is forwarded to
/// `simd_eq_mask8x16` on `self.simd`.
impl<S: Simd> crate::SimdMask<i8, S> for mask8x16<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask8x16(self, rhs)
    }
}
/// A vector of eight `i16` lanes paired with the `S: Simd` value its operations are
/// dispatched through.
///
/// `repr(C, align(16))` keeps `val` as the first field and aligns the struct to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct i16x8<S: Simd> {
    /// The eight lane values.
    pub val: [i16; 8],
    /// The `Simd` value that the methods on this type delegate to.
    pub simd: S,
}
/// Builds an `i16x8` from an array of eight lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[i16; 8], S> for i16x8<S> {
    /// Stores `val` unchanged as the lane array, alongside `simd`.
    #[inline(always)]
    fn simd_from(val: [i16; 8], simd: S) -> Self {
        // The array is `Copy`; moving it whole yields the same lanes as copying one by one.
        Self { val, simd }
    }
}
impl<S: Simd> From<i16x8<S>> for [i16; 8] {
    #[inline(always)]
    fn from(value: i16x8<S>) -> Self {
        value.val
    }
}
/// Lets an `i16x8` be read as its underlying `[i16; 8]`.
impl<S: Simd> core::ops::Deref for i16x8<S> {
    type Target = [i16; 8];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets the lanes of an `i16x8` be modified through its underlying `[i16; 8]`.
impl<S: Simd> core::ops::DerefMut for i16x8<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `i16x8` from a single `i16` by delegating to `splat_i16x8` on `simd`.
impl<S: Simd> SimdFrom<i16, S> for i16x8<S> {
    #[inline(always)]
    fn simd_from(value: i16, simd: S) -> Self {
        let splatted: Self = simd.splat_i16x8(value);
        splatted
    }
}
/// Chooses between two `i16x8` values under a `mask16x8` by delegating to `select_i16x8`.
///
/// How mask lanes map to the choice is defined by the `Simd` implementation.
impl<S: Simd> Select<i16x8<S>> for mask16x8<S> {
    #[inline(always)]
    fn select(self, if_true: i16x8<S>, if_false: i16x8<S>) -> i16x8<S> {
        let mask = self;
        mask.simd.select_i16x8(mask, if_true, if_false)
    }
}
/// Reinterprets the 16 bytes of an `i16x8` as a `u8x16` and back, in native memory order.
impl<S: Simd> Bytes for i16x8<S> {
    type Bytes = u8x16<S>;
    /// Returns the lanes' raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[i16; 8]` and `[u8; 16]` are both 16 bytes, and every bit pattern
        // is a valid `[u8; 16]`.
        let val = unsafe { core::mem::transmute::<[i16; 8], [u8; 16]>(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }
    /// Rebuilds the lanes from raw bytes, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 16]` and `[i16; 8]` are both 16 bytes, and every bit pattern
        // is a valid `i16`.
        let val = unsafe { core::mem::transmute::<[u8; 16], [i16; 8]>(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> i16x8<S> {
    #[inline(always)]
    pub fn not(self) -> i16x8<S> {
        self.simd.not_i16x8(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.add_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.sub_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.mul_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.and_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.or_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.xor_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i16x8<S> {
        self.simd.shr_i16x8(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.shrv_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i16x8<S> {
        self.simd.shl_i16x8(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_eq_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_lt_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_le_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_ge_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_gt_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.min_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i16x8<S> {
        self.simd.max_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.combine_i16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i16x8<S> {
        self.simd.neg_i16x8(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x16<S> {
        self.simd.reinterpret_u8_i16x8(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x4<S> {
        self.simd.reinterpret_u32_i16x8(self)
    }
}
impl<S> crate::SimdBase<i16, S> for i16x8<S>
where
    S: Simd,
{
    const N: usize = 8;
    type Mask = mask16x8<S>;
    type Block = i16x8<S>;

    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the eight lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i16] {
        &self.val[..]
    }
    /// Mutably borrows the eight lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics when `slice.len() != 8`, because `copy_from_slice` requires
    /// source and destination to have equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i16]) -> Self {
        let mut lanes = [0_i16; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `simd.splat_i16x8`.
    #[inline(always)]
    fn splat(simd: S, val: i16) -> Self {
        simd.splat_i16x8(val)
    }
    /// Returns `block` unchanged, since `Block` is this very type.
    #[inline(always)]
    fn block_splat(block: i16x8<S>) -> Self {
        block
    }
}
/// Trait-level counterparts of the `i16x8` operations; each forwards to the
/// matching `*_i16x8` method on the stored `simd` value after converting `rhs`
/// with `simd_into`.
impl<S> crate::SimdInt<i16, S> for i16x8<S>
where
    S: Simd,
{
    /// Forwards to `self.simd.simd_eq_i16x8`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_lt_i16x8`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_le_i16x8`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_ge_i16x8`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_gt_i16x8`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.zip_low_i16x8`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.zip_high_i16x8`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.unzip_low_i16x8`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.unzip_high_i16x8`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.min_i16x8`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_i16x8(self, rhs)
    }
    /// Forwards to `self.simd.max_i16x8`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_i16x8(self, rhs)
    }
}
/// Vector of eight `u16` lanes paired with a value of the `Simd` type `S`.
///
/// The struct uses C field layout and is aligned to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct u16x8<S: Simd> {
    /// The eight lane values.
    pub val: [u16; 8],
    /// The `Simd` value that the methods on this type dispatch through.
    pub simd: S,
}
impl<S> SimdFrom<[u16; 8], S> for u16x8<S>
where
    S: Simd,
{
    /// Wraps an array of eight lanes together with `simd`; the lanes are
    /// stored in the order given.
    #[inline(always)]
    fn simd_from(val: [u16; 8], simd: S) -> u16x8<S> {
        Self { val, simd }
    }
}
impl<S: Simd> From<u16x8<S>> for [u16; 8] {
    #[inline(always)]
    fn from(value: u16x8<S>) -> Self {
        value.val
    }
}
impl<S> core::ops::Deref for u16x8<S>
where
    S: Simd,
{
    type Target = [u16; 8];

    /// Gives shared access to the lane array.
    #[inline(always)]
    fn deref(&self) -> &[u16; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> core::ops::DerefMut for u16x8<S>
where
    S: Simd,
{
    /// Gives exclusive access to the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [u16; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> SimdFrom<u16, S> for u16x8<S>
where
    S: Simd,
{
    /// Builds a `u16x8` from a single `u16` by forwarding to `simd.splat_u16x8`.
    #[inline(always)]
    fn simd_from(value: u16, simd: S) -> u16x8<S> {
        simd.splat_u16x8(value)
    }
}
impl<S> Select<u16x8<S>> for mask16x8<S>
where
    S: Simd,
{
    /// Forwards this mask and both candidate vectors to `select_u16x8` on the
    /// mask's stored `simd` value.
    #[inline(always)]
    fn select(self, if_true: u16x8<S>, if_false: u16x8<S>) -> u16x8<S> {
        let simd = self.simd;
        simd.select_u16x8(self, if_true, if_false)
    }
}
impl<S> Bytes for u16x8<S>
where
    S: Simd,
{
    type Bytes = u8x16<S>;

    /// Reinterprets the `[u16; 8]` lane array as the `val` of a `u8x16`,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn to_bytes(self) -> u8x16<S> {
        // SAFETY: `transmute` only compiles when both types have the same size,
        // and the source is a plain integer array. Assumes `u8x16::val` is
        // `[u8; 16]` (declared outside this chunk) — confirm.
        let val = unsafe { core::mem::transmute(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }

    /// Reinterprets the `val` of a `u8x16` as a `[u16; 8]` lane array,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn from_bytes(value: u8x16<S>) -> Self {
        // SAFETY: `transmute` enforces equal sizes at compile time and every
        // bit pattern is a valid `u16`.
        let val = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> u16x8<S> {
    #[inline(always)]
    pub fn not(self) -> u16x8<S> {
        self.simd.not_u16x8(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.add_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.sub_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.mul_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.and_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.or_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.xor_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u16x8<S> {
        self.simd.shr_u16x8(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.shrv_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u16x8<S> {
        self.simd.shl_u16x8(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_eq_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_lt_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_le_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_ge_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        self.simd.simd_gt_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.min_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u16x8<S> {
        self.simd.max_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.combine_u16x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x16<S> {
        self.simd.reinterpret_u8_u16x8(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x4<S> {
        self.simd.reinterpret_u32_u16x8(self)
    }
}
impl<S> crate::SimdBase<u16, S> for u16x8<S>
where
    S: Simd,
{
    const N: usize = 8;
    type Mask = mask16x8<S>;
    type Block = u16x8<S>;

    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the eight lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u16] {
        &self.val[..]
    }
    /// Mutably borrows the eight lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics when `slice.len() != 8`, because `copy_from_slice` requires
    /// source and destination to have equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u16]) -> Self {
        let mut lanes = [0_u16; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `simd.splat_u16x8`.
    #[inline(always)]
    fn splat(simd: S, val: u16) -> Self {
        simd.splat_u16x8(val)
    }
    /// Returns `block` unchanged, since `Block` is this very type.
    #[inline(always)]
    fn block_splat(block: u16x8<S>) -> Self {
        block
    }
}
/// Trait-level counterparts of the `u16x8` operations; each forwards to the
/// matching `*_u16x8` method on the stored `simd` value after converting `rhs`
/// with `simd_into`.
impl<S> crate::SimdInt<u16, S> for u16x8<S>
where
    S: Simd,
{
    /// Forwards to `self.simd.simd_eq_u16x8`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_lt_u16x8`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_le_u16x8`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_ge_u16x8`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_gt_u16x8`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.zip_low_u16x8`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.zip_high_u16x8`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.unzip_low_u16x8`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.unzip_high_u16x8`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.min_u16x8`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_u16x8(self, rhs)
    }
    /// Forwards to `self.simd.max_u16x8`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_u16x8(self, rhs)
    }
}
/// Mask type with eight `i16` lanes paired with a value of the `Simd` type `S`.
///
/// The struct uses C field layout and is aligned to 16 bytes.
/// NOTE(review): the per-lane encoding of true/false is not visible in this
/// chunk — confirm against the `Simd` implementations.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct mask16x8<S: Simd> {
    /// The eight lane values.
    pub val: [i16; 8],
    /// The `Simd` value that the methods on this type dispatch through.
    pub simd: S,
}
impl<S> SimdFrom<[i16; 8], S> for mask16x8<S>
where
    S: Simd,
{
    /// Wraps an array of eight lanes together with `simd`; the lanes are
    /// stored in the order given.
    #[inline(always)]
    fn simd_from(val: [i16; 8], simd: S) -> mask16x8<S> {
        Self { val, simd }
    }
}
impl<S: Simd> From<mask16x8<S>> for [i16; 8] {
    #[inline(always)]
    fn from(value: mask16x8<S>) -> Self {
        value.val
    }
}
impl<S> core::ops::Deref for mask16x8<S>
where
    S: Simd,
{
    type Target = [i16; 8];

    /// Gives shared access to the lane array.
    #[inline(always)]
    fn deref(&self) -> &[i16; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> core::ops::DerefMut for mask16x8<S>
where
    S: Simd,
{
    /// Gives exclusive access to the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i16; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> SimdFrom<i16, S> for mask16x8<S>
where
    S: Simd,
{
    /// Builds a `mask16x8` from a single `i16` by forwarding to
    /// `simd.splat_mask16x8`.
    #[inline(always)]
    fn simd_from(value: i16, simd: S) -> mask16x8<S> {
        simd.splat_mask16x8(value)
    }
}
impl<S> Select<mask16x8<S>> for mask16x8<S>
where
    S: Simd,
{
    /// Forwards this mask and both candidate masks to `select_mask16x8` on the
    /// stored `simd` value.
    #[inline(always)]
    fn select(self, if_true: mask16x8<S>, if_false: mask16x8<S>) -> Self {
        let simd = self.simd;
        simd.select_mask16x8(self, if_true, if_false)
    }
}
impl<S> Bytes for mask16x8<S>
where
    S: Simd,
{
    type Bytes = u8x16<S>;

    /// Reinterprets the `[i16; 8]` lane array as the `val` of a `u8x16`,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn to_bytes(self) -> u8x16<S> {
        // SAFETY: `transmute` only compiles when both types have the same size,
        // and the source is a plain integer array. Assumes `u8x16::val` is
        // `[u8; 16]` (declared outside this chunk) — confirm.
        let val = unsafe { core::mem::transmute(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }

    /// Reinterprets the `val` of a `u8x16` as an `[i16; 8]` lane array,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn from_bytes(value: u8x16<S>) -> Self {
        // SAFETY: `transmute` enforces equal sizes at compile time and every
        // bit pattern is a valid `i16`.
        let val = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
/// Inherent convenience methods for `mask16x8`.
///
/// Each method forwards to the matching `*_mask16x8` operation on the stored
/// `simd` value; right-hand operands are first converted with `simd_into`.
impl<S> mask16x8<S>
where
    S: Simd,
{
    /// Forwards to `self.simd.not_mask16x8`.
    #[inline(always)]
    pub fn not(self) -> Self {
        self.simd.not_mask16x8(self)
    }
    /// Forwards to `self.simd.and_mask16x8` with the converted `rhs`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.and_mask16x8(self, rhs)
    }
    /// Forwards to `self.simd.or_mask16x8` with the converted `rhs`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.or_mask16x8(self, rhs)
    }
    /// Forwards to `self.simd.xor_mask16x8` with the converted `rhs`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.xor_mask16x8(self, rhs)
    }
    /// Forwards to `self.simd.simd_eq_mask16x8` with the converted `rhs`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask16x8(self, rhs)
    }
    /// Forwards to `self.simd.combine_mask16x8`, yielding a `mask16x16`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.combine_mask16x8(self, rhs)
    }
}
impl<S> crate::SimdBase<i16, S> for mask16x8<S>
where
    S: Simd,
{
    const N: usize = 8;
    type Mask = mask16x8<S>;
    type Block = mask16x8<S>;

    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the eight lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i16] {
        &self.val[..]
    }
    /// Mutably borrows the eight lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics when `slice.len() != 8`, because `copy_from_slice` requires
    /// source and destination to have equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i16]) -> Self {
        let mut lanes = [0_i16; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `simd.splat_mask16x8`.
    #[inline(always)]
    fn splat(simd: S, val: i16) -> Self {
        simd.splat_mask16x8(val)
    }
    /// Returns `block` unchanged, since `Block` is this very type.
    #[inline(always)]
    fn block_splat(block: mask16x8<S>) -> Self {
        block
    }
}
impl<S> crate::SimdMask<i16, S> for mask16x8<S>
where
    S: Simd,
{
    /// Forwards to `self.simd.simd_eq_mask16x8` after converting `rhs` with
    /// `simd_into`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask16x8(self, rhs)
    }
}
/// Vector of four `i32` lanes paired with a value of the `Simd` type `S`.
///
/// The struct uses C field layout and is aligned to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct i32x4<S: Simd> {
    /// The four lane values.
    pub val: [i32; 4],
    /// The `Simd` value that the methods on this type dispatch through.
    pub simd: S,
}
impl<S> SimdFrom<[i32; 4], S> for i32x4<S>
where
    S: Simd,
{
    /// Wraps an array of four lanes together with `simd`; the lanes are
    /// stored in the order given.
    #[inline(always)]
    fn simd_from(val: [i32; 4], simd: S) -> i32x4<S> {
        Self { val, simd }
    }
}
impl<S: Simd> From<i32x4<S>> for [i32; 4] {
    #[inline(always)]
    fn from(value: i32x4<S>) -> Self {
        value.val
    }
}
impl<S> core::ops::Deref for i32x4<S>
where
    S: Simd,
{
    type Target = [i32; 4];

    /// Gives shared access to the lane array.
    #[inline(always)]
    fn deref(&self) -> &[i32; 4] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> core::ops::DerefMut for i32x4<S>
where
    S: Simd,
{
    /// Gives exclusive access to the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i32; 4] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> SimdFrom<i32, S> for i32x4<S>
where
    S: Simd,
{
    /// Builds an `i32x4` from a single `i32` by forwarding to `simd.splat_i32x4`.
    #[inline(always)]
    fn simd_from(value: i32, simd: S) -> i32x4<S> {
        simd.splat_i32x4(value)
    }
}
impl<S> Select<i32x4<S>> for mask32x4<S>
where
    S: Simd,
{
    /// Forwards this mask and both candidate vectors to `select_i32x4` on the
    /// mask's stored `simd` value.
    #[inline(always)]
    fn select(self, if_true: i32x4<S>, if_false: i32x4<S>) -> i32x4<S> {
        let simd = self.simd;
        simd.select_i32x4(self, if_true, if_false)
    }
}
impl<S> Bytes for i32x4<S>
where
    S: Simd,
{
    type Bytes = u8x16<S>;

    /// Reinterprets the `[i32; 4]` lane array as the `val` of a `u8x16`,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn to_bytes(self) -> u8x16<S> {
        // SAFETY: `transmute` only compiles when both types have the same size,
        // and the source is a plain integer array. Assumes `u8x16::val` is
        // `[u8; 16]` (declared outside this chunk) — confirm.
        let val = unsafe { core::mem::transmute(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }

    /// Reinterprets the `val` of a `u8x16` as an `[i32; 4]` lane array,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn from_bytes(value: u8x16<S>) -> Self {
        // SAFETY: `transmute` enforces equal sizes at compile time and every
        // bit pattern is a valid `i32`.
        let val = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> i32x4<S> {
    #[inline(always)]
    pub fn not(self) -> i32x4<S> {
        self.simd.not_i32x4(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.add_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.sub_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.mul_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.and_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.or_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.xor_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i32x4<S> {
        self.simd.shr_i32x4(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.shrv_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i32x4<S> {
        self.simd.shl_i32x4(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_eq_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_lt_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_le_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_ge_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_gt_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.min_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i32x4<S> {
        self.simd.max_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.combine_i32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i32x4<S> {
        self.simd.neg_i32x4(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x16<S> {
        self.simd.reinterpret_u8_i32x4(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x4<S> {
        self.simd.reinterpret_u32_i32x4(self)
    }
    #[inline(always)]
    pub fn cvt_f32(self) -> f32x4<S> {
        self.simd.cvt_f32_i32x4(self)
    }
}
impl<S> crate::SimdBase<i32, S> for i32x4<S>
where
    S: Simd,
{
    const N: usize = 4;
    type Mask = mask32x4<S>;
    type Block = i32x4<S>;

    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the four lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i32] {
        &self.val[..]
    }
    /// Mutably borrows the four lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics when `slice.len() != 4`, because `copy_from_slice` requires
    /// source and destination to have equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i32]) -> Self {
        let mut lanes = [0_i32; 4];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `simd.splat_i32x4`.
    #[inline(always)]
    fn splat(simd: S, val: i32) -> Self {
        simd.splat_i32x4(val)
    }
    /// Returns `block` unchanged, since `Block` is this very type.
    #[inline(always)]
    fn block_splat(block: i32x4<S>) -> Self {
        block
    }
}
/// Trait-level counterparts of the `i32x4` operations; each forwards to the
/// matching `*_i32x4` method on the stored `simd` value after converting `rhs`
/// with `simd_into`.
impl<S> crate::SimdInt<i32, S> for i32x4<S>
where
    S: Simd,
{
    /// Forwards to `self.simd.simd_eq_i32x4`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_lt_i32x4`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_le_i32x4`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_ge_i32x4`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_gt_i32x4`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.zip_low_i32x4`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.zip_high_i32x4`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.unzip_low_i32x4`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.unzip_high_i32x4`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.min_i32x4`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_i32x4(self, rhs)
    }
    /// Forwards to `self.simd.max_i32x4`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_i32x4(self, rhs)
    }
}
impl<S: Simd> SimdCvtTruncate<f32x4<S>> for i32x4<S> {
    /// Converts an `f32x4` into an `i32x4` by forwarding to `cvt_i32_f32x4` on
    /// the `simd` value stored in `x`.
    // `#[inline(always)]` matches every other forwarding wrapper in this file;
    // without it this was the one conversion path not forced inline.
    #[inline(always)]
    fn truncate_from(x: f32x4<S>) -> Self {
        x.simd.cvt_i32_f32x4(x)
    }
}
/// Vector of four `u32` lanes paired with a value of the `Simd` type `S`.
///
/// The struct uses C field layout and is aligned to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct u32x4<S: Simd> {
    /// The four lane values.
    pub val: [u32; 4],
    /// The `Simd` value that the methods on this type dispatch through.
    pub simd: S,
}
impl<S> SimdFrom<[u32; 4], S> for u32x4<S>
where
    S: Simd,
{
    /// Wraps an array of four lanes together with `simd`; the lanes are
    /// stored in the order given.
    #[inline(always)]
    fn simd_from(val: [u32; 4], simd: S) -> u32x4<S> {
        Self { val, simd }
    }
}
impl<S: Simd> From<u32x4<S>> for [u32; 4] {
    #[inline(always)]
    fn from(value: u32x4<S>) -> Self {
        value.val
    }
}
impl<S> core::ops::Deref for u32x4<S>
where
    S: Simd,
{
    type Target = [u32; 4];

    /// Gives shared access to the lane array.
    #[inline(always)]
    fn deref(&self) -> &[u32; 4] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> core::ops::DerefMut for u32x4<S>
where
    S: Simd,
{
    /// Gives exclusive access to the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [u32; 4] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> SimdFrom<u32, S> for u32x4<S>
where
    S: Simd,
{
    /// Builds a `u32x4` from a single `u32` by forwarding to `simd.splat_u32x4`.
    #[inline(always)]
    fn simd_from(value: u32, simd: S) -> u32x4<S> {
        simd.splat_u32x4(value)
    }
}
impl<S> Select<u32x4<S>> for mask32x4<S>
where
    S: Simd,
{
    /// Forwards this mask and both candidate vectors to `select_u32x4` on the
    /// mask's stored `simd` value.
    #[inline(always)]
    fn select(self, if_true: u32x4<S>, if_false: u32x4<S>) -> u32x4<S> {
        let simd = self.simd;
        simd.select_u32x4(self, if_true, if_false)
    }
}
impl<S> Bytes for u32x4<S>
where
    S: Simd,
{
    type Bytes = u8x16<S>;

    /// Reinterprets the `[u32; 4]` lane array as the `val` of a `u8x16`,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn to_bytes(self) -> u8x16<S> {
        // SAFETY: `transmute` only compiles when both types have the same size,
        // and the source is a plain integer array. Assumes `u8x16::val` is
        // `[u8; 16]` (declared outside this chunk) — confirm.
        let val = unsafe { core::mem::transmute(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }

    /// Reinterprets the `val` of a `u8x16` as a `[u32; 4]` lane array,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn from_bytes(value: u8x16<S>) -> Self {
        // SAFETY: `transmute` enforces equal sizes at compile time and every
        // bit pattern is a valid `u32`.
        let val = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> u32x4<S> {
    #[inline(always)]
    pub fn not(self) -> u32x4<S> {
        self.simd.not_u32x4(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.add_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.sub_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.mul_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.and_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.or_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.xor_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u32x4<S> {
        self.simd.shr_u32x4(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.shrv_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u32x4<S> {
        self.simd.shl_u32x4(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_eq_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_lt_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_le_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_ge_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        self.simd.simd_gt_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.min_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u32x4<S> {
        self.simd.max_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.combine_u32x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x16<S> {
        self.simd.reinterpret_u8_u32x4(self)
    }
    #[inline(always)]
    pub fn cvt_f32(self) -> f32x4<S> {
        self.simd.cvt_f32_u32x4(self)
    }
}
impl<S> crate::SimdBase<u32, S> for u32x4<S>
where
    S: Simd,
{
    const N: usize = 4;
    type Mask = mask32x4<S>;
    type Block = u32x4<S>;

    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the four lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u32] {
        &self.val[..]
    }
    /// Mutably borrows the four lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics when `slice.len() != 4`, because `copy_from_slice` requires
    /// source and destination to have equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u32]) -> Self {
        let mut lanes = [0_u32; 4];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `simd.splat_u32x4`.
    #[inline(always)]
    fn splat(simd: S, val: u32) -> Self {
        simd.splat_u32x4(val)
    }
    /// Returns `block` unchanged, since `Block` is this very type.
    #[inline(always)]
    fn block_splat(block: u32x4<S>) -> Self {
        block
    }
}
/// Trait-level counterparts of the `u32x4` operations; each forwards to the
/// matching `*_u32x4` method on the stored `simd` value after converting `rhs`
/// with `simd_into`.
impl<S> crate::SimdInt<u32, S> for u32x4<S>
where
    S: Simd,
{
    /// Forwards to `self.simd.simd_eq_u32x4`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_lt_u32x4`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_le_u32x4`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_ge_u32x4`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.simd_gt_u32x4`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.zip_low_u32x4`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.zip_high_u32x4`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.unzip_low_u32x4`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.unzip_high_u32x4`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.min_u32x4`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_u32x4(self, rhs)
    }
    /// Forwards to `self.simd.max_u32x4`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_u32x4(self, rhs)
    }
}
impl<S: Simd> SimdCvtTruncate<f32x4<S>> for u32x4<S> {
    /// Converts an `f32x4` into a `u32x4` by forwarding to `cvt_u32_f32x4` on
    /// the `simd` value stored in `x`.
    // `#[inline(always)]` matches every other forwarding wrapper in this file;
    // without it this was the one conversion path not forced inline.
    #[inline(always)]
    fn truncate_from(x: f32x4<S>) -> Self {
        x.simd.cvt_u32_f32x4(x)
    }
}
/// Mask type with four `i32` lanes paired with a value of the `Simd` type `S`.
///
/// The struct uses C field layout and is aligned to 16 bytes.
/// NOTE(review): the per-lane encoding of true/false is not visible in this
/// chunk — confirm against the `Simd` implementations.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct mask32x4<S: Simd> {
    /// The four lane values.
    pub val: [i32; 4],
    /// The `Simd` value that the methods on this type dispatch through.
    pub simd: S,
}
impl<S> SimdFrom<[i32; 4], S> for mask32x4<S>
where
    S: Simd,
{
    /// Wraps an array of four lanes together with `simd`; the lanes are
    /// stored in the order given.
    #[inline(always)]
    fn simd_from(val: [i32; 4], simd: S) -> mask32x4<S> {
        Self { val, simd }
    }
}
impl<S: Simd> From<mask32x4<S>> for [i32; 4] {
    #[inline(always)]
    fn from(value: mask32x4<S>) -> Self {
        value.val
    }
}
impl<S> core::ops::Deref for mask32x4<S>
where
    S: Simd,
{
    type Target = [i32; 4];

    /// Gives shared access to the lane array.
    #[inline(always)]
    fn deref(&self) -> &[i32; 4] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> core::ops::DerefMut for mask32x4<S>
where
    S: Simd,
{
    /// Gives exclusive access to the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i32; 4] {
        let Self { val, .. } = self;
        val
    }
}
impl<S> SimdFrom<i32, S> for mask32x4<S>
where
    S: Simd,
{
    /// Builds a `mask32x4` from a single `i32` by forwarding to
    /// `simd.splat_mask32x4`.
    #[inline(always)]
    fn simd_from(value: i32, simd: S) -> mask32x4<S> {
        simd.splat_mask32x4(value)
    }
}
impl<S> Select<mask32x4<S>> for mask32x4<S>
where
    S: Simd,
{
    /// Forwards this mask and both candidate masks to `select_mask32x4` on the
    /// stored `simd` value.
    #[inline(always)]
    fn select(self, if_true: mask32x4<S>, if_false: mask32x4<S>) -> Self {
        let simd = self.simd;
        simd.select_mask32x4(self, if_true, if_false)
    }
}
impl<S> Bytes for mask32x4<S>
where
    S: Simd,
{
    type Bytes = u8x16<S>;

    /// Reinterprets the `[i32; 4]` lane array as the `val` of a `u8x16`,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn to_bytes(self) -> u8x16<S> {
        // SAFETY: `transmute` only compiles when both types have the same size,
        // and the source is a plain integer array. Assumes `u8x16::val` is
        // `[u8; 16]` (declared outside this chunk) — confirm.
        let val = unsafe { core::mem::transmute(self.val) };
        u8x16 {
            val,
            simd: self.simd,
        }
    }

    /// Reinterprets the `val` of a `u8x16` as an `[i32; 4]` lane array,
    /// carrying the same `simd` value across.
    #[inline(always)]
    fn from_bytes(value: u8x16<S>) -> Self {
        // SAFETY: `transmute` enforces equal sizes at compile time and every
        // bit pattern is a valid `i32`.
        let val = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
/// Inherent operations on `mask32x4`.
///
/// Each method forwards to the `*_mask32x4` method of the same name on `self.simd`; `rhs`
/// operands are first converted with `simd_into(self.simd)`.
impl<S: Simd> mask32x4<S> {
    /// Forwards to `not_mask32x4` on `self.simd`.
    #[inline(always)]
    pub fn not(self) -> mask32x4<S> {
        let simd = self.simd;
        simd.not_mask32x4(self)
    }
    /// Forwards to `and_mask32x4` on `self.simd`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let simd = self.simd;
        simd.and_mask32x4(self, rhs.simd_into(simd))
    }
    /// Forwards to `or_mask32x4` on `self.simd`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let simd = self.simd;
        simd.or_mask32x4(self, rhs.simd_into(simd))
    }
    /// Forwards to `xor_mask32x4` on `self.simd`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let simd = self.simd;
        simd.xor_mask32x4(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_eq_mask32x4` on `self.simd`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let simd = self.simd;
        simd.simd_eq_mask32x4(self, rhs.simd_into(simd))
    }
    /// Forwards to `combine_mask32x4` on `self.simd`, producing a `mask32x8`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.combine_mask32x4(self, rhs.simd_into(simd))
    }
}
/// `SimdBase` for `mask32x4`: four `i32` lanes, with itself as both mask and block type.
impl<S: Simd> crate::SimdBase<i32, S> for mask32x4<S> {
    const N: usize = 4;
    type Mask = mask32x4<S>;
    type Block = mask32x4<S>;
    /// Returns a copy of the `Simd` value stored in this vector.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i32] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i32] {
        &mut self.val[..]
    }
    /// Builds a vector by copying `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 4`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i32]) -> Self {
        let mut lanes = [0; 4];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask32x4` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i32) -> Self {
        simd.splat_mask32x4(val)
    }
    /// Returns `block` unchanged, since the block type is this type.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
/// `SimdMask` for `mask32x4`.
impl<S: Simd> crate::SimdMask<i32, S> for mask32x4<S> {
    /// Converts `rhs` with `simd_into(self.simd)` and forwards to `simd_eq_mask32x4`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask32x4(self, rhs)
    }
}
/// Two `f64` lanes paired with the `Simd` value `S` that performs operations on them.
///
/// Laid out with `repr(C)` and 16-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct f64x2<S: Simd> {
    /// The lane values.
    pub val: [f64; 2],
    /// The `Simd` value that the methods of this type forward to.
    pub simd: S,
}
/// Wraps a `[f64; 2]` lane array and a `Simd` value into an `f64x2`.
impl<S: Simd> SimdFrom<[f64; 2], S> for f64x2<S> {
    /// Stores `val` unchanged as the lane array.
    #[inline(always)]
    fn simd_from(val: [f64; 2], simd: S) -> Self {
        // The array is `Copy`, so it is moved in whole instead of lane by lane.
        Self { val, simd }
    }
}
impl<S: Simd> From<f64x2<S>> for [f64; 2] {
    #[inline(always)]
    fn from(value: f64x2<S>) -> Self {
        value.val
    }
}
/// Lets an `f64x2` be read as its underlying `[f64; 2]` lane array.
impl<S: Simd> core::ops::Deref for f64x2<S> {
    type Target = [f64; 2];
    /// Borrows the lane array.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets an `f64x2` be modified through its underlying `[f64; 2]` lane array.
impl<S: Simd> core::ops::DerefMut for f64x2<S> {
    /// Mutably borrows the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `f64x2` from a single `f64` by forwarding to `splat_f64x2` on `simd`.
impl<S: Simd> SimdFrom<f64, S> for f64x2<S> {
    #[inline(always)]
    fn simd_from(value: f64, simd: S) -> Self {
        simd.splat_f64x2(value)
    }
}
/// Selection between two `f64x2` values, controlled by a `mask64x2`.
impl<S: Simd> Select<f64x2<S>> for mask64x2<S> {
    /// Forwards to `select_f64x2` on `self.simd` with `self` as the first argument.
    #[inline(always)]
    fn select(self, if_true: f64x2<S>, if_false: f64x2<S>) -> f64x2<S> {
        let simd = self.simd;
        simd.select_f64x2(self, if_true, if_false)
    }
}
/// Byte-level reinterpretation of `f64x2` as `u8x16`.
impl<S: Simd> Bytes for f64x2<S> {
    type Bytes = u8x16<S>;
    /// Reinterprets the lane array as the byte storage of a `u8x16`, keeping the same `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x16 {
            // SAFETY: `transmute` only compiles when both types have the same size, `[f64; 2]`
            // has no padding, and the destination is byte storage where any bit pattern is valid.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the byte storage of `value` as `[f64; 2]`, keeping the same `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `transmute` only compiles when both types have the same size, and every
            // bit pattern is a valid `[f64; 2]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
/// Inherent operations on `f64x2`.
///
/// Each method forwards to the `*_f64x2` method of the same name on `self.simd`; `rhs`
/// operands are first converted with `simd_into(self.simd)`.
impl<S: Simd> f64x2<S> {
    /// Forwards to `abs_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn abs(self) -> f64x2<S> {
        let simd = self.simd;
        simd.abs_f64x2(self)
    }
    /// Forwards to `neg_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn neg(self) -> f64x2<S> {
        let simd = self.simd;
        simd.neg_f64x2(self)
    }
    /// Forwards to `sqrt_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn sqrt(self) -> f64x2<S> {
        let simd = self.simd;
        simd.sqrt_f64x2(self)
    }
    /// Forwards to `add_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.add_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `sub_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.sub_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `mul_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.mul_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `div_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn div(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.div_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `copysign_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn copysign(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.copysign_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_eq_f64x2` on `self.simd`, producing a `mask64x2`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.simd_eq_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_lt_f64x2` on `self.simd`, producing a `mask64x2`.
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.simd_lt_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_le_f64x2` on `self.simd`, producing a `mask64x2`.
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.simd_le_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_ge_f64x2` on `self.simd`, producing a `mask64x2`.
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.simd_ge_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_gt_f64x2` on `self.simd`, producing a `mask64x2`.
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.simd_gt_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `max_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.max_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `max_precise_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.max_precise_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `min_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.min_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `min_precise_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let simd = self.simd;
        simd.min_precise_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `floor_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn floor(self) -> f64x2<S> {
        let simd = self.simd;
        simd.floor_f64x2(self)
    }
    /// Forwards to `fract_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn fract(self) -> f64x2<S> {
        let simd = self.simd;
        simd.fract_f64x2(self)
    }
    /// Forwards to `trunc_f64x2` on `self.simd`.
    #[inline(always)]
    pub fn trunc(self) -> f64x2<S> {
        let simd = self.simd;
        simd.trunc_f64x2(self)
    }
    /// Forwards to `combine_f64x2` on `self.simd`, producing an `f64x4`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        let simd = self.simd;
        simd.combine_f64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `reinterpret_f32_f64x2` on `self.simd`, producing an `f32x4`.
    #[inline(always)]
    pub fn reinterpret_f32(self) -> f32x4<S> {
        let simd = self.simd;
        simd.reinterpret_f32_f64x2(self)
    }
}
/// `SimdBase` for `f64x2`: two `f64` lanes, `mask64x2` as mask type, itself as block type.
impl<S: Simd> crate::SimdBase<f64, S> for f64x2<S> {
    const N: usize = 2;
    type Mask = mask64x2<S>;
    type Block = f64x2<S>;
    /// Returns a copy of the `Simd` value stored in this vector.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[f64] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.val[..]
    }
    /// Builds a vector by copying `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 2`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[f64]) -> Self {
        let mut lanes = [0.0; 2];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_f64x2` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: f64) -> Self {
        simd.splat_f64x2(val)
    }
    /// Returns `block` unchanged, since the block type is this type.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
/// `SimdFloat` for `f64x2`.
///
/// Each method forwards to the `*_f64x2` method of the same name on `self.simd`, after
/// converting any other operand with `simd_into(self.simd)`.
impl<S: Simd> crate::SimdFloat<f64, S> for f64x2<S> {
    /// Forwards to `abs_f64x2`.
    #[inline(always)]
    fn abs(self) -> f64x2<S> {
        let simd = self.simd;
        simd.abs_f64x2(self)
    }
    /// Forwards to `sqrt_f64x2`.
    #[inline(always)]
    fn sqrt(self) -> f64x2<S> {
        let simd = self.simd;
        simd.sqrt_f64x2(self)
    }
    /// Forwards to `copysign_f64x2`.
    #[inline(always)]
    fn copysign(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.copysign_f64x2(self, rhs)
    }
    /// Forwards to `simd_eq_f64x2`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_f64x2(self, rhs)
    }
    /// Forwards to `simd_lt_f64x2`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_f64x2(self, rhs)
    }
    /// Forwards to `simd_le_f64x2`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_f64x2(self, rhs)
    }
    /// Forwards to `simd_ge_f64x2`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_f64x2(self, rhs)
    }
    /// Forwards to `simd_gt_f64x2`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_f64x2(self, rhs)
    }
    /// Forwards to `zip_low_f64x2`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_f64x2(self, rhs)
    }
    /// Forwards to `zip_high_f64x2`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_f64x2(self, rhs)
    }
    /// Forwards to `unzip_low_f64x2`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_f64x2(self, rhs)
    }
    /// Forwards to `unzip_high_f64x2`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_f64x2(self, rhs)
    }
    /// Forwards to `max_f64x2`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_f64x2(self, rhs)
    }
    /// Forwards to `max_precise_f64x2`.
    #[inline(always)]
    fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_precise_f64x2(self, rhs)
    }
    /// Forwards to `min_f64x2`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_f64x2(self, rhs)
    }
    /// Forwards to `min_precise_f64x2`.
    #[inline(always)]
    fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_precise_f64x2(self, rhs)
    }
    /// Forwards to `madd_f64x2` with `self`, `op1` and `op2` in that order.
    #[inline(always)]
    fn madd(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f64x2<S> {
        let (op1, op2) = (op1.simd_into(self.simd), op2.simd_into(self.simd));
        self.simd.madd_f64x2(self, op1, op2)
    }
    /// Forwards to `msub_f64x2` with `self`, `op1` and `op2` in that order.
    #[inline(always)]
    fn msub(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f64x2<S> {
        let (op1, op2) = (op1.simd_into(self.simd), op2.simd_into(self.simd));
        self.simd.msub_f64x2(self, op1, op2)
    }
    /// Forwards to `floor_f64x2`.
    #[inline(always)]
    fn floor(self) -> f64x2<S> {
        let simd = self.simd;
        simd.floor_f64x2(self)
    }
    /// Forwards to `fract_f64x2`.
    #[inline(always)]
    fn fract(self) -> f64x2<S> {
        let simd = self.simd;
        simd.fract_f64x2(self)
    }
    /// Forwards to `trunc_f64x2`.
    #[inline(always)]
    fn trunc(self) -> f64x2<S> {
        let simd = self.simd;
        simd.trunc_f64x2(self)
    }
}
/// Two `i64` lanes paired with the `Simd` value `S` that performs operations on them.
///
/// Laid out with `repr(C)` and 16-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct mask64x2<S: Simd> {
    /// The lane values.
    pub val: [i64; 2],
    /// The `Simd` value that the methods of this type forward to.
    pub simd: S,
}
/// Wraps a `[i64; 2]` lane array and a `Simd` value into a `mask64x2`.
impl<S: Simd> SimdFrom<[i64; 2], S> for mask64x2<S> {
    /// Stores `val` unchanged as the lane array.
    #[inline(always)]
    fn simd_from(val: [i64; 2], simd: S) -> Self {
        // The array is `Copy`, so it is moved in whole instead of lane by lane.
        Self { val, simd }
    }
}
impl<S: Simd> From<mask64x2<S>> for [i64; 2] {
    #[inline(always)]
    fn from(value: mask64x2<S>) -> Self {
        value.val
    }
}
/// Lets a `mask64x2` be read as its underlying `[i64; 2]` lane array.
impl<S: Simd> core::ops::Deref for mask64x2<S> {
    type Target = [i64; 2];
    /// Borrows the lane array.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets a `mask64x2` be modified through its underlying `[i64; 2]` lane array.
impl<S: Simd> core::ops::DerefMut for mask64x2<S> {
    /// Mutably borrows the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds a `mask64x2` from a single `i64` by forwarding to `splat_mask64x2` on `simd`.
impl<S: Simd> SimdFrom<i64, S> for mask64x2<S> {
    #[inline(always)]
    fn simd_from(value: i64, simd: S) -> Self {
        simd.splat_mask64x2(value)
    }
}
/// Selection between two `mask64x2` values, controlled by a `mask64x2`.
impl<S: Simd> Select<mask64x2<S>> for mask64x2<S> {
    /// Forwards to `select_mask64x2` on `self.simd` with `self` as the first argument.
    #[inline(always)]
    fn select(self, if_true: mask64x2<S>, if_false: mask64x2<S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.select_mask64x2(self, if_true, if_false)
    }
}
/// Byte-level reinterpretation of `mask64x2` as `u8x16`.
impl<S: Simd> Bytes for mask64x2<S> {
    type Bytes = u8x16<S>;
    /// Reinterprets the lane array as the byte storage of a `u8x16`, keeping the same `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x16 {
            // SAFETY: `transmute` only compiles when both types have the same size, `[i64; 2]`
            // has no padding, and the destination is byte storage where any bit pattern is valid.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the byte storage of `value` as `[i64; 2]`, keeping the same `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `transmute` only compiles when both types have the same size, and every
            // bit pattern is a valid `[i64; 2]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
/// Inherent operations on `mask64x2`.
///
/// Each method forwards to the `*_mask64x2` method of the same name on `self.simd`; `rhs`
/// operands are first converted with `simd_into(self.simd)`.
impl<S: Simd> mask64x2<S> {
    /// Forwards to `not_mask64x2` on `self.simd`.
    #[inline(always)]
    pub fn not(self) -> mask64x2<S> {
        let simd = self.simd;
        simd.not_mask64x2(self)
    }
    /// Forwards to `and_mask64x2` on `self.simd`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.and_mask64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `or_mask64x2` on `self.simd`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.or_mask64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `xor_mask64x2` on `self.simd`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.xor_mask64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_eq_mask64x2` on `self.simd`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let simd = self.simd;
        simd.simd_eq_mask64x2(self, rhs.simd_into(simd))
    }
    /// Forwards to `combine_mask64x2` on `self.simd`, producing a `mask64x4`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        let simd = self.simd;
        simd.combine_mask64x2(self, rhs.simd_into(simd))
    }
}
/// `SimdBase` for `mask64x2`: two `i64` lanes, with itself as both mask and block type.
impl<S: Simd> crate::SimdBase<i64, S> for mask64x2<S> {
    const N: usize = 2;
    type Mask = mask64x2<S>;
    type Block = mask64x2<S>;
    /// Returns a copy of the `Simd` value stored in this vector.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i64] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i64] {
        &mut self.val[..]
    }
    /// Builds a vector by copying `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 2`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i64]) -> Self {
        let mut lanes = [0; 2];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask64x2` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i64) -> Self {
        simd.splat_mask64x2(val)
    }
    /// Returns `block` unchanged, since the block type is this type.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block
    }
}
/// `SimdMask` for `mask64x2`.
impl<S: Simd> crate::SimdMask<i64, S> for mask64x2<S> {
    /// Converts `rhs` with `simd_into(self.simd)` and forwards to `simd_eq_mask64x2`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x2<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask64x2(self, rhs)
    }
}
/// Eight `f32` lanes paired with the `Simd` value `S` that performs operations on them.
///
/// Laid out with `repr(C)` and 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct f32x8<S: Simd> {
    /// The lane values.
    pub val: [f32; 8],
    /// The `Simd` value that the methods of this type forward to.
    pub simd: S,
}
/// Wraps a `[f32; 8]` lane array and a `Simd` value into an `f32x8`.
impl<S: Simd> SimdFrom<[f32; 8], S> for f32x8<S> {
    /// Stores `val` unchanged as the lane array.
    #[inline(always)]
    fn simd_from(val: [f32; 8], simd: S) -> Self {
        // The array is `Copy`, so it is moved in whole instead of lane by lane.
        Self { val, simd }
    }
}
impl<S: Simd> From<f32x8<S>> for [f32; 8] {
    #[inline(always)]
    fn from(value: f32x8<S>) -> Self {
        value.val
    }
}
/// Lets an `f32x8` be read as its underlying `[f32; 8]` lane array.
impl<S: Simd> core::ops::Deref for f32x8<S> {
    type Target = [f32; 8];
    /// Borrows the lane array.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets an `f32x8` be modified through its underlying `[f32; 8]` lane array.
impl<S: Simd> core::ops::DerefMut for f32x8<S> {
    /// Mutably borrows the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `f32x8` from a single `f32` by forwarding to `splat_f32x8` on `simd`.
impl<S: Simd> SimdFrom<f32, S> for f32x8<S> {
    #[inline(always)]
    fn simd_from(value: f32, simd: S) -> Self {
        simd.splat_f32x8(value)
    }
}
/// Selection between two `f32x8` values, controlled by a `mask32x8`.
impl<S: Simd> Select<f32x8<S>> for mask32x8<S> {
    /// Forwards to `select_f32x8` on `self.simd` with `self` as the first argument.
    #[inline(always)]
    fn select(self, if_true: f32x8<S>, if_false: f32x8<S>) -> f32x8<S> {
        let simd = self.simd;
        simd.select_f32x8(self, if_true, if_false)
    }
}
/// Byte-level reinterpretation of `f32x8` as `u8x32`.
impl<S: Simd> Bytes for f32x8<S> {
    type Bytes = u8x32<S>;
    /// Reinterprets the lane array as the `[u8; 32]` of a `u8x32`, keeping the same `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x32 {
            // SAFETY: `[f32; 8]` and `[u8; 32]` have the same size (enforced by `transmute`),
            // `[f32; 8]` has no padding, and every bit pattern is a valid `[u8; 32]`.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the `[u8; 32]` of `value` as `[f32; 8]`, keeping the same `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `[u8; 32]` and `[f32; 8]` have the same size (enforced by `transmute`),
            // and every bit pattern is a valid `[f32; 8]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
/// Inherent operations on `f32x8`.
///
/// Each method forwards to the `*_f32x8` method of the same name on `self.simd`; `rhs`
/// operands are first converted with `simd_into(self.simd)`.
impl<S: Simd> f32x8<S> {
    /// Forwards to `abs_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn abs(self) -> f32x8<S> {
        let simd = self.simd;
        simd.abs_f32x8(self)
    }
    /// Forwards to `neg_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn neg(self) -> f32x8<S> {
        let simd = self.simd;
        simd.neg_f32x8(self)
    }
    /// Forwards to `sqrt_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn sqrt(self) -> f32x8<S> {
        let simd = self.simd;
        simd.sqrt_f32x8(self)
    }
    /// Forwards to `add_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.add_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `sub_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.sub_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `mul_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.mul_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `div_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn div(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.div_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `copysign_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn copysign(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.copysign_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_eq_f32x8` on `self.simd`, producing a `mask32x8`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.simd_eq_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_lt_f32x8` on `self.simd`, producing a `mask32x8`.
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.simd_lt_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_le_f32x8` on `self.simd`, producing a `mask32x8`.
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.simd_le_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_ge_f32x8` on `self.simd`, producing a `mask32x8`.
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.simd_ge_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_gt_f32x8` on `self.simd`, producing a `mask32x8`.
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.simd_gt_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `max_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.max_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `max_precise_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.max_precise_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `min_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.min_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `min_precise_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let simd = self.simd;
        simd.min_precise_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `floor_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn floor(self) -> f32x8<S> {
        let simd = self.simd;
        simd.floor_f32x8(self)
    }
    /// Forwards to `fract_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn fract(self) -> f32x8<S> {
        let simd = self.simd;
        simd.fract_f32x8(self)
    }
    /// Forwards to `trunc_f32x8` on `self.simd`.
    #[inline(always)]
    pub fn trunc(self) -> f32x8<S> {
        let simd = self.simd;
        simd.trunc_f32x8(self)
    }
    /// Forwards to `combine_f32x8` on `self.simd`, producing an `f32x16`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        let simd = self.simd;
        simd.combine_f32x8(self, rhs.simd_into(simd))
    }
    /// Forwards to `reinterpret_f64_f32x8` on `self.simd`, producing an `f64x4`.
    #[inline(always)]
    pub fn reinterpret_f64(self) -> f64x4<S> {
        let simd = self.simd;
        simd.reinterpret_f64_f32x8(self)
    }
    /// Forwards to `reinterpret_i32_f32x8` on `self.simd`, producing an `i32x8`.
    #[inline(always)]
    pub fn reinterpret_i32(self) -> i32x8<S> {
        let simd = self.simd;
        simd.reinterpret_i32_f32x8(self)
    }
    /// Forwards to `reinterpret_u8_f32x8` on `self.simd`, producing a `u8x32`.
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x32<S> {
        let simd = self.simd;
        simd.reinterpret_u8_f32x8(self)
    }
    /// Forwards to `reinterpret_u32_f32x8` on `self.simd`, producing a `u32x8`.
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x8<S> {
        let simd = self.simd;
        simd.reinterpret_u32_f32x8(self)
    }
    /// Forwards to `cvt_u32_f32x8` on `self.simd`, producing a `u32x8`.
    #[inline(always)]
    pub fn cvt_u32(self) -> u32x8<S> {
        let simd = self.simd;
        simd.cvt_u32_f32x8(self)
    }
    /// Forwards to `cvt_i32_f32x8` on `self.simd`, producing an `i32x8`.
    #[inline(always)]
    pub fn cvt_i32(self) -> i32x8<S> {
        let simd = self.simd;
        simd.cvt_i32_f32x8(self)
    }
}
/// `SimdBase` for `f32x8`: eight `f32` lanes, `mask32x8` as mask type, `f32x4` as block type.
impl<S: Simd> crate::SimdBase<f32, S> for f32x8<S> {
    const N: usize = 8;
    type Mask = mask32x8<S>;
    type Block = f32x4<S>;
    /// Returns a copy of the `Simd` value stored in this vector.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[f32] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [f32] {
        &mut self.val[..]
    }
    /// Builds a vector by copying `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 8`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[f32]) -> Self {
        let mut lanes = [0.0; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_f32x8` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: f32) -> Self {
        simd.splat_f32x8(val)
    }
    /// Combines the `f32x4` block with itself via `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block.combine(block)
    }
}
/// `SimdFloat` for `f32x8`.
///
/// Each method forwards to the `*_f32x8` method of the same name on `self.simd`, after
/// converting any other operand with `simd_into(self.simd)`.
impl<S: Simd> crate::SimdFloat<f32, S> for f32x8<S> {
    /// Forwards to `abs_f32x8`.
    #[inline(always)]
    fn abs(self) -> f32x8<S> {
        let simd = self.simd;
        simd.abs_f32x8(self)
    }
    /// Forwards to `sqrt_f32x8`.
    #[inline(always)]
    fn sqrt(self) -> f32x8<S> {
        let simd = self.simd;
        simd.sqrt_f32x8(self)
    }
    /// Forwards to `copysign_f32x8`.
    #[inline(always)]
    fn copysign(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.copysign_f32x8(self, rhs)
    }
    /// Forwards to `simd_eq_f32x8`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_f32x8(self, rhs)
    }
    /// Forwards to `simd_lt_f32x8`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_f32x8(self, rhs)
    }
    /// Forwards to `simd_le_f32x8`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_f32x8(self, rhs)
    }
    /// Forwards to `simd_ge_f32x8`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_f32x8(self, rhs)
    }
    /// Forwards to `simd_gt_f32x8`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_f32x8(self, rhs)
    }
    /// Forwards to `zip_low_f32x8`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_f32x8(self, rhs)
    }
    /// Forwards to `zip_high_f32x8`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_f32x8(self, rhs)
    }
    /// Forwards to `unzip_low_f32x8`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_f32x8(self, rhs)
    }
    /// Forwards to `unzip_high_f32x8`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_f32x8(self, rhs)
    }
    /// Forwards to `max_f32x8`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_f32x8(self, rhs)
    }
    /// Forwards to `max_precise_f32x8`.
    #[inline(always)]
    fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_precise_f32x8(self, rhs)
    }
    /// Forwards to `min_f32x8`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_f32x8(self, rhs)
    }
    /// Forwards to `min_precise_f32x8`.
    #[inline(always)]
    fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_precise_f32x8(self, rhs)
    }
    /// Forwards to `madd_f32x8` with `self`, `op1` and `op2` in that order.
    #[inline(always)]
    fn madd(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f32x8<S> {
        let (op1, op2) = (op1.simd_into(self.simd), op2.simd_into(self.simd));
        self.simd.madd_f32x8(self, op1, op2)
    }
    /// Forwards to `msub_f32x8` with `self`, `op1` and `op2` in that order.
    #[inline(always)]
    fn msub(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f32x8<S> {
        let (op1, op2) = (op1.simd_into(self.simd), op2.simd_into(self.simd));
        self.simd.msub_f32x8(self, op1, op2)
    }
    /// Forwards to `floor_f32x8`.
    #[inline(always)]
    fn floor(self) -> f32x8<S> {
        let simd = self.simd;
        simd.floor_f32x8(self)
    }
    /// Forwards to `fract_f32x8`.
    #[inline(always)]
    fn fract(self) -> f32x8<S> {
        let simd = self.simd;
        simd.fract_f32x8(self)
    }
    /// Forwards to `trunc_f32x8`.
    #[inline(always)]
    fn trunc(self) -> f32x8<S> {
        let simd = self.simd;
        simd.trunc_f32x8(self)
    }
}
/// Conversion from `u32x8` to `f32x8`, implemented by `cvt_f32_u32x8` on the vector's `simd`.
impl<S: Simd> SimdCvtFloat<u32x8<S>> for f32x8<S> {
    /// Forwards `x` to `cvt_f32_u32x8` on the `simd` value carried by `x`.
    // Marked `#[inline(always)]` like the other forwarding wrappers in this file, so the
    // wrapper never stands between the caller and the underlying `Simd` method.
    #[inline(always)]
    fn float_from(x: u32x8<S>) -> Self {
        x.simd.cvt_f32_u32x8(x)
    }
}
/// Conversion from `i32x8` to `f32x8`, implemented by `cvt_f32_i32x8` on the vector's `simd`.
impl<S: Simd> SimdCvtFloat<i32x8<S>> for f32x8<S> {
    /// Forwards `x` to `cvt_f32_i32x8` on the `simd` value carried by `x`.
    // Marked `#[inline(always)]` like the other forwarding wrappers in this file, so the
    // wrapper never stands between the caller and the underlying `Simd` method.
    #[inline(always)]
    fn float_from(x: i32x8<S>) -> Self {
        x.simd.cvt_f32_i32x8(x)
    }
}
/// Thirty-two `i8` lanes paired with the `Simd` value `S` that performs operations on them.
///
/// Laid out with `repr(C)` and 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct i8x32<S: Simd> {
    /// The lane values.
    pub val: [i8; 32],
    /// The `Simd` value that the methods of this type forward to.
    pub simd: S,
}
/// Wraps a `[i8; 32]` lane array and a `Simd` value into an `i8x32`.
impl<S: Simd> SimdFrom<[i8; 32], S> for i8x32<S> {
    /// Stores `val` unchanged as the lane array.
    #[inline(always)]
    fn simd_from(val: [i8; 32], simd: S) -> Self {
        // The array is `Copy`, so it is moved in whole instead of lane by lane.
        Self { val, simd }
    }
}
impl<S: Simd> From<i8x32<S>> for [i8; 32] {
    #[inline(always)]
    fn from(value: i8x32<S>) -> Self {
        value.val
    }
}
/// Lets an `i8x32` be read as its underlying `[i8; 32]` lane array.
impl<S: Simd> core::ops::Deref for i8x32<S> {
    type Target = [i8; 32];
    /// Borrows the lane array.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets an `i8x32` be modified through its underlying `[i8; 32]` lane array.
impl<S: Simd> core::ops::DerefMut for i8x32<S> {
    /// Mutably borrows the lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `i8x32` from a single `i8` by forwarding to `splat_i8x32` on `simd`.
impl<S: Simd> SimdFrom<i8, S> for i8x32<S> {
    #[inline(always)]
    fn simd_from(value: i8, simd: S) -> Self {
        simd.splat_i8x32(value)
    }
}
/// Selection between two `i8x32` values, controlled by a `mask8x32`.
impl<S: Simd> Select<i8x32<S>> for mask8x32<S> {
    /// Forwards to `select_i8x32` on `self.simd` with `self` as the first argument.
    #[inline(always)]
    fn select(self, if_true: i8x32<S>, if_false: i8x32<S>) -> i8x32<S> {
        let simd = self.simd;
        simd.select_i8x32(self, if_true, if_false)
    }
}
/// Byte-level reinterpretation of `i8x32` as `u8x32`.
impl<S: Simd> Bytes for i8x32<S> {
    type Bytes = u8x32<S>;
    /// Reinterprets the lane array as the `[u8; 32]` of a `u8x32`, keeping the same `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x32 {
            // SAFETY: `[i8; 32]` and `[u8; 32]` have the same size (enforced by `transmute`),
            // `[i8; 32]` has no padding, and every bit pattern is a valid `[u8; 32]`.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the `[u8; 32]` of `value` as `[i8; 32]`, keeping the same `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `[u8; 32]` and `[i8; 32]` have the same size (enforced by `transmute`),
            // and every bit pattern is a valid `[i8; 32]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
/// Inherent operations on `i8x32`.
///
/// Each method forwards to the `*_i8x32` method of the same name on `self.simd`; `rhs`
/// operands are first converted with `simd_into(self.simd)`.
impl<S: Simd> i8x32<S> {
    /// Forwards to `not_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn not(self) -> i8x32<S> {
        let simd = self.simd;
        simd.not_i8x32(self)
    }
    /// Forwards to `add_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.add_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `sub_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.sub_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `mul_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.mul_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `and_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.and_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `or_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.or_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `xor_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.xor_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `shr_i8x32` on `self.simd` with the scalar `shift`.
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i8x32<S> {
        let simd = self.simd;
        simd.shr_i8x32(self, shift)
    }
    /// Forwards to `shrv_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.shrv_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `shl_i8x32` on `self.simd` with the scalar `shift`.
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i8x32<S> {
        let simd = self.simd;
        simd.shl_i8x32(self, shift)
    }
    /// Forwards to `simd_eq_i8x32` on `self.simd`, producing a `mask8x32`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let simd = self.simd;
        simd.simd_eq_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_lt_i8x32` on `self.simd`, producing a `mask8x32`.
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let simd = self.simd;
        simd.simd_lt_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_le_i8x32` on `self.simd`, producing a `mask8x32`.
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let simd = self.simd;
        simd.simd_le_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_ge_i8x32` on `self.simd`, producing a `mask8x32`.
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let simd = self.simd;
        simd.simd_ge_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `simd_gt_i8x32` on `self.simd`, producing a `mask8x32`.
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let simd = self.simd;
        simd.simd_gt_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `min_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.min_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `max_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let simd = self.simd;
        simd.max_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `combine_i8x32` on `self.simd`, producing an `i8x64`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        let simd = self.simd;
        simd.combine_i8x32(self, rhs.simd_into(simd))
    }
    /// Forwards to `neg_i8x32` on `self.simd`.
    #[inline(always)]
    pub fn neg(self) -> i8x32<S> {
        let simd = self.simd;
        simd.neg_i8x32(self)
    }
    /// Forwards to `reinterpret_u8_i8x32` on `self.simd`, producing a `u8x32`.
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x32<S> {
        let simd = self.simd;
        simd.reinterpret_u8_i8x32(self)
    }
    /// Forwards to `reinterpret_u32_i8x32` on `self.simd`, producing a `u32x8`.
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x8<S> {
        let simd = self.simd;
        simd.reinterpret_u32_i8x32(self)
    }
}
/// `SimdBase` for `i8x32`: thirty-two `i8` lanes, `mask8x32` as mask type, `i8x16` as block type.
impl<S: Simd> crate::SimdBase<i8, S> for i8x32<S> {
    const N: usize = 32;
    type Mask = mask8x32<S>;
    type Block = i8x16<S>;
    /// Returns a copy of the `Simd` value stored in this vector.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i8] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i8] {
        &mut self.val[..]
    }
    /// Builds a vector by copying `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 32`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i8]) -> Self {
        let mut lanes = [0; 32];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_i8x32` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i8) -> Self {
        simd.splat_i8x32(val)
    }
    /// Combines the `i8x16` block with itself via `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        block.combine(block)
    }
}
/// `SimdInt` for `i8x32`.
///
/// Each method converts `rhs` with `simd_into(self.simd)` and forwards to the `*_i8x32`
/// method of the same name on `self.simd`.
impl<S: Simd> crate::SimdInt<i8, S> for i8x32<S> {
    /// Forwards to `simd_eq_i8x32`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_i8x32(self, rhs)
    }
    /// Forwards to `simd_lt_i8x32`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_i8x32(self, rhs)
    }
    /// Forwards to `simd_le_i8x32`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_i8x32(self, rhs)
    }
    /// Forwards to `simd_ge_i8x32`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_i8x32(self, rhs)
    }
    /// Forwards to `simd_gt_i8x32`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_i8x32(self, rhs)
    }
    /// Forwards to `zip_low_i8x32`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_i8x32(self, rhs)
    }
    /// Forwards to `zip_high_i8x32`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_i8x32(self, rhs)
    }
    /// Forwards to `unzip_low_i8x32`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_i8x32(self, rhs)
    }
    /// Forwards to `unzip_high_i8x32`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_i8x32(self, rhs)
    }
    /// Forwards to `min_i8x32`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_i8x32(self, rhs)
    }
    /// Forwards to `max_i8x32`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> i8x32<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_i8x32(self, rhs)
    }
}
/// A vector of 32 `u8` lanes paired with a `Simd` value of type `S`.
///
/// `#[repr(C, align(32))]` fixes the field order and gives the struct 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct u8x32<S: Simd> {
    /// The lane values, stored as a plain array.
    pub val: [u8; 32],
    /// The `Simd` value that the operations on this vector forward to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[u8; 32], S> for u8x32<S> {
    /// Wraps the array `val` together with the `simd` value into a `u8x32`.
    #[inline(always)]
    fn simd_from(val: [u8; 32], simd: S) -> Self {
        Self {
            // Each lane is copied by index instead of moving `val` as a whole.
            // NOTE(review): presumably a deliberate choice in the generator; confirm
            // before collapsing this to a plain `val`.
            val: [
                val[0usize],
                val[1usize],
                val[2usize],
                val[3usize],
                val[4usize],
                val[5usize],
                val[6usize],
                val[7usize],
                val[8usize],
                val[9usize],
                val[10usize],
                val[11usize],
                val[12usize],
                val[13usize],
                val[14usize],
                val[15usize],
                val[16usize],
                val[17usize],
                val[18usize],
                val[19usize],
                val[20usize],
                val[21usize],
                val[22usize],
                val[23usize],
                val[24usize],
                val[25usize],
                val[26usize],
                val[27usize],
                val[28usize],
                val[29usize],
                val[30usize],
                val[31usize],
            ],
            simd,
        }
    }
}
impl<S: Simd> From<u8x32<S>> for [u8; 32] {
    #[inline(always)]
    fn from(value: u8x32<S>) -> Self {
        value.val
    }
}
/// Dereferences a `u8x32` to its lane array `[u8; 32]`.
impl<S: Simd> core::ops::Deref for u8x32<S> {
    type Target = [u8; 32];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        // Matching through `&self` binds `lanes` as a shared reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
/// Mutably dereferences a `u8x32` to its lane array.
impl<S: Simd> core::ops::DerefMut for u8x32<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Matching through `&mut self` binds `lanes` as a mutable reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
impl<S: Simd> SimdFrom<u8, S> for u8x32<S> {
    /// Builds a `u8x32` from the scalar `value` by forwarding to `splat_u8x32` on `simd`.
    #[inline(always)]
    fn simd_from(value: u8, simd: S) -> Self {
        simd.splat_u8x32(value)
    }
}
/// `Select` for `mask8x32` over `u8x32` operands.
///
/// Forwards the mask and both operands to `select_u8x32` on the mask's `simd` value.
impl<S: Simd> Select<u8x32<S>> for mask8x32<S> {
    #[inline(always)]
    fn select(self, if_true: u8x32<S>, if_false: u8x32<S>) -> u8x32<S> {
        let simd = self.simd;
        simd.select_u8x32(self, if_true, if_false)
    }
}
/// `Bytes` for `u8x32`.
///
/// `Self::Bytes` is `u8x32<S>` itself, so both conversions are the identity and need
/// neither `unsafe` nor a transmute between identical array types.
impl<S: Simd> Bytes for u8x32<S> {
    type Bytes = u8x32<S>;
    /// Returns `self` unchanged.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        self
    }
    /// Returns `value` unchanged.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        value
    }
}
impl<S: Simd> u8x32<S> {
    #[inline(always)]
    pub fn not(self) -> u8x32<S> {
        self.simd.not_u8x32(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.add_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.sub_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.mul_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.and_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.or_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.xor_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u8x32<S> {
        self.simd.shr_u8x32(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.shrv_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u8x32<S> {
        self.simd.shl_u8x32(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        self.simd.simd_eq_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        self.simd.simd_lt_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        self.simd.simd_le_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        self.simd.simd_ge_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        self.simd.simd_gt_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.min_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u8x32<S> {
        self.simd.max_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.combine_u8x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x8<S> {
        self.simd.reinterpret_u32_u8x32(self)
    }
}
/// `SimdBase` for `u8x32`: 32 lanes of `u8`, with `mask8x32` as the mask type and
/// `u8x16` as the block type.
impl<S: Simd> crate::SimdBase<u8, S> for u8x32<S> {
    const N: usize = 32;
    type Mask = mask8x32<S>;
    type Block = u8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u8] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new vector.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 32`; the length check is done by `copy_from_slice`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u8]) -> Self {
        let mut lanes = [0; 32];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_u8x32` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u8) -> Self {
        simd.splat_u8x32(val)
    }
    /// Passes `block` as both operands of `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdInt` for `u8x32`.
///
/// Each method converts `rhs` with `simd_into(self.simd)` and then forwards to the
/// same-named `*_u8x32` method on the stored `simd` value.
impl<S: Simd> crate::SimdInt<u8, S> for u8x32<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_u8x32(self, rhs)
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_lt_u8x32(self, rhs)
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_le_u8x32(self, rhs)
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_ge_u8x32(self, rhs)
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_gt_u8x32(self, rhs)
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_low_u8x32(self, rhs)
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_high_u8x32(self, rhs)
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_low_u8x32(self, rhs)
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_high_u8x32(self, rhs)
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.min_u8x32(self, rhs)
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.max_u8x32(self, rhs)
    }
}
/// A 32-lane mask whose lanes are stored as `i8` values, paired with a `Simd` value of
/// type `S`.
///
/// `#[repr(C, align(32))]` fixes the field order and gives the struct 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct mask8x32<S: Simd> {
    /// The mask lanes, stored as a plain array.
    pub val: [i8; 32],
    /// The `Simd` value that the operations on this mask forward to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i8; 32], S> for mask8x32<S> {
    /// Wraps the array `val` together with the `simd` value into a `mask8x32`.
    #[inline(always)]
    fn simd_from(val: [i8; 32], simd: S) -> Self {
        Self {
            // Each lane is copied by index instead of moving `val` as a whole.
            // NOTE(review): presumably a deliberate choice in the generator; confirm
            // before collapsing this to a plain `val`.
            val: [
                val[0usize],
                val[1usize],
                val[2usize],
                val[3usize],
                val[4usize],
                val[5usize],
                val[6usize],
                val[7usize],
                val[8usize],
                val[9usize],
                val[10usize],
                val[11usize],
                val[12usize],
                val[13usize],
                val[14usize],
                val[15usize],
                val[16usize],
                val[17usize],
                val[18usize],
                val[19usize],
                val[20usize],
                val[21usize],
                val[22usize],
                val[23usize],
                val[24usize],
                val[25usize],
                val[26usize],
                val[27usize],
                val[28usize],
                val[29usize],
                val[30usize],
                val[31usize],
            ],
            simd,
        }
    }
}
impl<S: Simd> From<mask8x32<S>> for [i8; 32] {
    #[inline(always)]
    fn from(value: mask8x32<S>) -> Self {
        value.val
    }
}
/// Dereferences a `mask8x32` to its lane array `[i8; 32]`.
impl<S: Simd> core::ops::Deref for mask8x32<S> {
    type Target = [i8; 32];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        // Matching through `&self` binds `lanes` as a shared reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
/// Mutably dereferences a `mask8x32` to its lane array.
impl<S: Simd> core::ops::DerefMut for mask8x32<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Matching through `&mut self` binds `lanes` as a mutable reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
impl<S: Simd> SimdFrom<i8, S> for mask8x32<S> {
    /// Builds a `mask8x32` from the scalar `value` by forwarding to `splat_mask8x32` on `simd`.
    #[inline(always)]
    fn simd_from(value: i8, simd: S) -> Self {
        simd.splat_mask8x32(value)
    }
}
/// `Select` for `mask8x32` over `mask8x32` operands.
///
/// Forwards the mask and both operands to `select_mask8x32` on the mask's `simd` value.
impl<S: Simd> Select<mask8x32<S>> for mask8x32<S> {
    #[inline(always)]
    fn select(self, if_true: mask8x32<S>, if_false: mask8x32<S>) -> mask8x32<S> {
        let simd = self.simd;
        simd.select_mask8x32(self, if_true, if_false)
    }
}
/// `Bytes` for `mask8x32`: reinterprets the `[i8; 32]` lanes as `[u8; 32]` and back,
/// carrying the `simd` value across unchanged.
impl<S: Simd> Bytes for mask8x32<S> {
    type Bytes = u8x32<S>;
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[i8; 32]` and `[u8; 32]` have the same size, and every bit pattern is
        // a valid value of both types.
        let val: [u8; 32] = unsafe { core::mem::transmute(self.val) };
        u8x32 {
            val,
            simd: self.simd,
        }
    }
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 32]` and `[i8; 32]` have the same size, and every bit pattern is
        // a valid value of both types.
        let val: [i8; 32] = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
/// Inherent operations on `mask8x32`.
///
/// Every method forwards to the same-named `*_mask8x32` method on the stored `simd` value.
/// Operands typed `impl SimdInto<Self, S>` are first converted with `simd_into(self.simd)`.
impl<S: Simd> mask8x32<S> {
    #[inline(always)]
    pub fn not(self) -> Self {
        self.simd.not_mask8x32(self)
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.and_mask8x32(self, rhs)
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.or_mask8x32(self, rhs)
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.xor_mask8x32(self, rhs)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask8x32(self, rhs)
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.combine_mask8x32(self, rhs)
    }
}
/// `SimdBase` for `mask8x32`: 32 lanes of `i8`, with `mask8x32` as its own mask type and
/// `mask8x16` as the block type.
impl<S: Simd> crate::SimdBase<i8, S> for mask8x32<S> {
    const N: usize = 32;
    type Mask = mask8x32<S>;
    type Block = mask8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i8] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i8] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new mask.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 32`; the length check is done by `copy_from_slice`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i8]) -> Self {
        let mut lanes = [0; 32];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask8x32` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i8) -> Self {
        simd.splat_mask8x32(val)
    }
    /// Passes `block` as both operands of `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdMask` for `mask8x32`: `simd_eq` converts `rhs` with `simd_into(self.simd)` and
/// forwards to `simd_eq_mask8x32` on the stored `simd` value.
impl<S: Simd> crate::SimdMask<i8, S> for mask8x32<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask8x32(self, rhs)
    }
}
/// A vector of 16 `i16` lanes paired with a `Simd` value of type `S`.
///
/// `#[repr(C, align(32))]` fixes the field order and gives the struct 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct i16x16<S: Simd> {
    /// The lane values, stored as a plain array.
    pub val: [i16; 16],
    /// The `Simd` value that the operations on this vector forward to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i16; 16], S> for i16x16<S> {
    /// Wraps the array `val` together with the `simd` value into an `i16x16`.
    #[inline(always)]
    fn simd_from(val: [i16; 16], simd: S) -> Self {
        Self {
            // Each lane is copied by index instead of moving `val` as a whole.
            // NOTE(review): presumably a deliberate choice in the generator; confirm
            // before collapsing this to a plain `val`.
            val: [
                val[0usize],
                val[1usize],
                val[2usize],
                val[3usize],
                val[4usize],
                val[5usize],
                val[6usize],
                val[7usize],
                val[8usize],
                val[9usize],
                val[10usize],
                val[11usize],
                val[12usize],
                val[13usize],
                val[14usize],
                val[15usize],
            ],
            simd,
        }
    }
}
impl<S: Simd> From<i16x16<S>> for [i16; 16] {
    #[inline(always)]
    fn from(value: i16x16<S>) -> Self {
        value.val
    }
}
/// Dereferences an `i16x16` to its lane array `[i16; 16]`.
impl<S: Simd> core::ops::Deref for i16x16<S> {
    type Target = [i16; 16];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        // Matching through `&self` binds `lanes` as a shared reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
/// Mutably dereferences an `i16x16` to its lane array.
impl<S: Simd> core::ops::DerefMut for i16x16<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Matching through `&mut self` binds `lanes` as a mutable reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
impl<S: Simd> SimdFrom<i16, S> for i16x16<S> {
    /// Builds an `i16x16` from the scalar `value` by forwarding to `splat_i16x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: i16, simd: S) -> Self {
        simd.splat_i16x16(value)
    }
}
/// `Select` for `mask16x16` over `i16x16` operands.
///
/// Forwards the mask and both operands to `select_i16x16` on the mask's `simd` value.
impl<S: Simd> Select<i16x16<S>> for mask16x16<S> {
    #[inline(always)]
    fn select(self, if_true: i16x16<S>, if_false: i16x16<S>) -> i16x16<S> {
        let simd = self.simd;
        simd.select_i16x16(self, if_true, if_false)
    }
}
/// `Bytes` for `i16x16`: reinterprets the `[i16; 16]` lanes as `[u8; 32]` and back,
/// carrying the `simd` value across unchanged.
impl<S: Simd> Bytes for i16x16<S> {
    type Bytes = u8x32<S>;
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[i16; 16]` and `[u8; 32]` are both 32 bytes of plain integers, so
        // every bit pattern is a valid value of the target type.
        let val: [u8; 32] = unsafe { core::mem::transmute(self.val) };
        u8x32 {
            val,
            simd: self.simd,
        }
    }
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 32]` and `[i16; 16]` are both 32 bytes of plain integers, so
        // every bit pattern is a valid value of the target type.
        let val: [i16; 16] = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> i16x16<S> {
    #[inline(always)]
    pub fn not(self) -> i16x16<S> {
        self.simd.not_i16x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.add_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.sub_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.mul_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.and_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.or_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.xor_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i16x16<S> {
        self.simd.shr_i16x16(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.shrv_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i16x16<S> {
        self.simd.shl_i16x16(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_eq_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_lt_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_le_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_ge_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_gt_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.min_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i16x16<S> {
        self.simd.max_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.combine_i16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i16x16<S> {
        self.simd.neg_i16x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x32<S> {
        self.simd.reinterpret_u8_i16x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x8<S> {
        self.simd.reinterpret_u32_i16x16(self)
    }
}
/// `SimdBase` for `i16x16`: 16 lanes of `i16`, with `mask16x16` as the mask type and
/// `i16x8` as the block type.
impl<S: Simd> crate::SimdBase<i16, S> for i16x16<S> {
    const N: usize = 16;
    type Mask = mask16x16<S>;
    type Block = i16x8<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i16] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new vector.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 16`; the length check is done by `copy_from_slice`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i16]) -> Self {
        let mut lanes = [0; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_i16x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i16) -> Self {
        simd.splat_i16x16(val)
    }
    /// Passes `block` as both operands of `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdInt` for `i16x16`.
///
/// Each method converts `rhs` with `simd_into(self.simd)` and then forwards to the
/// same-named `*_i16x16` method on the stored `simd` value.
impl<S: Simd> crate::SimdInt<i16, S> for i16x16<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_i16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_lt_i16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_le_i16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_ge_i16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_gt_i16x16(self, rhs)
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_low_i16x16(self, rhs)
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_high_i16x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_low_i16x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_high_i16x16(self, rhs)
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.min_i16x16(self, rhs)
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.max_i16x16(self, rhs)
    }
}
/// A vector of 16 `u16` lanes paired with a `Simd` value of type `S`.
///
/// `#[repr(C, align(32))]` fixes the field order and gives the struct 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct u16x16<S: Simd> {
    /// The lane values, stored as a plain array.
    pub val: [u16; 16],
    /// The `Simd` value that the operations on this vector forward to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[u16; 16], S> for u16x16<S> {
    /// Wraps the array `val` together with the `simd` value into a `u16x16`.
    #[inline(always)]
    fn simd_from(val: [u16; 16], simd: S) -> Self {
        Self {
            // Each lane is copied by index instead of moving `val` as a whole.
            // NOTE(review): presumably a deliberate choice in the generator; confirm
            // before collapsing this to a plain `val`.
            val: [
                val[0usize],
                val[1usize],
                val[2usize],
                val[3usize],
                val[4usize],
                val[5usize],
                val[6usize],
                val[7usize],
                val[8usize],
                val[9usize],
                val[10usize],
                val[11usize],
                val[12usize],
                val[13usize],
                val[14usize],
                val[15usize],
            ],
            simd,
        }
    }
}
impl<S: Simd> From<u16x16<S>> for [u16; 16] {
    #[inline(always)]
    fn from(value: u16x16<S>) -> Self {
        value.val
    }
}
/// Dereferences a `u16x16` to its lane array `[u16; 16]`.
impl<S: Simd> core::ops::Deref for u16x16<S> {
    type Target = [u16; 16];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        // Matching through `&self` binds `lanes` as a shared reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
/// Mutably dereferences a `u16x16` to its lane array.
impl<S: Simd> core::ops::DerefMut for u16x16<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Matching through `&mut self` binds `lanes` as a mutable reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
impl<S: Simd> SimdFrom<u16, S> for u16x16<S> {
    /// Builds a `u16x16` from the scalar `value` by forwarding to `splat_u16x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: u16, simd: S) -> Self {
        simd.splat_u16x16(value)
    }
}
/// `Select` for `mask16x16` over `u16x16` operands.
///
/// Forwards the mask and both operands to `select_u16x16` on the mask's `simd` value.
impl<S: Simd> Select<u16x16<S>> for mask16x16<S> {
    #[inline(always)]
    fn select(self, if_true: u16x16<S>, if_false: u16x16<S>) -> u16x16<S> {
        let simd = self.simd;
        simd.select_u16x16(self, if_true, if_false)
    }
}
/// `Bytes` for `u16x16`: reinterprets the `[u16; 16]` lanes as `[u8; 32]` and back,
/// carrying the `simd` value across unchanged.
impl<S: Simd> Bytes for u16x16<S> {
    type Bytes = u8x32<S>;
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[u16; 16]` and `[u8; 32]` are both 32 bytes of plain integers, so
        // every bit pattern is a valid value of the target type.
        let val: [u8; 32] = unsafe { core::mem::transmute(self.val) };
        u8x32 {
            val,
            simd: self.simd,
        }
    }
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 32]` and `[u16; 16]` are both 32 bytes of plain integers, so
        // every bit pattern is a valid value of the target type.
        let val: [u16; 16] = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
impl<S: Simd> u16x16<S> {
    #[inline(always)]
    pub fn not(self) -> u16x16<S> {
        self.simd.not_u16x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.add_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.sub_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.mul_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.and_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.or_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.xor_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u16x16<S> {
        self.simd.shr_u16x16(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.shrv_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u16x16<S> {
        self.simd.shl_u16x16(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_eq_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_lt_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_le_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_ge_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        self.simd.simd_gt_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.min_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u16x16<S> {
        self.simd.max_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.combine_u16x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x32<S> {
        self.simd.reinterpret_u8_u16x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x8<S> {
        self.simd.reinterpret_u32_u16x16(self)
    }
}
/// `SimdBase` for `u16x16`: 16 lanes of `u16`, with `mask16x16` as the mask type and
/// `u16x8` as the block type.
impl<S: Simd> crate::SimdBase<u16, S> for u16x16<S> {
    const N: usize = 16;
    type Mask = mask16x16<S>;
    type Block = u16x8<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u16] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new vector.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 16`; the length check is done by `copy_from_slice`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u16]) -> Self {
        let mut lanes = [0; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_u16x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u16) -> Self {
        simd.splat_u16x16(val)
    }
    /// Passes `block` as both operands of `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdInt` for `u16x16`.
///
/// Each method converts `rhs` with `simd_into(self.simd)` and then forwards to the
/// same-named `*_u16x16` method on the stored `simd` value.
impl<S: Simd> crate::SimdInt<u16, S> for u16x16<S> {
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_u16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_lt_u16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_le_u16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_ge_u16x16(self, rhs)
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_gt_u16x16(self, rhs)
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_low_u16x16(self, rhs)
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_high_u16x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_low_u16x16(self, rhs)
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_high_u16x16(self, rhs)
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.min_u16x16(self, rhs)
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.max_u16x16(self, rhs)
    }
}
/// A 16-lane mask whose lanes are stored as `i16` values, paired with a `Simd` value of
/// type `S`.
///
/// `#[repr(C, align(32))]` fixes the field order and gives the struct 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct mask16x16<S: Simd> {
    /// The mask lanes, stored as a plain array.
    pub val: [i16; 16],
    /// The `Simd` value that the operations on this mask forward to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i16; 16], S> for mask16x16<S> {
    /// Wraps the array `val` together with the `simd` value into a `mask16x16`.
    #[inline(always)]
    fn simd_from(val: [i16; 16], simd: S) -> Self {
        Self {
            // Each lane is copied by index instead of moving `val` as a whole.
            // NOTE(review): presumably a deliberate choice in the generator; confirm
            // before collapsing this to a plain `val`.
            val: [
                val[0usize],
                val[1usize],
                val[2usize],
                val[3usize],
                val[4usize],
                val[5usize],
                val[6usize],
                val[7usize],
                val[8usize],
                val[9usize],
                val[10usize],
                val[11usize],
                val[12usize],
                val[13usize],
                val[14usize],
                val[15usize],
            ],
            simd,
        }
    }
}
impl<S: Simd> From<mask16x16<S>> for [i16; 16] {
    #[inline(always)]
    fn from(value: mask16x16<S>) -> Self {
        value.val
    }
}
/// Dereferences a `mask16x16` to its lane array `[i16; 16]`.
impl<S: Simd> core::ops::Deref for mask16x16<S> {
    type Target = [i16; 16];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        // Matching through `&self` binds `lanes` as a shared reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
/// Mutably dereferences a `mask16x16` to its lane array.
impl<S: Simd> core::ops::DerefMut for mask16x16<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Matching through `&mut self` binds `lanes` as a mutable reference to the field.
        let Self { val: lanes, .. } = self;
        lanes
    }
}
impl<S: Simd> SimdFrom<i16, S> for mask16x16<S> {
    /// Builds a `mask16x16` from the scalar `value` by forwarding to `splat_mask16x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: i16, simd: S) -> Self {
        simd.splat_mask16x16(value)
    }
}
/// `Select` for `mask16x16` over `mask16x16` operands.
///
/// Forwards the mask and both operands to `select_mask16x16` on the mask's `simd` value.
impl<S: Simd> Select<mask16x16<S>> for mask16x16<S> {
    #[inline(always)]
    fn select(self, if_true: mask16x16<S>, if_false: mask16x16<S>) -> mask16x16<S> {
        let simd = self.simd;
        simd.select_mask16x16(self, if_true, if_false)
    }
}
/// `Bytes` for `mask16x16`: reinterprets the `[i16; 16]` lanes as `[u8; 32]` and back,
/// carrying the `simd` value across unchanged.
impl<S: Simd> Bytes for mask16x16<S> {
    type Bytes = u8x32<S>;
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        // SAFETY: `[i16; 16]` and `[u8; 32]` are both 32 bytes of plain integers, so
        // every bit pattern is a valid value of the target type.
        let val: [u8; 32] = unsafe { core::mem::transmute(self.val) };
        u8x32 {
            val,
            simd: self.simd,
        }
    }
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        // SAFETY: `[u8; 32]` and `[i16; 16]` are both 32 bytes of plain integers, so
        // every bit pattern is a valid value of the target type.
        let val: [i16; 16] = unsafe { core::mem::transmute(value.val) };
        Self {
            val,
            simd: value.simd,
        }
    }
}
/// Inherent operations on `mask16x16`.
///
/// Every method forwards to the same-named `*_mask16x16` method on the stored `simd` value.
/// Operands typed `impl SimdInto<Self, S>` are first converted with `simd_into(self.simd)`.
impl<S: Simd> mask16x16<S> {
    #[inline(always)]
    pub fn not(self) -> Self {
        self.simd.not_mask16x16(self)
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.and_mask16x16(self, rhs)
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.or_mask16x16(self, rhs)
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.xor_mask16x16(self, rhs)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask16x16(self, rhs)
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.combine_mask16x16(self, rhs)
    }
}
/// `SimdBase` for `mask16x16`: 16 lanes of `i16`, with `mask16x16` as its own mask type
/// and `mask16x8` as the block type.
impl<S: Simd> crate::SimdBase<i16, S> for mask16x16<S> {
    const N: usize = 16;
    type Mask = mask16x16<S>;
    type Block = mask16x8<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i16] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new mask.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 16`; the length check is done by `copy_from_slice`.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i16]) -> Self {
        let mut lanes = [0; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask16x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i16) -> Self {
        simd.splat_mask16x16(val)
    }
    /// Passes `block` as both operands of `combine`.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdMask` implementation for `mask16x16`.
impl<S: Simd> crate::SimdMask<i16, S> for mask16x16<S> {
    /// Converts `rhs` via `simd_into`, then forwards to `simd_eq_mask16x16`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask16x16(self, rhs)
    }
}
/// A vector of eight `i32` lanes paired with a `Simd` value `S`.
///
/// The layout is `repr(C)` with 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct i32x8<S: Simd> {
    /// The eight lane values.
    pub val: [i32; 8],
    /// The `Simd` value carried alongside the lanes; the methods on this type call into it.
    pub simd: S,
}
/// Builds an `i32x8` from an array of eight `i32` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[i32; 8], S> for i32x8<S> {
    #[inline(always)]
    fn simd_from(val: [i32; 8], simd: S) -> Self {
        // The array arrives by value, so storing it directly yields the same lanes as
        // rebuilding it element by element.
        Self { val, simd }
    }
}
impl<S: Simd> From<i32x8<S>> for [i32; 8] {
    #[inline(always)]
    fn from(value: i32x8<S>) -> Self {
        value.val
    }
}
/// Lets an `i32x8` be read as its underlying `[i32; 8]`.
impl<S: Simd> core::ops::Deref for i32x8<S> {
    type Target = [i32; 8];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets an `i32x8` be mutated through its underlying `[i32; 8]`.
impl<S: Simd> core::ops::DerefMut for i32x8<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `i32x8` from a single `i32` by delegating to `splat_i32x8` on `simd`.
impl<S: Simd> SimdFrom<i32, S> for i32x8<S> {
    #[inline(always)]
    fn simd_from(value: i32, simd: S) -> Self {
        simd.splat_i32x8(value)
    }
}
/// `Select` implementation that lets a `mask32x8` pick between two `i32x8` values.
impl<S: Simd> Select<i32x8<S>> for mask32x8<S> {
    /// Forwards the mask and both candidates to `select_i32x8` on the mask's `simd`.
    #[inline(always)]
    fn select(self, if_true: i32x8<S>, if_false: i32x8<S>) -> i32x8<S> {
        let simd = self.simd;
        simd.select_i32x8(self, if_true, if_false)
    }
}
/// Reinterprets an `i32x8` as a `u8x32` and back without changing any bits.
impl<S: Simd> Bytes for i32x8<S> {
    type Bytes = u8x32<S>;
    /// Transmutes the lane array into the `val` of a `u8x32`, keeping `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x32 {
            // SAFETY: `transmute` only compiles when both types have the same size. The
            // target field is declared elsewhere; presumably `[u8; 32]`, for which every
            // bit pattern is valid.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Transmutes the `val` of a `u8x32` back into eight `i32` lanes, keeping `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x32 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern
            // is a valid `[i32; 8]`.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> i32x8<S> {
    #[inline(always)]
    pub fn not(self) -> i32x8<S> {
        self.simd.not_i32x8(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.add_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.sub_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.mul_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.and_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.or_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.xor_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i32x8<S> {
        self.simd.shr_i32x8(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.shrv_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i32x8<S> {
        self.simd.shl_i32x8(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_eq_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_lt_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_le_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_ge_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_gt_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.min_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i32x8<S> {
        self.simd.max_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.combine_i32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i32x8<S> {
        self.simd.neg_i32x8(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x32<S> {
        self.simd.reinterpret_u8_i32x8(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x8<S> {
        self.simd.reinterpret_u32_i32x8(self)
    }
    #[inline(always)]
    pub fn cvt_f32(self) -> f32x8<S> {
        self.simd.cvt_f32_i32x8(self)
    }
}
/// `SimdBase` implementation for `i32x8`: 8 lanes of `i32`, with `mask32x8` as the mask
/// type and `i32x4` as the block type.
impl<S: Simd> crate::SimdBase<i32, S> for i32x8<S> {
    const N: usize = 8;
    type Mask = mask32x8<S>;
    type Block = i32x4<S>;
    /// Returns a copy of the `simd` field.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i32] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a freshly built vector.
    ///
    /// # Panics
    ///
    /// Panics if `slice` does not hold exactly 8 elements (the length check performed by
    /// `copy_from_slice`).
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i32]) -> Self {
        let mut lanes = [0_i32; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_i32x8` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i32) -> Self {
        simd.splat_i32x8(val)
    }
    /// Combines `block` with a copy of itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdInt` implementation for `i32x8`.
///
/// Each method converts `rhs` with `SimdInto::simd_into` and then forwards to the matching
/// `*_i32x8` operation on `self.simd`.
impl<S: Simd> crate::SimdInt<i32, S> for i32x8<S> {
    /// Forwards to `simd_eq_i32x8`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_i32x8(self, rhs)
    }
    /// Forwards to `simd_lt_i32x8`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_i32x8(self, rhs)
    }
    /// Forwards to `simd_le_i32x8`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_i32x8(self, rhs)
    }
    /// Forwards to `simd_ge_i32x8`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_i32x8(self, rhs)
    }
    /// Forwards to `simd_gt_i32x8`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_i32x8(self, rhs)
    }
    /// Forwards to `zip_low_i32x8`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_i32x8(self, rhs)
    }
    /// Forwards to `zip_high_i32x8`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_i32x8(self, rhs)
    }
    /// Forwards to `unzip_low_i32x8`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_i32x8(self, rhs)
    }
    /// Forwards to `unzip_high_i32x8`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_i32x8(self, rhs)
    }
    /// Forwards to `min_i32x8`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_i32x8(self, rhs)
    }
    /// Forwards to `max_i32x8`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_i32x8(self, rhs)
    }
}
/// `SimdCvtTruncate` implementation producing an `i32x8` from an `f32x8`.
impl<S: Simd> SimdCvtTruncate<f32x8<S>> for i32x8<S> {
    /// Forwards to `cvt_i32_f32x8` on the source vector's `simd`.
    // Marked `#[inline(always)]` like every other delegating wrapper in this file, so this
    // thin forwarding call does not end up as a separate out-of-line function.
    #[inline(always)]
    fn truncate_from(x: f32x8<S>) -> Self {
        x.simd.cvt_i32_f32x8(x)
    }
}
/// A vector of eight `u32` lanes paired with a `Simd` value `S`.
///
/// The layout is `repr(C)` with 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct u32x8<S: Simd> {
    /// The eight lane values.
    pub val: [u32; 8],
    /// The `Simd` value carried alongside the lanes; the methods on this type call into it.
    pub simd: S,
}
/// Builds a `u32x8` from an array of eight `u32` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[u32; 8], S> for u32x8<S> {
    #[inline(always)]
    fn simd_from(val: [u32; 8], simd: S) -> Self {
        // The array arrives by value, so storing it directly yields the same lanes as
        // rebuilding it element by element.
        Self { val, simd }
    }
}
impl<S: Simd> From<u32x8<S>> for [u32; 8] {
    #[inline(always)]
    fn from(value: u32x8<S>) -> Self {
        value.val
    }
}
/// Lets a `u32x8` be read as its underlying `[u32; 8]`.
impl<S: Simd> core::ops::Deref for u32x8<S> {
    type Target = [u32; 8];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets a `u32x8` be mutated through its underlying `[u32; 8]`.
impl<S: Simd> core::ops::DerefMut for u32x8<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds a `u32x8` from a single `u32` by delegating to `splat_u32x8` on `simd`.
impl<S: Simd> SimdFrom<u32, S> for u32x8<S> {
    #[inline(always)]
    fn simd_from(value: u32, simd: S) -> Self {
        simd.splat_u32x8(value)
    }
}
/// `Select` implementation that lets a `mask32x8` pick between two `u32x8` values.
impl<S: Simd> Select<u32x8<S>> for mask32x8<S> {
    /// Forwards the mask and both candidates to `select_u32x8` on the mask's `simd`.
    #[inline(always)]
    fn select(self, if_true: u32x8<S>, if_false: u32x8<S>) -> u32x8<S> {
        let simd = self.simd;
        simd.select_u32x8(self, if_true, if_false)
    }
}
/// Reinterprets a `u32x8` as a `u8x32` and back without changing any bits.
impl<S: Simd> Bytes for u32x8<S> {
    type Bytes = u8x32<S>;
    /// Transmutes the lane array into the `val` of a `u8x32`, keeping `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x32 {
            // SAFETY: `transmute` only compiles when both types have the same size. The
            // target field is declared elsewhere; presumably `[u8; 32]`, for which every
            // bit pattern is valid.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Transmutes the `val` of a `u8x32` back into eight `u32` lanes, keeping `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x32 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern
            // is a valid `[u32; 8]`.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> u32x8<S> {
    #[inline(always)]
    pub fn not(self) -> u32x8<S> {
        self.simd.not_u32x8(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.add_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.sub_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.mul_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.and_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.or_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.xor_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u32x8<S> {
        self.simd.shr_u32x8(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.shrv_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u32x8<S> {
        self.simd.shl_u32x8(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_eq_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_lt_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_le_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_ge_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        self.simd.simd_gt_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.min_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u32x8<S> {
        self.simd.max_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.combine_u32x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x32<S> {
        self.simd.reinterpret_u8_u32x8(self)
    }
    #[inline(always)]
    pub fn cvt_f32(self) -> f32x8<S> {
        self.simd.cvt_f32_u32x8(self)
    }
}
/// `SimdBase` implementation for `u32x8`: 8 lanes of `u32`, with `mask32x8` as the mask
/// type and `u32x4` as the block type.
impl<S: Simd> crate::SimdBase<u32, S> for u32x8<S> {
    const N: usize = 8;
    type Mask = mask32x8<S>;
    type Block = u32x4<S>;
    /// Returns a copy of the `simd` field.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u32] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a freshly built vector.
    ///
    /// # Panics
    ///
    /// Panics if `slice` does not hold exactly 8 elements (the length check performed by
    /// `copy_from_slice`).
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u32]) -> Self {
        let mut lanes = [0_u32; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_u32x8` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u32) -> Self {
        simd.splat_u32x8(val)
    }
    /// Combines `block` with a copy of itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdInt` implementation for `u32x8`.
///
/// Each method converts `rhs` with `SimdInto::simd_into` and then forwards to the matching
/// `*_u32x8` operation on `self.simd`.
impl<S: Simd> crate::SimdInt<u32, S> for u32x8<S> {
    /// Forwards to `simd_eq_u32x8`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_u32x8(self, rhs)
    }
    /// Forwards to `simd_lt_u32x8`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_u32x8(self, rhs)
    }
    /// Forwards to `simd_le_u32x8`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_u32x8(self, rhs)
    }
    /// Forwards to `simd_ge_u32x8`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_u32x8(self, rhs)
    }
    /// Forwards to `simd_gt_u32x8`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_u32x8(self, rhs)
    }
    /// Forwards to `zip_low_u32x8`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_u32x8(self, rhs)
    }
    /// Forwards to `zip_high_u32x8`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_u32x8(self, rhs)
    }
    /// Forwards to `unzip_low_u32x8`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_u32x8(self, rhs)
    }
    /// Forwards to `unzip_high_u32x8`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_u32x8(self, rhs)
    }
    /// Forwards to `min_u32x8`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_u32x8(self, rhs)
    }
    /// Forwards to `max_u32x8`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_u32x8(self, rhs)
    }
}
/// `SimdCvtTruncate` implementation producing a `u32x8` from an `f32x8`.
impl<S: Simd> SimdCvtTruncate<f32x8<S>> for u32x8<S> {
    /// Forwards to `cvt_u32_f32x8` on the source vector's `simd`.
    // Marked `#[inline(always)]` like every other delegating wrapper in this file, so this
    // thin forwarding call does not end up as a separate out-of-line function.
    #[inline(always)]
    fn truncate_from(x: f32x8<S>) -> Self {
        x.simd.cvt_u32_f32x8(x)
    }
}
/// A mask with eight lanes, each stored as an `i32`, paired with a `Simd` value `S`.
///
/// The layout is `repr(C)` with 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct mask32x8<S: Simd> {
    /// The eight lane values.
    pub val: [i32; 8],
    /// The `Simd` value carried alongside the lanes; the methods on this type call into it.
    pub simd: S,
}
/// Builds a `mask32x8` from an array of eight `i32` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[i32; 8], S> for mask32x8<S> {
    #[inline(always)]
    fn simd_from(val: [i32; 8], simd: S) -> Self {
        // The array arrives by value, so storing it directly yields the same lanes as
        // rebuilding it element by element.
        Self { val, simd }
    }
}
impl<S: Simd> From<mask32x8<S>> for [i32; 8] {
    #[inline(always)]
    fn from(value: mask32x8<S>) -> Self {
        value.val
    }
}
/// Lets a `mask32x8` be read as its underlying `[i32; 8]`.
impl<S: Simd> core::ops::Deref for mask32x8<S> {
    type Target = [i32; 8];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets a `mask32x8` be mutated through its underlying `[i32; 8]`.
impl<S: Simd> core::ops::DerefMut for mask32x8<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds a `mask32x8` from a single `i32` by delegating to `splat_mask32x8` on `simd`.
impl<S: Simd> SimdFrom<i32, S> for mask32x8<S> {
    #[inline(always)]
    fn simd_from(value: i32, simd: S) -> Self {
        simd.splat_mask32x8(value)
    }
}
/// `Select` implementation that lets a `mask32x8` pick between two other `mask32x8` values.
impl<S: Simd> Select<mask32x8<S>> for mask32x8<S> {
    /// Forwards the mask and both candidates to `select_mask32x8` on the mask's `simd`.
    #[inline(always)]
    fn select(self, if_true: mask32x8<S>, if_false: mask32x8<S>) -> mask32x8<S> {
        let simd = self.simd;
        simd.select_mask32x8(self, if_true, if_false)
    }
}
/// Reinterprets a `mask32x8` as a `u8x32` and back without changing any bits.
impl<S: Simd> Bytes for mask32x8<S> {
    type Bytes = u8x32<S>;
    /// Transmutes the lane array into the `val` of a `u8x32`, keeping `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x32 {
            // SAFETY: `transmute` only compiles when both types have the same size. The
            // target field is declared elsewhere; presumably `[u8; 32]`, for which every
            // bit pattern is valid.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Transmutes the `val` of a `u8x32` back into eight `i32` lanes, keeping `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x32 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern
            // is a valid `[i32; 8]`.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
/// Inherent convenience methods for `mask32x8`.
///
/// Every method hands the work to the matching `*_mask32x8` operation on `self.simd`;
/// binary operands are first converted with `SimdInto::simd_into`.
impl<S: Simd> mask32x8<S> {
    /// Forwards to `not_mask32x8` on `self.simd`.
    #[inline(always)]
    pub fn not(self) -> Self {
        let simd = self.simd;
        simd.not_mask32x8(self)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `and_mask32x8`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.and_mask32x8(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `or_mask32x8`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.or_mask32x8(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `xor_mask32x8`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.xor_mask32x8(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `simd_eq_mask32x8`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask32x8(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `combine_mask32x8`,
    /// which yields a `mask32x16`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.combine_mask32x8(self, rhs)
    }
}
/// `SimdBase` implementation for `mask32x8`: 8 lanes of `i32`, with itself as the mask
/// type and `mask32x4` as the block type.
impl<S: Simd> crate::SimdBase<i32, S> for mask32x8<S> {
    const N: usize = 8;
    type Mask = Self;
    type Block = mask32x4<S>;
    /// Returns a copy of the `simd` field.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i32] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a freshly built mask.
    ///
    /// # Panics
    ///
    /// Panics if `slice` does not hold exactly 8 elements (the length check performed by
    /// `copy_from_slice`).
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i32]) -> Self {
        let mut lanes = [0_i32; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask32x8` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i32) -> Self {
        simd.splat_mask32x8(val)
    }
    /// Combines `block` with a copy of itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdMask` implementation for `mask32x8`.
impl<S: Simd> crate::SimdMask<i32, S> for mask32x8<S> {
    /// Converts `rhs` via `simd_into`, then forwards to `simd_eq_mask32x8`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask32x8(self, rhs)
    }
}
/// A vector of four `f64` lanes paired with a `Simd` value `S`.
///
/// The layout is `repr(C)` with 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct f64x4<S: Simd> {
    /// The four lane values.
    pub val: [f64; 4],
    /// The `Simd` value carried alongside the lanes; the methods on this type call into it.
    pub simd: S,
}
/// Builds an `f64x4` from an array of four `f64` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[f64; 4], S> for f64x4<S> {
    #[inline(always)]
    fn simd_from(val: [f64; 4], simd: S) -> Self {
        // The array arrives by value, so storing it directly yields the same lanes as
        // rebuilding it element by element.
        Self { val, simd }
    }
}
impl<S: Simd> From<f64x4<S>> for [f64; 4] {
    #[inline(always)]
    fn from(value: f64x4<S>) -> Self {
        value.val
    }
}
/// Lets an `f64x4` be read as its underlying `[f64; 4]`.
impl<S: Simd> core::ops::Deref for f64x4<S> {
    type Target = [f64; 4];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets an `f64x4` be mutated through its underlying `[f64; 4]`.
impl<S: Simd> core::ops::DerefMut for f64x4<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds an `f64x4` from a single `f64` by delegating to `splat_f64x4` on `simd`.
impl<S: Simd> SimdFrom<f64, S> for f64x4<S> {
    #[inline(always)]
    fn simd_from(value: f64, simd: S) -> Self {
        simd.splat_f64x4(value)
    }
}
/// `Select` implementation that lets a `mask64x4` pick between two `f64x4` values.
impl<S: Simd> Select<f64x4<S>> for mask64x4<S> {
    /// Forwards the mask and both candidates to `select_f64x4` on the mask's `simd`.
    #[inline(always)]
    fn select(self, if_true: f64x4<S>, if_false: f64x4<S>) -> f64x4<S> {
        let simd = self.simd;
        simd.select_f64x4(self, if_true, if_false)
    }
}
/// Reinterprets an `f64x4` as a `u8x32` and back without changing any bits.
impl<S: Simd> Bytes for f64x4<S> {
    type Bytes = u8x32<S>;
    /// Transmutes the lane array into the `val` of a `u8x32`, keeping `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x32 {
            // SAFETY: `transmute` only compiles when both types have the same size. The
            // target field is declared elsewhere; presumably `[u8; 32]`, for which every
            // bit pattern is valid.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Transmutes the `val` of a `u8x32` back into four `f64` lanes, keeping `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x32 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern
            // is a valid `[f64; 4]`.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> f64x4<S> {
    #[inline(always)]
    pub fn abs(self) -> f64x4<S> {
        self.simd.abs_f64x4(self)
    }
    #[inline(always)]
    pub fn neg(self) -> f64x4<S> {
        self.simd.neg_f64x4(self)
    }
    #[inline(always)]
    pub fn sqrt(self) -> f64x4<S> {
        self.simd.sqrt_f64x4(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.add_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.sub_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.mul_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn div(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.div_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn copysign(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.copysign_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_eq_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_lt_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_le_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_ge_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_gt_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.max_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.max_precise_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.min_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.min_precise_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn floor(self) -> f64x4<S> {
        self.simd.floor_f64x4(self)
    }
    #[inline(always)]
    pub fn fract(self) -> f64x4<S> {
        self.simd.fract_f64x4(self)
    }
    #[inline(always)]
    pub fn trunc(self) -> f64x4<S> {
        self.simd.trunc_f64x4(self)
    }
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.combine_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_f32(self) -> f32x8<S> {
        self.simd.reinterpret_f32_f64x4(self)
    }
}
/// `SimdBase` implementation for `f64x4`: 4 lanes of `f64`, with `mask64x4` as the mask
/// type and `f64x2` as the block type.
impl<S: Simd> crate::SimdBase<f64, S> for f64x4<S> {
    const N: usize = 4;
    type Mask = mask64x4<S>;
    type Block = f64x2<S>;
    /// Returns a copy of the `simd` field.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[f64] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.val[..]
    }
    /// Copies `slice` into a freshly built vector.
    ///
    /// # Panics
    ///
    /// Panics if `slice` does not hold exactly 4 elements (the length check performed by
    /// `copy_from_slice`).
    #[inline(always)]
    fn from_slice(simd: S, slice: &[f64]) -> Self {
        let mut lanes = [0.0_f64; 4];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_f64x4` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: f64) -> Self {
        simd.splat_f64x4(val)
    }
    /// Combines `block` with a copy of itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
impl<S: Simd> crate::SimdFloat<f64, S> for f64x4<S> {
    #[inline(always)]
    fn abs(self) -> f64x4<S> {
        self.simd.abs_f64x4(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f64x4<S> {
        self.simd.sqrt_f64x4(self)
    }
    #[inline(always)]
    fn copysign(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.copysign_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_eq_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_lt_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_le_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_ge_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        self.simd.simd_gt_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.zip_low_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.zip_high_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.unzip_low_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.unzip_high_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.max_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.max_precise_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.min_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd.min_precise_f64x4(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn madd(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd
            .madd_f64x4(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn msub(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f64x4<S> {
        self.simd
            .msub_f64x4(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn floor(self) -> f64x4<S> {
        self.simd.floor_f64x4(self)
    }
    #[inline(always)]
    fn fract(self) -> f64x4<S> {
        self.simd.fract_f64x4(self)
    }
    #[inline(always)]
    fn trunc(self) -> f64x4<S> {
        self.simd.trunc_f64x4(self)
    }
}
/// A mask with four lanes, each stored as an `i64`, paired with a `Simd` value `S`.
///
/// The layout is `repr(C)` with 32-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(32))]
pub struct mask64x4<S: Simd> {
    /// The four lane values.
    pub val: [i64; 4],
    /// The `Simd` value carried alongside the lanes; the methods on this type call into it.
    pub simd: S,
}
/// Builds a `mask64x4` from an array of four `i64` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[i64; 4], S> for mask64x4<S> {
    #[inline(always)]
    fn simd_from(val: [i64; 4], simd: S) -> Self {
        // The array arrives by value, so storing it directly yields the same lanes as
        // rebuilding it element by element.
        Self { val, simd }
    }
}
impl<S: Simd> From<mask64x4<S>> for [i64; 4] {
    #[inline(always)]
    fn from(value: mask64x4<S>) -> Self {
        value.val
    }
}
/// Lets a `mask64x4` be read as its underlying `[i64; 4]`.
impl<S: Simd> core::ops::Deref for mask64x4<S> {
    type Target = [i64; 4];
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Lets a `mask64x4` be mutated through its underlying `[i64; 4]`.
impl<S: Simd> core::ops::DerefMut for mask64x4<S> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
/// Builds a `mask64x4` from a single `i64` by delegating to `splat_mask64x4` on `simd`.
impl<S: Simd> SimdFrom<i64, S> for mask64x4<S> {
    #[inline(always)]
    fn simd_from(value: i64, simd: S) -> Self {
        simd.splat_mask64x4(value)
    }
}
/// `Select` implementation that lets a `mask64x4` pick between two other `mask64x4` values.
impl<S: Simd> Select<mask64x4<S>> for mask64x4<S> {
    /// Forwards the mask and both candidates to `select_mask64x4` on the mask's `simd`.
    #[inline(always)]
    fn select(self, if_true: mask64x4<S>, if_false: mask64x4<S>) -> mask64x4<S> {
        let simd = self.simd;
        simd.select_mask64x4(self, if_true, if_false)
    }
}
/// Reinterprets a `mask64x4` as a `u8x32` and back without changing any bits.
impl<S: Simd> Bytes for mask64x4<S> {
    type Bytes = u8x32<S>;
    /// Transmutes the lane array into the `val` of a `u8x32`, keeping `simd`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x32 {
            // SAFETY: `transmute` only compiles when both types have the same size. The
            // target field is declared elsewhere; presumably `[u8; 32]`, for which every
            // bit pattern is valid.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Transmutes the `val` of a `u8x32` back into four `i64` lanes, keeping `simd`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x32 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern
            // is a valid `[i64; 4]`.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
/// Inherent convenience methods for `mask64x4`.
///
/// Every method hands the work to the matching `*_mask64x4` operation on `self.simd`;
/// binary operands are first converted with `SimdInto::simd_into`.
impl<S: Simd> mask64x4<S> {
    /// Forwards to `not_mask64x4` on `self.simd`.
    #[inline(always)]
    pub fn not(self) -> Self {
        let simd = self.simd;
        simd.not_mask64x4(self)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `and_mask64x4`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.and_mask64x4(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `or_mask64x4`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.or_mask64x4(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `xor_mask64x4`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.xor_mask64x4(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `simd_eq_mask64x4`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask64x4(self, rhs)
    }
    /// Converts `rhs` via `simd_into`, then forwards to `combine_mask64x4`,
    /// which yields a `mask64x8`.
    #[inline(always)]
    pub fn combine(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.combine_mask64x4(self, rhs)
    }
}
/// `SimdBase` implementation for `mask64x4`: 4 lanes of `i64`, with itself as the mask
/// type and `mask64x2` as the block type.
impl<S: Simd> crate::SimdBase<i64, S> for mask64x4<S> {
    const N: usize = 4;
    type Mask = Self;
    type Block = mask64x2<S>;
    /// Returns a copy of the `simd` field.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the lanes as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i64] {
        &self.val[..]
    }
    /// Mutably borrows the lanes as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i64] {
        &mut self.val[..]
    }
    /// Copies `slice` into a freshly built mask.
    ///
    /// # Panics
    ///
    /// Panics if `slice` does not hold exactly 4 elements (the length check performed by
    /// `copy_from_slice`).
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i64]) -> Self {
        let mut lanes = [0_i64; 4];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask64x4` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i64) -> Self {
        simd.splat_mask64x4(val)
    }
    /// Combines `block` with a copy of itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let (first, second) = (block, block);
        first.combine(second)
    }
}
/// `SimdMask` implementation for `mask64x4`.
impl<S: Simd> crate::SimdMask<i64, S> for mask64x4<S> {
    /// Converts `rhs` via `simd_into`, then forwards to `simd_eq_mask64x4`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x4<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask64x4(self, rhs)
    }
}
/// A vector of sixteen `f32` lanes paired with a `Simd` value `S`.
///
/// The layout is `repr(C)` with 64-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct f32x16<S: Simd> {
    /// The sixteen lane values.
    pub val: [f32; 16],
    /// The `Simd` value carried alongside the lanes; the methods on this type call into it.
    pub simd: S,
}
/// Builds an `f32x16` from an array of sixteen `f32` lanes and a `Simd` value.
impl<S: Simd> SimdFrom<[f32; 16], S> for f32x16<S> {
    #[inline(always)]
    fn simd_from(val: [f32; 16], simd: S) -> Self {
        // The array arrives by value, so storing it directly yields the same lanes as
        // rebuilding it element by element.
        Self { val, simd }
    }
}
impl<S: Simd> From<f32x16<S>> for [f32; 16] {
    #[inline(always)]
    fn from(value: f32x16<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for f32x16<S> {
    type Target = [f32; 16];
    /// Borrows the element array.
    #[inline(always)]
    fn deref(&self) -> &[f32; 16] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for f32x16<S> {
    /// Mutably borrows the element array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [f32; 16] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<f32, S> for f32x16<S> {
    /// Builds a vector from a single `f32` by delegating to `splat_f32x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: f32, simd: S) -> Self {
        let splatted: Self = simd.splat_f32x16(value);
        splatted
    }
}
impl<S: Simd> Select<f32x16<S>> for mask32x16<S> {
    /// Delegates to `select_f32x16` on the mask's `simd`, passing the mask and both candidates.
    #[inline(always)]
    fn select(self, if_true: f32x16<S>, if_false: f32x16<S>) -> f32x16<S> {
        let simd = self.simd;
        simd.select_f32x16(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for f32x16<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the sixteen `f32` elements as 64 bytes, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        // SAFETY: `[f32; 16]` and `[u8; 64]` are both 64 bytes, and every bit pattern is a
        // valid `u8`.
        let val: [u8; 64] = unsafe { core::mem::transmute(val) };
        u8x64 { val, simd }
    }
    /// Reinterprets 64 bytes as sixteen `f32` elements, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val, simd } = value;
        // SAFETY: `[u8; 64]` and `[f32; 16]` are both 64 bytes, and every bit pattern is a
        // valid `f32`.
        let val: [f32; 16] = unsafe { core::mem::transmute(val) };
        Self { val, simd }
    }
}
impl<S: Simd> f32x16<S> {
    #[inline(always)]
    pub fn abs(self) -> f32x16<S> {
        self.simd.abs_f32x16(self)
    }
    #[inline(always)]
    pub fn neg(self) -> f32x16<S> {
        self.simd.neg_f32x16(self)
    }
    #[inline(always)]
    pub fn sqrt(self) -> f32x16<S> {
        self.simd.sqrt_f32x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.add_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.sub_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.mul_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn div(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.div_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn copysign(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.copysign_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_eq_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_lt_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_le_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_ge_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_gt_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.max_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.max_precise_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.min_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.min_precise_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn floor(self) -> f32x16<S> {
        self.simd.floor_f32x16(self)
    }
    #[inline(always)]
    pub fn fract(self) -> f32x16<S> {
        self.simd.fract_f32x16(self)
    }
    #[inline(always)]
    pub fn trunc(self) -> f32x16<S> {
        self.simd.trunc_f32x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_f64(self) -> f64x8<S> {
        self.simd.reinterpret_f64_f32x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_i32(self) -> i32x16<S> {
        self.simd.reinterpret_i32_f32x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x64<S> {
        self.simd.reinterpret_u8_f32x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x16<S> {
        self.simd.reinterpret_u32_f32x16(self)
    }
    #[inline(always)]
    pub fn cvt_u32(self) -> u32x16<S> {
        self.simd.cvt_u32_f32x16(self)
    }
    #[inline(always)]
    pub fn cvt_i32(self) -> i32x16<S> {
        self.simd.cvt_i32_f32x16(self)
    }
}
/// `SimdBase` for `f32x16`: sixteen `f32` elements, `mask32x16` as mask, `f32x4` as block.
impl<S: Simd> crate::SimdBase<f32, S> for f32x16<S> {
    const N: usize = 16;
    type Mask = mask32x16<S>;
    type Block = f32x4<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the elements as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[f32] {
        &self.val[..]
    }
    /// Mutably borrows the elements as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [f32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new vector.
    ///
    /// # Panics
    ///
    /// Panics (inside `copy_from_slice`) when `slice.len()` is not 16.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[f32]) -> Self {
        let mut lanes = [0.0_f32; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_f32x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: f32) -> Self {
        simd.splat_f32x16(val)
    }
    /// Combines `block` with itself, then combines that result with itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
impl<S: Simd> crate::SimdFloat<f32, S> for f32x16<S> {
    #[inline(always)]
    fn abs(self) -> f32x16<S> {
        self.simd.abs_f32x16(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f32x16<S> {
        self.simd.sqrt_f32x16(self)
    }
    #[inline(always)]
    fn copysign(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.copysign_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_eq_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_lt_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_le_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_ge_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_gt_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.zip_low_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.zip_high_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.unzip_low_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.unzip_high_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.max_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.max_precise_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.min_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd.min_precise_f32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn madd(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd
            .madd_f32x16(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn msub(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f32x16<S> {
        self.simd
            .msub_f32x16(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn floor(self) -> f32x16<S> {
        self.simd.floor_f32x16(self)
    }
    #[inline(always)]
    fn fract(self) -> f32x16<S> {
        self.simd.fract_f32x16(self)
    }
    #[inline(always)]
    fn trunc(self) -> f32x16<S> {
        self.simd.trunc_f32x16(self)
    }
}
impl<S: Simd> SimdCvtFloat<u32x16<S>> for f32x16<S> {
    /// Converts a `u32x16` into an `f32x16` by delegating to `cvt_f32_u32x16` on `x.simd`.
    // Marked `#[inline(always)]` like every other forwarding method in this file, so this
    // thin wrapper is inlined into its caller.
    #[inline(always)]
    fn float_from(x: u32x16<S>) -> Self {
        x.simd.cvt_f32_u32x16(x)
    }
}
impl<S: Simd> SimdCvtFloat<i32x16<S>> for f32x16<S> {
    /// Converts an `i32x16` into an `f32x16` by delegating to `cvt_f32_i32x16` on `x.simd`.
    // Marked `#[inline(always)]` like every other forwarding method in this file, so this
    // thin wrapper is inlined into its caller.
    #[inline(always)]
    fn float_from(x: i32x16<S>) -> Self {
        x.simd.cvt_f32_i32x16(x)
    }
}
/// Sixty-four `i8` values stored together with a value of the SIMD type `S`.
///
/// The `#[repr(C, align(64))]` attribute gives the struct a C layout and 64-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct i8x64<S: Simd> {
    /// The sixty-four element values.
    pub val: [i8; 64],
    /// The `S` value that this type's methods forward their operations to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i8; 64], S> for i8x64<S> {
    /// Wraps `val` and `simd` into an `i8x64`.
    ///
    /// The array is moved in as-is; rebuilding it element by element is unnecessary
    /// because the field has exactly the parameter's type.
    #[inline(always)]
    fn simd_from(val: [i8; 64], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<i8x64<S>> for [i8; 64] {
    #[inline(always)]
    fn from(value: i8x64<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for i8x64<S> {
    type Target = [i8; 64];
    /// Borrows the element array.
    #[inline(always)]
    fn deref(&self) -> &[i8; 64] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for i8x64<S> {
    /// Mutably borrows the element array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i8; 64] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i8, S> for i8x64<S> {
    /// Builds a vector from a single `i8` by delegating to `splat_i8x64` on `simd`.
    #[inline(always)]
    fn simd_from(value: i8, simd: S) -> Self {
        let splatted: Self = simd.splat_i8x64(value);
        splatted
    }
}
impl<S: Simd> Select<i8x64<S>> for mask8x64<S> {
    /// Delegates to `select_i8x64` on the mask's `simd`, passing the mask and both candidates.
    #[inline(always)]
    fn select(self, if_true: i8x64<S>, if_false: i8x64<S>) -> i8x64<S> {
        let simd = self.simd;
        simd.select_i8x64(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for i8x64<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the sixty-four `i8` elements as 64 bytes, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        // SAFETY: `[i8; 64]` and `[u8; 64]` are both 64 bytes, and every bit pattern is a
        // valid `u8`.
        let val: [u8; 64] = unsafe { core::mem::transmute(val) };
        u8x64 { val, simd }
    }
    /// Reinterprets 64 bytes as sixty-four `i8` elements, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val, simd } = value;
        // SAFETY: `[u8; 64]` and `[i8; 64]` are both 64 bytes, and every bit pattern is a
        // valid `i8`.
        let val: [i8; 64] = unsafe { core::mem::transmute(val) };
        Self { val, simd }
    }
}
impl<S: Simd> i8x64<S> {
    #[inline(always)]
    pub fn not(self) -> i8x64<S> {
        self.simd.not_i8x64(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.add_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.sub_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.mul_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.and_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.or_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.xor_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i8x64<S> {
        self.simd.shr_i8x64(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.shrv_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i8x64<S> {
        self.simd.shl_i8x64(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_eq_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_lt_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_le_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_ge_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_gt_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.min_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i8x64<S> {
        self.simd.max_i8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i8x64<S> {
        self.simd.neg_i8x64(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x64<S> {
        self.simd.reinterpret_u8_i8x64(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x16<S> {
        self.simd.reinterpret_u32_i8x64(self)
    }
}
/// `SimdBase` for `i8x64`: sixty-four `i8` elements, `mask8x64` as mask, `i8x16` as block.
impl<S: Simd> crate::SimdBase<i8, S> for i8x64<S> {
    const N: usize = 64;
    type Mask = mask8x64<S>;
    type Block = i8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the elements as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i8] {
        &self.val[..]
    }
    /// Mutably borrows the elements as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i8] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new vector.
    ///
    /// # Panics
    ///
    /// Panics (inside `copy_from_slice`) when `slice.len()` is not 64.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i8]) -> Self {
        let mut lanes = [0_i8; 64];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_i8x64` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i8) -> Self {
        simd.splat_i8x64(val)
    }
    /// Combines `block` with itself, then combines that result with itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
/// `SimdInt` for `i8x64`; each method converts `rhs` with `simd_into` and forwards to the
/// same-named `*_i8x64` method of `self.simd`.
impl<S: Simd> crate::SimdInt<i8, S> for i8x64<S> {
    /// Delegates to `simd_eq_i8x64`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_i8x64(self, rhs)
    }
    /// Delegates to `simd_lt_i8x64`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_lt_i8x64(self, rhs)
    }
    /// Delegates to `simd_le_i8x64`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_le_i8x64(self, rhs)
    }
    /// Delegates to `simd_ge_i8x64`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_ge_i8x64(self, rhs)
    }
    /// Delegates to `simd_gt_i8x64`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_gt_i8x64(self, rhs)
    }
    /// Delegates to `zip_low_i8x64`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_low_i8x64(self, rhs)
    }
    /// Delegates to `zip_high_i8x64`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_high_i8x64(self, rhs)
    }
    /// Delegates to `unzip_low_i8x64`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_low_i8x64(self, rhs)
    }
    /// Delegates to `unzip_high_i8x64`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_high_i8x64(self, rhs)
    }
    /// Delegates to `min_i8x64`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.min_i8x64(self, rhs)
    }
    /// Delegates to `max_i8x64`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.max_i8x64(self, rhs)
    }
}
/// Sixty-four `u8` values stored together with a value of the SIMD type `S`.
///
/// The `#[repr(C, align(64))]` attribute gives the struct a C layout and 64-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct u8x64<S: Simd> {
    /// The sixty-four element values.
    pub val: [u8; 64],
    /// The `S` value that this type's methods forward their operations to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[u8; 64], S> for u8x64<S> {
    /// Wraps `val` and `simd` into a `u8x64`.
    ///
    /// The array is moved in as-is; rebuilding it element by element is unnecessary
    /// because the field has exactly the parameter's type.
    #[inline(always)]
    fn simd_from(val: [u8; 64], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<u8x64<S>> for [u8; 64] {
    #[inline(always)]
    fn from(value: u8x64<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for u8x64<S> {
    type Target = [u8; 64];
    /// Borrows the element array.
    #[inline(always)]
    fn deref(&self) -> &[u8; 64] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for u8x64<S> {
    /// Mutably borrows the element array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [u8; 64] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<u8, S> for u8x64<S> {
    /// Builds a vector from a single `u8` by delegating to `splat_u8x64` on `simd`.
    #[inline(always)]
    fn simd_from(value: u8, simd: S) -> Self {
        let splatted: Self = simd.splat_u8x64(value);
        splatted
    }
}
impl<S: Simd> Select<u8x64<S>> for mask8x64<S> {
    /// Delegates to `select_u8x64` on the mask's `simd`, passing the mask and both candidates.
    #[inline(always)]
    fn select(self, if_true: u8x64<S>, if_false: u8x64<S>) -> u8x64<S> {
        let simd = self.simd;
        simd.select_u8x64(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for u8x64<S> {
    type Bytes = u8x64<S>;
    /// Returns `self` unchanged: `u8x64` is its own byte representation.
    ///
    /// No `transmute` or `unsafe` is needed because `Self::Bytes` is the same type as `Self`.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        self
    }
    /// Returns `value` unchanged, for the same reason as `to_bytes`.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        value
    }
}
impl<S: Simd> u8x64<S> {
    #[inline(always)]
    pub fn not(self) -> u8x64<S> {
        self.simd.not_u8x64(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.add_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.sub_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.mul_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.and_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.or_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.xor_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u8x64<S> {
        self.simd.shr_u8x64(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.shrv_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u8x64<S> {
        self.simd.shl_u8x64(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_eq_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_lt_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_le_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_ge_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        self.simd.simd_gt_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.min_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u8x64<S> {
        self.simd.max_u8x64(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x16<S> {
        self.simd.reinterpret_u32_u8x64(self)
    }
}
/// `SimdBase` for `u8x64`: sixty-four `u8` elements, `mask8x64` as mask, `u8x16` as block.
impl<S: Simd> crate::SimdBase<u8, S> for u8x64<S> {
    const N: usize = 64;
    type Mask = mask8x64<S>;
    type Block = u8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Borrows the elements as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self.val[..]
    }
    /// Mutably borrows the elements as a slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u8] {
        &mut self.val[..]
    }
    /// Copies `slice` into a new vector.
    ///
    /// # Panics
    ///
    /// Panics (inside `copy_from_slice`) when `slice.len()` is not 64.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u8]) -> Self {
        let mut lanes = [0_u8; 64];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Delegates to `splat_u8x64` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u8) -> Self {
        simd.splat_u8x64(val)
    }
    /// Combines `block` with itself, then combines that result with itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
/// `SimdInt` for `u8x64`; each method converts `rhs` with `simd_into` and forwards to the
/// same-named `*_u8x64` method of `self.simd`.
impl<S: Simd> crate::SimdInt<u8, S> for u8x64<S> {
    /// Delegates to `simd_eq_u8x64`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_u8x64(self, rhs)
    }
    /// Delegates to `simd_lt_u8x64`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_lt_u8x64(self, rhs)
    }
    /// Delegates to `simd_le_u8x64`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_le_u8x64(self, rhs)
    }
    /// Delegates to `simd_ge_u8x64`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_ge_u8x64(self, rhs)
    }
    /// Delegates to `simd_gt_u8x64`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_gt_u8x64(self, rhs)
    }
    /// Delegates to `zip_low_u8x64`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_low_u8x64(self, rhs)
    }
    /// Delegates to `zip_high_u8x64`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.zip_high_u8x64(self, rhs)
    }
    /// Delegates to `unzip_low_u8x64`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_low_u8x64(self, rhs)
    }
    /// Delegates to `unzip_high_u8x64`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.unzip_high_u8x64(self, rhs)
    }
    /// Delegates to `min_u8x64`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.min_u8x64(self, rhs)
    }
    /// Delegates to `max_u8x64`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.max_u8x64(self, rhs)
    }
}
/// Mask type holding sixty-four `i8` values together with a value of the SIMD type `S`.
///
/// The `#[repr(C, align(64))]` attribute gives the struct a C layout and 64-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct mask8x64<S: Simd> {
    /// The sixty-four mask elements, stored as `i8`.
    pub val: [i8; 64],
    /// The `S` value that this type's methods forward their operations to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i8; 64], S> for mask8x64<S> {
    /// Wraps `val` and `simd` into a `mask8x64`.
    ///
    /// The array is moved in as-is; rebuilding it element by element is unnecessary
    /// because the field has exactly the parameter's type.
    #[inline(always)]
    fn simd_from(val: [i8; 64], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<mask8x64<S>> for [i8; 64] {
    #[inline(always)]
    fn from(value: mask8x64<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for mask8x64<S> {
    type Target = [i8; 64];
    /// Borrows the element array.
    #[inline(always)]
    fn deref(&self) -> &[i8; 64] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for mask8x64<S> {
    /// Mutably borrows the element array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i8; 64] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i8, S> for mask8x64<S> {
    /// Builds a mask from a single `i8` by delegating to `splat_mask8x64` on `simd`.
    #[inline(always)]
    fn simd_from(value: i8, simd: S) -> Self {
        let splatted: Self = simd.splat_mask8x64(value);
        splatted
    }
}
impl<S: Simd> Select<mask8x64<S>> for mask8x64<S> {
    /// Delegates to `select_mask8x64` on the mask's `simd`, passing the mask and both
    /// candidates.
    #[inline(always)]
    fn select(self, if_true: mask8x64<S>, if_false: mask8x64<S>) -> mask8x64<S> {
        let simd = self.simd;
        simd.select_mask8x64(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for mask8x64<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the sixty-four `i8` mask elements as 64 bytes, keeping the same `simd`
    /// value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        // SAFETY: `[i8; 64]` and `[u8; 64]` are both 64 bytes, and every bit pattern is a
        // valid `u8`.
        let val: [u8; 64] = unsafe { core::mem::transmute(val) };
        u8x64 { val, simd }
    }
    /// Reinterprets 64 bytes as sixty-four `i8` mask elements, keeping the same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val, simd } = value;
        // SAFETY: `[u8; 64]` and `[i8; 64]` are both 64 bytes, and every bit pattern is a
        // valid `i8`.
        let val: [i8; 64] = unsafe { core::mem::transmute(val) };
        Self { val, simd }
    }
}
/// Inherent methods of `mask8x64`; each one forwards to the same-named `*_mask8x64` method
/// of `self.simd`.
impl<S: Simd> mask8x64<S> {
    /// Delegates to `not_mask8x64`.
    #[inline(always)]
    pub fn not(self) -> Self {
        let simd = self.simd;
        simd.not_mask8x64(self)
    }
    /// Converts `rhs` with `simd_into`, then delegates to `and_mask8x64`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.and_mask8x64(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then delegates to `or_mask8x64`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.or_mask8x64(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then delegates to `xor_mask8x64`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.xor_mask8x64(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then delegates to `simd_eq_mask8x64`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs: Self = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask8x64(self, rhs)
    }
}
impl<S: Simd> crate::SimdBase<i8, S> for mask8x64<S> {
    const N: usize = 64;
    type Mask = mask8x64<S>;
    type Block = mask8x16<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i8] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i8] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len()` is not 64, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i8]) -> Self {
        let mut lanes = [0i8; 64];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask8x64` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i8) -> Self {
        simd.splat_mask8x64(val)
    }
    /// Calls `combine` on the block with itself, then on that result with itself.
    ///
    /// NOTE(review): presumably each `combine` joins its two operands into a vector of twice
    /// the lane count; confirm against the `combine` definition.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
impl<S: Simd> crate::SimdMask<i8, S> for mask8x64<S> {
    /// Converts `rhs` with `simd_into`, then forwards to `simd_eq_mask8x64`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask8x64<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_eq_mask8x64(self, rhs)
    }
}
/// A vector of 32 `i16` lanes paired with a value of the `Simd` implementation `S`.
///
/// The type is `repr(C)` with 64-byte alignment; `val` is laid out first, then `simd`.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct i16x32<S: Simd> {
    /// The lane values, in index order.
    pub val: [i16; 32],
    /// The `Simd` value that this vector's operations are forwarded to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i16; 32], S> for i16x32<S> {
    /// Wraps the given lane array together with `simd`.
    ///
    /// The array is moved in as a whole; lane `i` of the result is `val[i]`.
    #[inline(always)]
    fn simd_from(val: [i16; 32], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<i16x32<S>> for [i16; 32] {
    #[inline(always)]
    fn from(value: i16x32<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for i16x32<S> {
    type Target = [i16; 32];
    /// Borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for i16x32<S> {
    /// Mutably borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i16, S> for i16x32<S> {
    /// Builds a vector from one scalar by forwarding to `splat_i16x32` on `simd`.
    #[inline(always)]
    fn simd_from(value: i16, simd: S) -> Self {
        S::splat_i16x32(simd, value)
    }
}
impl<S: Simd> Select<i16x32<S>> for mask16x32<S> {
    /// Forwards to `select_i16x32` on the mask's `simd` value, passing the mask followed by
    /// both candidates.
    #[inline(always)]
    fn select(self, if_true: i16x32<S>, if_false: i16x32<S>) -> i16x32<S> {
        let simd = self.simd;
        simd.select_i16x32(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for i16x32<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the lane array as the `val` of a `u8x64`, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x64 {
            // SAFETY: `transmute` is rejected at compile time unless both array types have the
            // same size. NOTE(review): the destination is `u8x64::val`, presumably `[u8; 64]`,
            // for which every bit pattern is valid; confirm against the `u8x64` definition.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Reinterprets the `val` of a `u8x64` as this type's `[i16; 32]` lane array, keeping the
    /// same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is rejected at compile time unless both types have the same
            // size, and the destination `[i16; 32]` accepts every bit pattern.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> i16x32<S> {
    #[inline(always)]
    pub fn not(self) -> i16x32<S> {
        self.simd.not_i16x32(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.add_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.sub_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.mul_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.and_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.or_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.xor_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i16x32<S> {
        self.simd.shr_i16x32(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.shrv_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i16x32<S> {
        self.simd.shl_i16x32(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_eq_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_lt_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_le_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_ge_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_gt_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.min_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        self.simd.max_i16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i16x32<S> {
        self.simd.neg_i16x32(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x64<S> {
        self.simd.reinterpret_u8_i16x32(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x16<S> {
        self.simd.reinterpret_u32_i16x32(self)
    }
}
impl<S: Simd> crate::SimdBase<i16, S> for i16x32<S> {
    const N: usize = 32;
    type Mask = mask16x32<S>;
    type Block = i16x8<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i16] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len()` is not 32, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i16]) -> Self {
        let mut lanes = [0i16; 32];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_i16x32` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i16) -> Self {
        simd.splat_i16x32(val)
    }
    /// Calls `combine` on the block with itself, then on that result with itself.
    ///
    /// NOTE(review): presumably each `combine` joins its two operands into a vector of twice
    /// the lane count; confirm against the `combine` definition.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
impl<S: Simd> crate::SimdInt<i16, S> for i16x32<S> {
    /// Converts `rhs` with `simd_into`, then forwards to `simd_eq_i16x32`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_eq_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_lt_i16x32`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_lt_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_le_i16x32`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_le_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_ge_i16x32`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_ge_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_gt_i16x32`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_gt_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `zip_low_i16x32`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.zip_low_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `zip_high_i16x32`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.zip_high_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `unzip_low_i16x32`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.unzip_low_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `unzip_high_i16x32`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.unzip_high_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `min_i16x32`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.min_i16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `max_i16x32`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> i16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.max_i16x32(self, rhs)
    }
}
/// A vector of 32 `u16` lanes paired with a value of the `Simd` implementation `S`.
///
/// The type is `repr(C)` with 64-byte alignment; `val` is laid out first, then `simd`.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct u16x32<S: Simd> {
    /// The lane values, in index order.
    pub val: [u16; 32],
    /// The `Simd` value that this vector's operations are forwarded to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[u16; 32], S> for u16x32<S> {
    /// Wraps the given lane array together with `simd`.
    ///
    /// The array is moved in as a whole; lane `i` of the result is `val[i]`.
    #[inline(always)]
    fn simd_from(val: [u16; 32], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<u16x32<S>> for [u16; 32] {
    #[inline(always)]
    fn from(value: u16x32<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for u16x32<S> {
    type Target = [u16; 32];
    /// Borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for u16x32<S> {
    /// Mutably borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<u16, S> for u16x32<S> {
    /// Builds a vector from one scalar by forwarding to `splat_u16x32` on `simd`.
    #[inline(always)]
    fn simd_from(value: u16, simd: S) -> Self {
        S::splat_u16x32(simd, value)
    }
}
impl<S: Simd> Select<u16x32<S>> for mask16x32<S> {
    /// Forwards to `select_u16x32` on the mask's `simd` value, passing the mask followed by
    /// both candidates.
    #[inline(always)]
    fn select(self, if_true: u16x32<S>, if_false: u16x32<S>) -> u16x32<S> {
        let simd = self.simd;
        simd.select_u16x32(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for u16x32<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the lane array as the `val` of a `u8x64`, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x64 {
            // SAFETY: `transmute` is rejected at compile time unless both array types have the
            // same size. NOTE(review): the destination is `u8x64::val`, presumably `[u8; 64]`,
            // for which every bit pattern is valid; confirm against the `u8x64` definition.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Reinterprets the `val` of a `u8x64` as this type's `[u16; 32]` lane array, keeping the
    /// same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is rejected at compile time unless both types have the same
            // size, and the destination `[u16; 32]` accepts every bit pattern.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> u16x32<S> {
    #[inline(always)]
    pub fn not(self) -> u16x32<S> {
        self.simd.not_u16x32(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.add_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.sub_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.mul_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.and_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.or_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.xor_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u16x32<S> {
        self.simd.shr_u16x32(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.shrv_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u16x32<S> {
        self.simd.shl_u16x32(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_eq_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_lt_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_le_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_ge_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        self.simd.simd_gt_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.min_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        self.simd.max_u16x32(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x64<S> {
        self.simd.reinterpret_u8_u16x32(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x16<S> {
        self.simd.reinterpret_u32_u16x32(self)
    }
}
impl<S: Simd> crate::SimdBase<u16, S> for u16x32<S> {
    const N: usize = 32;
    type Mask = mask16x32<S>;
    type Block = u16x8<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u16] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len()` is not 32, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u16]) -> Self {
        let mut lanes = [0u16; 32];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_u16x32` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u16) -> Self {
        simd.splat_u16x32(val)
    }
    /// Calls `combine` on the block with itself, then on that result with itself.
    ///
    /// NOTE(review): presumably each `combine` joins its two operands into a vector of twice
    /// the lane count; confirm against the `combine` definition.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
impl<S: Simd> crate::SimdInt<u16, S> for u16x32<S> {
    /// Converts `rhs` with `simd_into`, then forwards to `simd_eq_u16x32`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_eq_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_lt_u16x32`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_lt_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_le_u16x32`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_le_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_ge_u16x32`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_ge_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_gt_u16x32`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_gt_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `zip_low_u16x32`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.zip_low_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `zip_high_u16x32`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.zip_high_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `unzip_low_u16x32`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.unzip_low_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `unzip_high_u16x32`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.unzip_high_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `min_u16x32`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.min_u16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `max_u16x32`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> u16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.max_u16x32(self, rhs)
    }
}
/// A 32-lane mask whose lanes are stored as `i16`, paired with a value of the `Simd`
/// implementation `S`.
///
/// The type is `repr(C)` with 64-byte alignment; `val` is laid out first, then `simd`.
// NOTE(review): the lane encoding for "true"/"false" is not established here; confirm against
// the backend that produces masks.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct mask16x32<S: Simd> {
    /// The lane values, in index order.
    pub val: [i16; 32],
    /// The `Simd` value that this mask's operations are forwarded to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i16; 32], S> for mask16x32<S> {
    /// Wraps the given lane array together with `simd`.
    ///
    /// The array is moved in as a whole; lane `i` of the result is `val[i]`.
    #[inline(always)]
    fn simd_from(val: [i16; 32], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<mask16x32<S>> for [i16; 32] {
    #[inline(always)]
    fn from(value: mask16x32<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for mask16x32<S> {
    type Target = [i16; 32];
    /// Borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for mask16x32<S> {
    /// Mutably borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i16, S> for mask16x32<S> {
    /// Builds a mask from one scalar by forwarding to `splat_mask16x32` on `simd`.
    #[inline(always)]
    fn simd_from(value: i16, simd: S) -> Self {
        S::splat_mask16x32(simd, value)
    }
}
impl<S: Simd> Select<mask16x32<S>> for mask16x32<S> {
    /// Forwards to `select_mask16x32` on the mask's own `simd` value, passing the mask
    /// followed by both candidates.
    #[inline(always)]
    fn select(self, if_true: mask16x32<S>, if_false: mask16x32<S>) -> mask16x32<S> {
        let simd = self.simd;
        simd.select_mask16x32(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for mask16x32<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the lane array as the `val` of a `u8x64`, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x64 {
            // SAFETY: `transmute` is rejected at compile time unless both array types have the
            // same size. NOTE(review): the destination is `u8x64::val`, presumably `[u8; 64]`,
            // for which every bit pattern is valid; confirm against the `u8x64` definition.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Reinterprets the `val` of a `u8x64` as this type's `[i16; 32]` lane array, keeping the
    /// same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is rejected at compile time unless both types have the same
            // size, and the destination `[i16; 32]` accepts every bit pattern.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> mask16x32<S> {
    /// Forwards to `not_mask16x32` on the stored `simd` value.
    #[inline(always)]
    pub fn not(self) -> mask16x32<S> {
        let simd = self.simd;
        simd.not_mask16x32(self)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `and_mask16x32`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.and_mask16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `or_mask16x32`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.or_mask16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `xor_mask16x32`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.xor_mask16x32(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_eq_mask16x32`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_eq_mask16x32(self, rhs)
    }
}
impl<S: Simd> crate::SimdBase<i16, S> for mask16x32<S> {
    const N: usize = 32;
    type Mask = mask16x32<S>;
    type Block = mask16x8<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i16] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i16] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len()` is not 32, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i16]) -> Self {
        let mut lanes = [0i16; 32];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask16x32` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i16) -> Self {
        simd.splat_mask16x32(val)
    }
    /// Calls `combine` on the block with itself, then on that result with itself.
    ///
    /// NOTE(review): presumably each `combine` joins its two operands into a vector of twice
    /// the lane count; confirm against the `combine` definition.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
impl<S: Simd> crate::SimdMask<i16, S> for mask16x32<S> {
    /// Converts `rhs` with `simd_into`, then forwards to `simd_eq_mask16x32`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask16x32<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_eq_mask16x32(self, rhs)
    }
}
/// A vector of 16 `i32` lanes paired with a value of the `Simd` implementation `S`.
///
/// The type is `repr(C)` with 64-byte alignment; `val` is laid out first, then `simd`.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct i32x16<S: Simd> {
    /// The lane values, in index order.
    pub val: [i32; 16],
    /// The `Simd` value that this vector's operations are forwarded to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i32; 16], S> for i32x16<S> {
    /// Wraps the given lane array together with `simd`.
    ///
    /// The array is moved in as a whole; lane `i` of the result is `val[i]`.
    #[inline(always)]
    fn simd_from(val: [i32; 16], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<i32x16<S>> for [i32; 16] {
    #[inline(always)]
    fn from(value: i32x16<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for i32x16<S> {
    type Target = [i32; 16];
    /// Borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for i32x16<S> {
    /// Mutably borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i32, S> for i32x16<S> {
    /// Builds a vector from one scalar by forwarding to `splat_i32x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: i32, simd: S) -> Self {
        S::splat_i32x16(simd, value)
    }
}
impl<S: Simd> Select<i32x16<S>> for mask32x16<S> {
    /// Forwards to `select_i32x16` on the mask's `simd` value, passing the mask followed by
    /// both candidates.
    #[inline(always)]
    fn select(self, if_true: i32x16<S>, if_false: i32x16<S>) -> i32x16<S> {
        let simd = self.simd;
        simd.select_i32x16(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for i32x16<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the lane array as the `val` of a `u8x64`, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x64 {
            // SAFETY: `transmute` is rejected at compile time unless both array types have the
            // same size. NOTE(review): the destination is `u8x64::val`, presumably `[u8; 64]`,
            // for which every bit pattern is valid; confirm against the `u8x64` definition.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Reinterprets the `val` of a `u8x64` as this type's `[i32; 16]` lane array, keeping the
    /// same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is rejected at compile time unless both types have the same
            // size, and the destination `[i32; 16]` accepts every bit pattern.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> i32x16<S> {
    #[inline(always)]
    pub fn not(self) -> i32x16<S> {
        self.simd.not_i32x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.add_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.sub_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.mul_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.and_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.or_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.xor_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> i32x16<S> {
        self.simd.shr_i32x16(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.shrv_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> i32x16<S> {
        self.simd.shl_i32x16(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_eq_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_lt_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_le_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_ge_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_gt_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.min_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        self.simd.max_i32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn neg(self) -> i32x16<S> {
        self.simd.neg_i32x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x64<S> {
        self.simd.reinterpret_u8_i32x16(self)
    }
    #[inline(always)]
    pub fn reinterpret_u32(self) -> u32x16<S> {
        self.simd.reinterpret_u32_i32x16(self)
    }
    #[inline(always)]
    pub fn cvt_f32(self) -> f32x16<S> {
        self.simd.cvt_f32_i32x16(self)
    }
}
impl<S: Simd> crate::SimdBase<i32, S> for i32x16<S> {
    const N: usize = 16;
    type Mask = mask32x16<S>;
    type Block = i32x4<S>;
    /// Returns a copy of the stored `simd` value.
    #[inline(always)]
    fn witness(&self) -> S {
        let Self { simd, .. } = *self;
        simd
    }
    /// Views the lanes as a shared slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i32] {
        &self.val[..]
    }
    /// Views the lanes as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len()` is not 16, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i32]) -> Self {
        let mut lanes = [0i32; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_i32x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i32) -> Self {
        simd.splat_i32x16(val)
    }
    /// Calls `combine` on the block with itself, then on that result with itself.
    ///
    /// NOTE(review): presumably each `combine` joins its two operands into a vector of twice
    /// the lane count; confirm against the `combine` definition.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let doubled = block.combine(block);
        doubled.combine(doubled)
    }
}
impl<S: Simd> crate::SimdInt<i32, S> for i32x16<S> {
    /// Converts `rhs` with `simd_into`, then forwards to `simd_eq_i32x16`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_eq_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_lt_i32x16`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_lt_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_le_i32x16`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_le_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_ge_i32x16`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_ge_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `simd_gt_i32x16`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.simd_gt_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `zip_low_i32x16`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.zip_low_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `zip_high_i32x16`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.zip_high_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `unzip_low_i32x16`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.unzip_low_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `unzip_high_i32x16`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.unzip_high_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `min_i32x16`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.min_i32x16(self, rhs)
    }
    /// Converts `rhs` with `simd_into`, then forwards to `max_i32x16`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> i32x16<S> {
        let simd = self.simd;
        let rhs = rhs.simd_into(simd);
        simd.max_i32x16(self, rhs)
    }
}
impl<S: Simd> SimdCvtTruncate<f32x16<S>> for i32x16<S> {
    /// Converts an `f32x16` into an `i32x16` by forwarding to `cvt_i32_f32x16` on `x.simd`.
    ///
    /// NOTE(review): the rounding and out-of-range behaviour is whatever `cvt_i32_f32x16`
    /// implements (presumably truncation, going by the trait name); confirm per backend.
    // Marked `#[inline(always)]` like every other forwarding method in this file, so this
    // wrapper is inlined into its caller the same way as its siblings.
    #[inline(always)]
    fn truncate_from(x: f32x16<S>) -> Self {
        x.simd.cvt_i32_f32x16(x)
    }
}
/// A vector of 16 `u32` lanes paired with a value of the `Simd` implementation `S`.
///
/// The type is `repr(C)` with 64-byte alignment; `val` is laid out first, then `simd`.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct u32x16<S: Simd> {
    /// The lane values, in index order.
    pub val: [u32; 16],
    /// The `Simd` value that this vector's operations are forwarded to.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[u32; 16], S> for u32x16<S> {
    /// Wraps the given lane array together with `simd`.
    ///
    /// The array is moved in as a whole; lane `i` of the result is `val[i]`.
    #[inline(always)]
    fn simd_from(val: [u32; 16], simd: S) -> Self {
        Self { val, simd }
    }
}
impl<S: Simd> From<u32x16<S>> for [u32; 16] {
    #[inline(always)]
    fn from(value: u32x16<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for u32x16<S> {
    type Target = [u32; 16];
    /// Borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for u32x16<S> {
    /// Mutably borrows the lane array stored in `val`.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<u32, S> for u32x16<S> {
    /// Builds a vector from one scalar by forwarding to `splat_u32x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: u32, simd: S) -> Self {
        S::splat_u32x16(simd, value)
    }
}
impl<S: Simd> Select<u32x16<S>> for mask32x16<S> {
    /// Forwards to `select_u32x16` on the mask's `simd` value, passing the mask followed by
    /// both candidates.
    #[inline(always)]
    fn select(self, if_true: u32x16<S>, if_false: u32x16<S>) -> u32x16<S> {
        let simd = self.simd;
        simd.select_u32x16(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for u32x16<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the lane array as the `val` of a `u8x64`, keeping the same `simd` value.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        let Self { val, simd } = self;
        u8x64 {
            // SAFETY: `transmute` is rejected at compile time unless both array types have the
            // same size. NOTE(review): the destination is `u8x64::val`, presumably `[u8; 64]`,
            // for which every bit pattern is valid; confirm against the `u8x64` definition.
            val: unsafe { core::mem::transmute(val) },
            simd,
        }
    }
    /// Reinterprets the `val` of a `u8x64` as this type's `[u32; 16]` lane array, keeping the
    /// same `simd` value.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        let u8x64 { val: bytes, simd } = value;
        Self {
            // SAFETY: `transmute` is rejected at compile time unless both types have the same
            // size, and the destination `[u32; 16]` accepts every bit pattern.
            val: unsafe { core::mem::transmute(bytes) },
            simd,
        }
    }
}
impl<S: Simd> u32x16<S> {
    #[inline(always)]
    pub fn not(self) -> u32x16<S> {
        self.simd.not_u32x16(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.add_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.sub_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.mul_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.and_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.or_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.xor_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shr(self, shift: u32) -> u32x16<S> {
        self.simd.shr_u32x16(self, shift)
    }
    #[inline(always)]
    pub fn shrv(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.shrv_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn shl(self, shift: u32) -> u32x16<S> {
        self.simd.shl_u32x16(self, shift)
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_eq_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_lt_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_le_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_ge_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        self.simd.simd_gt_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.min_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> u32x16<S> {
        self.simd.max_u32x16(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn reinterpret_u8(self) -> u8x64<S> {
        self.simd.reinterpret_u8_u32x16(self)
    }
    #[inline(always)]
    pub fn cvt_f32(self) -> f32x16<S> {
        self.simd.cvt_f32_u32x16(self)
    }
}
impl<S: Simd> crate::SimdBase<u32, S> for u32x16<S> {
    const N: usize = 16;
    type Mask = mask32x16<S>;
    type Block = u32x4<S>;
    /// Returns a copy of the `Simd` token stored in the vector.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lane array as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[u32] {
        &self.val[..]
    }
    /// Views the lane array as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array and pairs it with `simd`.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 16`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[u32]) -> Self {
        let mut lanes = [0_u32; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_u32x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: u32) -> Self {
        simd.splat_u32x16(val)
    }
    /// Builds a full vector from one block by calling `combine` twice: block + block, then
    /// that result + itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let half = block.combine(block);
        half.combine(half)
    }
}
/// Trait-level integer operations; each forwards to the matching `*_u32x16` method on the
/// vector's `Simd` token after converting `rhs` with `simd_into`.
impl<S: Simd> crate::SimdInt<u32, S> for u32x16<S> {
    /// Forwards to `simd_eq_u32x16`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_u32x16(self, rhs)
    }
    /// Forwards to `simd_lt_u32x16`.
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_lt_u32x16(self, rhs)
    }
    /// Forwards to `simd_le_u32x16`.
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_le_u32x16(self, rhs)
    }
    /// Forwards to `simd_ge_u32x16`.
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_ge_u32x16(self, rhs)
    }
    /// Forwards to `simd_gt_u32x16`.
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_gt_u32x16(self, rhs)
    }
    /// Forwards to `zip_low_u32x16`.
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_low_u32x16(self, rhs)
    }
    /// Forwards to `zip_high_u32x16`.
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.zip_high_u32x16(self, rhs)
    }
    /// Forwards to `unzip_low_u32x16`.
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_low_u32x16(self, rhs)
    }
    /// Forwards to `unzip_high_u32x16`.
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.unzip_high_u32x16(self, rhs)
    }
    /// Forwards to `min_u32x16`.
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.min_u32x16(self, rhs)
    }
    /// Forwards to `max_u32x16`.
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.max_u32x16(self, rhs)
    }
}
impl<S: Simd> SimdCvtTruncate<f32x16<S>> for u32x16<S> {
    /// Converts `x` by calling `cvt_u32_f32x16` on the `Simd` token carried by `x`.
    ///
    /// Marked `#[inline(always)]` to match the other forwarding methods in this file, so this
    /// thin wrapper is not left as a separate out-of-line call.
    // NOTE(review): this file is autogenerated by fearless_simd_gen; mirror this attribute in
    // the generator so it survives regeneration.
    #[inline(always)]
    fn truncate_from(x: f32x16<S>) -> Self {
        x.simd.cvt_u32_f32x16(x)
    }
}
/// Sixteen `i32` lanes stored as a plain array, paired with a `Simd` value of type `S`.
///
/// The layout is `repr(C)` with 64-byte alignment.
// NOTE(review): this is the `Mask` type of `u32x16` and the selector type in `Select` impls;
// the lane encoding of true/false is not visible here — confirm against the backends.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct mask32x16<S: Simd> {
    /// The sixteen lane values.
    pub val: [i32; 16],
    /// The `Simd` value carried alongside the lanes; methods forward their work to it.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i32; 16], S> for mask32x16<S> {
    /// Wraps a full array of sixteen `i32` lanes together with the `simd` token.
    #[inline(always)]
    fn simd_from(val: [i32; 16], simd: S) -> Self {
        // The array arrives by value, so it is stored directly; the result holds the same
        // sixteen lanes as copying each element individually.
        Self { val, simd }
    }
}
impl<S: Simd> From<mask32x16<S>> for [i32; 16] {
    #[inline(always)]
    fn from(value: mask32x16<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for mask32x16<S> {
    type Target = [i32; 16];
    /// Borrows the `val` lane array.
    #[inline(always)]
    fn deref(&self) -> &[i32; 16] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for mask32x16<S> {
    /// Mutably borrows the `val` lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i32; 16] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i32, S> for mask32x16<S> {
    /// Builds a mask from one scalar by calling `splat_mask32x16` on `simd`.
    #[inline(always)]
    fn simd_from(value: i32, simd: S) -> mask32x16<S> {
        simd.splat_mask32x16(value)
    }
}
impl<S: Simd> Select<mask32x16<S>> for mask32x16<S> {
    /// Forwards to `select_mask32x16` on the selector's `Simd` token, passing the selector
    /// and both candidate masks.
    #[inline(always)]
    fn select(self, if_true: Self, if_false: Self) -> Self {
        let simd = self.simd;
        simd.select_mask32x16(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for mask32x16<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the `[i32; 16]` lane storage as the storage of a `u8x64`, keeping the
    /// same `Simd` token.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x64 {
            // SAFETY: `transmute` is size-checked at compile time and the source array is
            // fully initialized integer data.
            // NOTE(review): assumes `u8x64::val` is a plain byte array for which every bit
            // pattern is valid — confirm against its definition.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the storage of a `u8x64` as `[i32; 16]` lanes, keeping the same `Simd`
    /// token.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern is
            // a valid `[i32; 16]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
/// Inherent convenience methods; each forwards to the matching `*_mask32x16` operation on
/// the mask's `Simd` token, converting `rhs` with `simd_into` where one is taken.
impl<S: Simd> mask32x16<S> {
    /// Forwards to `not_mask32x16`.
    #[inline(always)]
    pub fn not(self) -> Self {
        let simd = self.simd;
        simd.not_mask32x16(self)
    }
    /// Forwards to `and_mask32x16` with the converted `rhs`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.and_mask32x16(self, rhs)
    }
    /// Forwards to `or_mask32x16` with the converted `rhs`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.or_mask32x16(self, rhs)
    }
    /// Forwards to `xor_mask32x16` with the converted `rhs`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.xor_mask32x16(self, rhs)
    }
    /// Forwards to `simd_eq_mask32x16` with the converted `rhs`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask32x16(self, rhs)
    }
}
impl<S: Simd> crate::SimdBase<i32, S> for mask32x16<S> {
    const N: usize = 16;
    type Mask = mask32x16<S>;
    type Block = mask32x4<S>;
    /// Returns a copy of the `Simd` token stored in the mask.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lane array as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i32] {
        &self.val[..]
    }
    /// Views the lane array as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i32] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array and pairs it with `simd`.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 16`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i32]) -> Self {
        let mut lanes = [0_i32; 16];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask32x16` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i32) -> Self {
        simd.splat_mask32x16(val)
    }
    /// Builds a full mask from one block by calling `combine` twice: block + block, then
    /// that result + itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let half = block.combine(block);
        half.combine(half)
    }
}
impl<S: Simd> crate::SimdMask<i32, S> for mask32x16<S> {
    /// Forwards to `simd_eq_mask32x16` on the mask's `Simd` token with the converted `rhs`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask32x16<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask32x16(self, rhs)
    }
}
/// Eight `f64` lanes stored as a plain array, paired with a `Simd` value of type `S`.
///
/// The layout is `repr(C)` with 64-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct f64x8<S: Simd> {
    /// The eight lane values.
    pub val: [f64; 8],
    /// The `Simd` value carried alongside the lanes; methods forward their work to it.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[f64; 8], S> for f64x8<S> {
    /// Wraps a full array of eight `f64` lanes together with the `simd` token.
    #[inline(always)]
    fn simd_from(val: [f64; 8], simd: S) -> Self {
        // The array arrives by value, so it is stored directly; the result holds the same
        // eight lanes as copying each element individually.
        Self { val, simd }
    }
}
impl<S: Simd> From<f64x8<S>> for [f64; 8] {
    #[inline(always)]
    fn from(value: f64x8<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for f64x8<S> {
    type Target = [f64; 8];
    /// Borrows the `val` lane array.
    #[inline(always)]
    fn deref(&self) -> &[f64; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for f64x8<S> {
    /// Mutably borrows the `val` lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [f64; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<f64, S> for f64x8<S> {
    /// Builds a vector from one scalar by calling `splat_f64x8` on `simd`.
    #[inline(always)]
    fn simd_from(value: f64, simd: S) -> f64x8<S> {
        simd.splat_f64x8(value)
    }
}
impl<S: Simd> Select<f64x8<S>> for mask64x8<S> {
    /// Forwards to `select_f64x8` on the mask's `Simd` token, passing the mask and both
    /// candidate vectors.
    #[inline(always)]
    fn select(self, if_true: f64x8<S>, if_false: f64x8<S>) -> f64x8<S> {
        let simd = self.simd;
        simd.select_f64x8(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for f64x8<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the `[f64; 8]` lane storage as the storage of a `u8x64`, keeping the
    /// same `Simd` token.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x64 {
            // SAFETY: `transmute` is size-checked at compile time and the source array is
            // fully initialized floating-point data.
            // NOTE(review): assumes `u8x64::val` is a plain byte array for which every bit
            // pattern is valid — confirm against its definition.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the storage of a `u8x64` as `[f64; 8]` lanes, keeping the same `Simd`
    /// token.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern is
            // a valid `[f64; 8]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
impl<S: Simd> f64x8<S> {
    #[inline(always)]
    pub fn abs(self) -> f64x8<S> {
        self.simd.abs_f64x8(self)
    }
    #[inline(always)]
    pub fn neg(self) -> f64x8<S> {
        self.simd.neg_f64x8(self)
    }
    #[inline(always)]
    pub fn sqrt(self) -> f64x8<S> {
        self.simd.sqrt_f64x8(self)
    }
    #[inline(always)]
    pub fn add(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.add_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn sub(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.sub_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn mul(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.mul_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn div(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.div_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn copysign(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.copysign_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_eq_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_lt_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_le_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_ge_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_gt_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.max_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.max_precise_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.min_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.min_precise_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    pub fn floor(self) -> f64x8<S> {
        self.simd.floor_f64x8(self)
    }
    #[inline(always)]
    pub fn fract(self) -> f64x8<S> {
        self.simd.fract_f64x8(self)
    }
    #[inline(always)]
    pub fn trunc(self) -> f64x8<S> {
        self.simd.trunc_f64x8(self)
    }
    #[inline(always)]
    pub fn reinterpret_f32(self) -> f32x16<S> {
        self.simd.reinterpret_f32_f64x8(self)
    }
}
impl<S: Simd> crate::SimdBase<f64, S> for f64x8<S> {
    const N: usize = 8;
    type Mask = mask64x8<S>;
    type Block = f64x2<S>;
    /// Returns a copy of the `Simd` token stored in the vector.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lane array as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[f64] {
        &self.val[..]
    }
    /// Views the lane array as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array and pairs it with `simd`.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 8`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[f64]) -> Self {
        let mut lanes = [0.0_f64; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_f64x8` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: f64) -> Self {
        simd.splat_f64x8(val)
    }
    /// Builds a full vector from one block by calling `combine` twice: block + block, then
    /// that result + itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let half = block.combine(block);
        half.combine(half)
    }
}
impl<S: Simd> crate::SimdFloat<f64, S> for f64x8<S> {
    #[inline(always)]
    fn abs(self) -> f64x8<S> {
        self.simd.abs_f64x8(self)
    }
    #[inline(always)]
    fn sqrt(self) -> f64x8<S> {
        self.simd.sqrt_f64x8(self)
    }
    #[inline(always)]
    fn copysign(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.copysign_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_eq_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_lt(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_lt_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_le(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_le_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_ge(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_ge_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn simd_gt(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        self.simd.simd_gt_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_low(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.zip_low_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn zip_high(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.zip_high_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_low(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.unzip_low_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn unzip_high(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.unzip_high_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.max_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn max_precise(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.max_precise_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.min_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn min_precise(self, rhs: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd.min_precise_f64x8(self, rhs.simd_into(self.simd))
    }
    #[inline(always)]
    fn madd(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd
            .madd_f64x8(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn msub(self, op1: impl SimdInto<Self, S>, op2: impl SimdInto<Self, S>) -> f64x8<S> {
        self.simd
            .msub_f64x8(self, op1.simd_into(self.simd), op2.simd_into(self.simd))
    }
    #[inline(always)]
    fn floor(self) -> f64x8<S> {
        self.simd.floor_f64x8(self)
    }
    #[inline(always)]
    fn fract(self) -> f64x8<S> {
        self.simd.fract_f64x8(self)
    }
    #[inline(always)]
    fn trunc(self) -> f64x8<S> {
        self.simd.trunc_f64x8(self)
    }
}
/// Eight `i64` lanes stored as a plain array, paired with a `Simd` value of type `S`.
///
/// The layout is `repr(C)` with 64-byte alignment.
// NOTE(review): this is the `Mask` type of `f64x8` and the selector type in `Select` impls;
// the lane encoding of true/false is not visible here — confirm against the backends.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
pub struct mask64x8<S: Simd> {
    /// The eight lane values.
    pub val: [i64; 8],
    /// The `Simd` value carried alongside the lanes; methods forward their work to it.
    pub simd: S,
}
impl<S: Simd> SimdFrom<[i64; 8], S> for mask64x8<S> {
    /// Wraps a full array of eight `i64` lanes together with the `simd` token.
    #[inline(always)]
    fn simd_from(val: [i64; 8], simd: S) -> Self {
        // The array arrives by value, so it is stored directly; the result holds the same
        // eight lanes as copying each element individually.
        Self { val, simd }
    }
}
impl<S: Simd> From<mask64x8<S>> for [i64; 8] {
    #[inline(always)]
    fn from(value: mask64x8<S>) -> Self {
        value.val
    }
}
impl<S: Simd> core::ops::Deref for mask64x8<S> {
    type Target = [i64; 8];
    /// Borrows the `val` lane array.
    #[inline(always)]
    fn deref(&self) -> &[i64; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> core::ops::DerefMut for mask64x8<S> {
    /// Mutably borrows the `val` lane array.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut [i64; 8] {
        let Self { val, .. } = self;
        val
    }
}
impl<S: Simd> SimdFrom<i64, S> for mask64x8<S> {
    /// Builds a mask from one scalar by calling `splat_mask64x8` on `simd`.
    #[inline(always)]
    fn simd_from(value: i64, simd: S) -> mask64x8<S> {
        simd.splat_mask64x8(value)
    }
}
impl<S: Simd> Select<mask64x8<S>> for mask64x8<S> {
    /// Forwards to `select_mask64x8` on the selector's `Simd` token, passing the selector
    /// and both candidate masks.
    #[inline(always)]
    fn select(self, if_true: Self, if_false: Self) -> Self {
        let simd = self.simd;
        simd.select_mask64x8(self, if_true, if_false)
    }
}
impl<S: Simd> Bytes for mask64x8<S> {
    type Bytes = u8x64<S>;
    /// Reinterprets the `[i64; 8]` lane storage as the storage of a `u8x64`, keeping the
    /// same `Simd` token.
    #[inline(always)]
    fn to_bytes(self) -> Self::Bytes {
        u8x64 {
            // SAFETY: `transmute` is size-checked at compile time and the source array is
            // fully initialized integer data.
            // NOTE(review): assumes `u8x64::val` is a plain byte array for which every bit
            // pattern is valid — confirm against its definition.
            val: unsafe { core::mem::transmute(self.val) },
            simd: self.simd,
        }
    }
    /// Reinterprets the storage of a `u8x64` as `[i64; 8]` lanes, keeping the same `Simd`
    /// token.
    #[inline(always)]
    fn from_bytes(value: Self::Bytes) -> Self {
        Self {
            // SAFETY: `transmute` is size-checked at compile time, and every bit pattern is
            // a valid `[i64; 8]`.
            val: unsafe { core::mem::transmute(value.val) },
            simd: value.simd,
        }
    }
}
/// Inherent convenience methods; each forwards to the matching `*_mask64x8` operation on
/// the mask's `Simd` token, converting `rhs` with `simd_into` where one is taken.
impl<S: Simd> mask64x8<S> {
    /// Forwards to `not_mask64x8`.
    #[inline(always)]
    pub fn not(self) -> Self {
        let simd = self.simd;
        simd.not_mask64x8(self)
    }
    /// Forwards to `and_mask64x8` with the converted `rhs`.
    #[inline(always)]
    pub fn and(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.and_mask64x8(self, rhs)
    }
    /// Forwards to `or_mask64x8` with the converted `rhs`.
    #[inline(always)]
    pub fn or(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.or_mask64x8(self, rhs)
    }
    /// Forwards to `xor_mask64x8` with the converted `rhs`.
    #[inline(always)]
    pub fn xor(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.xor_mask64x8(self, rhs)
    }
    /// Forwards to `simd_eq_mask64x8` with the converted `rhs`.
    #[inline(always)]
    pub fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask64x8(self, rhs)
    }
}
impl<S: Simd> crate::SimdBase<i64, S> for mask64x8<S> {
    const N: usize = 8;
    type Mask = mask64x8<S>;
    type Block = mask64x2<S>;
    /// Returns a copy of the `Simd` token stored in the mask.
    #[inline(always)]
    fn witness(&self) -> S {
        self.simd
    }
    /// Views the lane array as a slice.
    #[inline(always)]
    fn as_slice(&self) -> &[i64] {
        &self.val[..]
    }
    /// Views the lane array as a mutable slice.
    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [i64] {
        &mut self.val[..]
    }
    /// Copies `slice` into a fresh lane array and pairs it with `simd`.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != 8`, because `copy_from_slice` requires equal lengths.
    #[inline(always)]
    fn from_slice(simd: S, slice: &[i64]) -> Self {
        let mut lanes = [0_i64; 8];
        lanes.copy_from_slice(slice);
        Self { val: lanes, simd }
    }
    /// Forwards to `splat_mask64x8` on `simd`.
    #[inline(always)]
    fn splat(simd: S, val: i64) -> Self {
        simd.splat_mask64x8(val)
    }
    /// Builds a full mask from one block by calling `combine` twice: block + block, then
    /// that result + itself.
    #[inline(always)]
    fn block_splat(block: Self::Block) -> Self {
        let half = block.combine(block);
        half.combine(half)
    }
}
impl<S: Simd> crate::SimdMask<i64, S> for mask64x8<S> {
    /// Forwards to `simd_eq_mask64x8` on the mask's `Simd` token with the converted `rhs`.
    #[inline(always)]
    fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> mask64x8<S> {
        let rhs = rhs.simd_into(self.simd);
        self.simd.simd_eq_mask64x8(self, rhs)
    }
}
