diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index bcdc069cf0..6a6a3ae924 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -167,12 +167,6 @@ unsafe extern "unadjusted" { fn __lasx_xvbitsel_v(a: __v32u8, b: __v32u8, c: __v32u8) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitseli.b"] fn __lasx_xvbitseli_b(a: __v32u8, b: __v32u8, c: u32) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvshuf4i.b"] - fn __lasx_xvshuf4i_b(a: __v32i8, b: u32) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvshuf4i.h"] - fn __lasx_xvshuf4i_h(a: __v16i16, b: u32) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvshuf4i.w"] - fn __lasx_xvshuf4i_w(a: __v8i32, b: u32) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvclo.b"] fn __lasx_xvclo_b(a: __v32i8) -> __v32i8; #[link_name = "llvm.loongarch.lasx.xvclo.h"] @@ -1631,33 +1625,6 @@ pub fn lasx_xvbitseli_b(a: m256i, b: m256i) -> m256i { unsafe { transmute(__lasx_xvbitseli_b(transmute(a), transmute(b), IMM8)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvshuf4i_b(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lasx_xvshuf4i_b(transmute(a), IMM8)) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvshuf4i_h(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lasx_xvshuf4i_h(transmute(a), IMM8)) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvshuf4i_w(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lasx_xvshuf4i_w(transmute(a), IMM8)) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index 03d11c4b9f..3bb7059fae 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -267,6 +267,53 @@ pub(crate) const unsafe fn simd_packod_d(a: T, b: T) -> T { simd_shuffle!(b, a, [1, 5, 3, 7]) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_b(a: T) -> T { + simd_shuffle!( + a, + a, + [ + ((I >> 0) & 3) + 0, ((I >> 2) & 3) + 0, ((I >> 4) & 3) + 0, ((I >> 6) & 3) + 0, + ((I >> 0) & 3) + 4, ((I >> 2) & 3) + 4, ((I >> 4) & 3) + 4, ((I >> 6) & 3) + 4, + ((I >> 0) & 3) + 8, ((I >> 2) & 3) + 8, ((I >> 4) & 3) + 8, ((I >> 6) & 3) + 8, + ((I >> 0) & 3) + 12, ((I >> 2) & 3) + 12, ((I >> 4) & 3) + 12, ((I >> 6) & 3) + 12, + ((I >> 0) & 3) + 16, ((I >> 2) & 3) + 16, ((I >> 4) & 3) + 16, ((I >> 6) & 3) + 16, + ((I >> 0) & 3) + 20, ((I >> 2) & 3) + 20, ((I >> 4) & 3) + 20, ((I >> 6) & 3) + 20, + ((I >> 0) & 3) + 24, ((I >> 2) & 3) + 24, ((I >> 4) & 3) + 24, ((I >> 6) & 3) + 24, + ((I >> 0) & 3) + 28, ((I >> 2) & 3) + 28, ((I >> 4) & 3) + 28, ((I >> 6) & 3) + 28 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_h(a: T) -> T { + simd_shuffle!( + a, + a, + [ + ((I >> 0) & 3) + 0, ((I >> 2) & 3) + 0, ((I >> 4) & 3) + 0, ((I >> 6) & 3) + 0, + ((I >> 0) & 3) + 4, ((I >> 2) & 3) + 4, ((I >> 4) & 3) + 4, ((I >> 6) & 3) + 4, + ((I >> 0) & 3) + 8, ((I >> 2) & 3) + 8, ((I >> 4) & 3) + 8, ((I >> 6) & 3) + 8, + ((I >> 0) & 3) + 12, ((I >> 2) & 3) + 12, ((I >> 4) & 3) + 12, ((I >> 6) & 3) + 12, + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_w(a: T) -> T { + simd_shuffle!( + a, + a, + [ + ((I >> 0) & 3) + 0, ((I >> 2) & 3) + 0, ((I >> 4) & 3) + 0, ((I >> 6) & 3) + 0, + ((I >> 0) & 3) + 4, ((I >> 2) & 3) + 4, ((I >> 4) & 3) + 4, ((I >> 6) & 3) + 4 + ] + ) +} + impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32); impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16); impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8); @@ -488,6 +535,9 @@ impl_vuv!("lasx", lasx_xvrepl128vei_b, simd_replvei_b, m256i, i8x32, 4, const); impl_vuv!("lasx", lasx_xvrepl128vei_h, simd_replvei_h, m256i, i16x16, 3, const); impl_vuv!("lasx", lasx_xvrepl128vei_w, simd_replvei_w, m256i, i32x8, 2, const); impl_vuv!("lasx", lasx_xvrepl128vei_d, simd_replvei_d, m256i, i64x4, 1, const); +impl_vuv!("lasx", lasx_xvshuf4i_b, simd_shuf4i_b, m256i, i8x32, 8, const); +impl_vuv!("lasx", lasx_xvshuf4i_h, simd_shuf4i_h, m256i, i16x16, 8, const); +impl_vuv!("lasx", lasx_xvshuf4i_w, simd_shuf4i_w, m256i, i32x8, 8, const); impl_vug!("lasx", lasx_xvpickve2gr_w, is::simd_extract, m256i, i32x8, i32, 3); impl_vug!("lasx", lasx_xvpickve2gr_d, is::simd_extract, m256i, i64x4, i64, 2); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index b78e2234e4..555866040e 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -173,12 +173,6 @@ unsafe extern "unadjusted" { fn __lsx_vbitsel_v(a: __v16u8, b: __v16u8, c: __v16u8) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitseli.b"] fn __lsx_vbitseli_b(a: __v16u8, b: __v16u8, c: u32) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vshuf4i.b"] - fn __lsx_vshuf4i_b(a: __v16i8, b: u32) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vshuf4i.h"] - fn __lsx_vshuf4i_h(a: __v8i16, b: u32) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vshuf4i.w"] - fn __lsx_vshuf4i_w(a: __v4i32, b: u32) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vclo.b"] fn __lsx_vclo_b(a: __v16i8) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vclo.h"] @@ -363,8 +357,6 @@ unsafe extern "unadjusted" { fn __lsx_vfrstp_b(a: __v16i8, b: __v16i8, c: __v16i8) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vfrstp.h"] fn __lsx_vfrstp_h(a: __v8i16, b: __v8i16, c: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vshuf4i.d"] - fn __lsx_vshuf4i_d(a: __v2i64, b: __v2i64, c: u32) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vbsrl.v"] fn __lsx_vbsrl_v(a: __v16i8, b: u32) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vbsll.v"] @@ -1574,33 +1566,6 @@ pub fn lsx_vbitseli_b(a: m128i, b: m128i) -> m128i { unsafe { transmute(__lsx_vbitseli_b(transmute(a), transmute(b), IMM8)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vshuf4i_b(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lsx_vshuf4i_b(transmute(a), IMM8)) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vshuf4i_h(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lsx_vshuf4i_h(transmute(a), IMM8)) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vshuf4i_w(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lsx_vshuf4i_w(transmute(a), IMM8)) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -2261,15 +2226,6 @@ pub fn lsx_vfrstp_h(a: m128i, b: m128i, c: m128i) -> m128i { unsafe { transmute(__lsx_vfrstp_h(transmute(a), transmute(b), transmute(c))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vshuf4i_d(a: m128i, b: m128i) -> m128i { - static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(__lsx_vshuf4i_d(transmute(a), transmute(b), IMM8)) } -} - #[inline] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index d734ff69c7..c1f32cfe92 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -173,6 +173,46 @@ pub(crate) const unsafe fn simd_packod_d(a: T, b: T) -> T { simd_shuffle!(b, a, [1, 3]) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_b(a: T) -> T { + simd_shuffle!( + a, + a, + [ + ((I >> 0) & 3) + 0, ((I >> 2) & 3) + 0, ((I >> 4) & 3) + 0, ((I >> 6) & 3) + 0, + ((I >> 0) & 3) + 4, ((I >> 2) & 3) + 4, ((I >> 4) & 3) + 4, ((I >> 6) & 3) + 4, + ((I >> 0) & 3) + 8, ((I >> 2) & 3) + 8, ((I >> 4) & 3) + 8, ((I >> 6) & 3) + 8, + ((I >> 0) & 3) + 12, ((I >> 2) & 3) + 12, ((I >> 4) & 3) + 12, ((I >> 6) & 3) + 12 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_h(a: T) -> T { + simd_shuffle!( + a, + a, + [ + ((I >> 0) & 3) + 0, ((I >> 2) & 3) + 0, ((I >> 4) & 3) + 0, ((I >> 6) & 3) + 0, + ((I >> 0) & 3) + 4, ((I >> 2) & 3) + 4, ((I >> 4) & 3) + 4, ((I >> 6) & 3) + 4 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_w(a: T) -> T { + simd_shuffle!(a, a, [((I >> 0) & 3), ((I >> 2) & 3), ((I >> 4) & 3), ((I >> 6) & 3)]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_shuf4i_d(a: T, b: T) -> T { + simd_shuffle!(a, b, [((I >> 0) & 3), ((I >> 2) & 3)]) +} + impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16); impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8); impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4); @@ -389,6 +429,9 @@ impl_vuv!("lsx", lsx_vreplvei_b, simd_replvei_b, m128i, i8x16, 4, const); impl_vuv!("lsx", lsx_vreplvei_h, simd_replvei_h, m128i, i16x8, 3, const); impl_vuv!("lsx", lsx_vreplvei_w, simd_replvei_w, m128i, i32x4, 2, const); impl_vuv!("lsx", lsx_vreplvei_d, simd_replvei_d, m128i, i64x2, 1, const); +impl_vuv!("lsx", lsx_vshuf4i_b, simd_shuf4i_b, m128i, i8x16, 8, const); +impl_vuv!("lsx", lsx_vshuf4i_h, simd_shuf4i_h, m128i, i16x8, 8, const); +impl_vuv!("lsx", lsx_vshuf4i_w, simd_shuf4i_w, m128i, i32x4, 8, const); impl_vug!("lsx", lsx_vpickve2gr_b, is::simd_extract, m128i, i8x16, i32, 4); impl_vug!("lsx", lsx_vpickve2gr_h, is::simd_extract, m128i, i16x8, i32, 3); @@ -437,6 +480,8 @@ impl_vvvv!("lsx", lsx_vfnmadd_d, ls::simd_fnmadd, m128d, f64x2); impl_vvvv!("lsx", lsx_vfnmsub_s, ls::simd_fnmsub, m128, f32x4); impl_vvvv!("lsx", lsx_vfnmsub_d, ls::simd_fnmsub, m128d, f64x2); +impl_vvuv!("lsx", lsx_vshuf4i_d, simd_shuf4i_d, m128i, i64x2, 8, const); + impl_vugv!("lsx", lsx_vinsgr2vr_b, is::simd_insert, m128i, i8x16, i32, 4); impl_vugv!("lsx", lsx_vinsgr2vr_h, is::simd_insert, m128i, i16x8, i32, 3); impl_vugv!("lsx", lsx_vinsgr2vr_w, is::simd_insert, m128i, i32x4, i32, 2); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index 24ee4874ae..9cbd18f980 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -334,6 +334,26 @@ macro_rules! impl_vvvv { pub(super) use impl_vvvv; +macro_rules! impl_vvuv { + ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $ibs:expr, const) => { + #[inline] + #[target_feature(enable = $ft)] + #[rustc_legacy_const_generics(2)] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub fn $name(a: $oty, b: $oty) -> $oty { + static_assert_uimm_bits!(IMM, $ibs); + unsafe { + let a: $ity = transmute(a); + let b: $ity = transmute(b); + let r: $ity = $op::(a, b); + transmute(r) + } + } + }; +} + +pub(super) use impl_vvuv; + macro_rules! impl_vugv { ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => { #[inline] diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index 03d06af28a..41432adf25 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -1642,16 +1642,19 @@ asm-fmts = xd, xj, ui8 data-types = UV32QI, UV32QI, UV32QI, USI /// lasx_xvshuf4i_b +impl = portable name = lasx_xvshuf4i_b asm-fmts = xd, xj, ui8 data-types = V32QI, V32QI, USI /// lasx_xvshuf4i_h +impl = portable name = lasx_xvshuf4i_h asm-fmts = xd, xj, ui8 data-types = V16HI, V16HI, USI /// lasx_xvshuf4i_w +impl = portable name = lasx_xvshuf4i_w asm-fmts = xd, xj, ui8 data-types = V8SI, V8SI, USI diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index 188de6983f..211c3c0fcf 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -1657,16 +1657,19 @@ asm-fmts = vd, vj, ui8 data-types = UV16QI, UV16QI, UV16QI, USI /// lsx_vshuf4i_b +impl = portable name = lsx_vshuf4i_b asm-fmts = vd, vj, ui8 data-types = V16QI, V16QI, USI /// lsx_vshuf4i_h +impl = portable name = lsx_vshuf4i_h asm-fmts = vd, vj, ui8 data-types = V8HI, V8HI, USI /// lsx_vshuf4i_w +impl = portable name = lsx_vshuf4i_w asm-fmts = vd, vj, ui8 data-types = V4SI, V4SI, USI @@ -2366,6 +2369,7 @@ asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI, V8HI /// lsx_vshuf4i_d +impl = portable name = lsx_vshuf4i_d asm-fmts = vd, vj, ui8 data-types = V2DI, V2DI, V2DI, USI diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index 9580125e1a..abbfcb3365 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -259,6 +259,10 @@ lsx_vpackod_b lsx_vpackod_h lsx_vpackod_w lsx_vpackod_d +lsx_vshuf4i_b +lsx_vshuf4i_h +lsx_vshuf4i_w +lsx_vshuf4i_d # LASX intrinsics lasx_xvsll_b @@ -520,3 +524,6 @@ lasx_xvpackod_b lasx_xvpackod_h lasx_xvpackod_w lasx_xvpackod_d +lasx_xvshuf4i_b +lasx_xvshuf4i_h +lasx_xvshuf4i_w