From d01cb31b1a2c22bdec843c6b9eb8bec6966cba63 Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Fri, 29 May 2026 23:37:52 +0800
Subject: [PATCH 1/8] Support f16 on targets where GCC doesn't support it

---
 src/abi.rs                               |   1 +
 src/builder.rs                           | 171 +++++++++++++++++++++--
 src/context.rs                           |   5 +
 src/intrinsic/llvm.rs                    |  41 ++++--
 src/intrinsic/mod.rs                     |  50 +++++--
 src/intrinsic/simd.rs                    | 156 +++++++++++++++++----
 src/type_.rs                             |  17 ++-
 src/type_of.rs                           |   1 +
 tests/compile/f16-abi.rs                 |  49 +++++++
 tests/lang_tests.rs                      |   4 +-
 tests/run/f16.rs                         |  44 ++++++
 tools/cspell_dicts/rustc_codegen_gcc.txt |   4 +
 12 files changed, 484 insertions(+), 59 deletions(-)
 create mode 100644 tests/compile/f16-abi.rs
 create mode 100644 tests/run/f16.rs

diff --git a/src/abi.rs b/src/abi.rs
index 7239a5bcb04..233c8cfc7c0 100644
--- a/src/abi.rs
+++ b/src/abi.rs
@@ -86,6 +86,7 @@ impl GccType for Reg {
         match self.kind {
             RegKind::Integer => cx.type_ix(self.size.bits()),
             RegKind::Float => match self.size.bits() {
+                16 => cx.f16_abi_type,
                 32 => cx.type_f32(),
                 64 => cx.type_f64(),
                 _ => bug!("unsupported float: {:?}", self),
diff --git a/src/builder.rs b/src/builder.rs
index 76bd37396a1..184218d0928 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -454,6 +454,141 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         );
         result.to_rvalue()
     }
+
+    /// Returns the soft-float routine that widens an f16 to `dest_ty`, or `None` when no such
+    /// routine is known for `dest_ty`.
+    ///
+    /// NOTE(review): the f128 entry is the TFmode symbol; targets whose binary128 is not TFmode
+    /// may need a different name. Confirm against the runtime library in use.
+    fn f16_ext_fn_name(&self, dest_ty: Type<'gcc>) -> Option<&'static str> {
+        match self.cx.type_kind(dest_ty) {
+            TypeKind::Float => Some("__extendhfsf2"),
+            TypeKind::Double => Some("__extendhfdf2"),
+            // Otherwise `fpext` to f128 falls back to a plain cast of the f16 value.
+            TypeKind::FP128 => Some("__extendhftf2"),
+            _ => None,
+        }
+    }
+
+    /// Returns the soft-float routine that narrows a `src_ty` value to f16, or `None` when no
+    /// such routine is known for `src_ty`.
+    ///
+    /// NOTE(review): the f128 entry is the TFmode symbol; targets whose binary128 is not TFmode
+    /// may need a different name. Confirm against the runtime library in use.
+    fn f16_trunc_fn_name(&self, src_ty: Type<'gcc>) -> Option<&'static str> {
+        match self.cx.type_kind(src_ty) {
+            TypeKind::Float => Some("__truncsfhf2"),
+            TypeKind::Double => Some("__truncdfhf2"),
+            // Otherwise `fptrunc` from f128 falls back to a plain cast to the f16 type.
+            TypeKind::FP128 => Some("__trunctfhf2"),
+            _ => None,
+        }
+    }
+
+    /// Declares the extern function `name`, taking one `param_ty` argument and returning
+    /// `return_ty`, and builds a call to it with `value`.
+    fn call_unary_fn(
+        &self,
+        name: &str,
+        value: RValue<'gcc>,
+        param_ty: Type<'gcc>,
+        return_ty: Type<'gcc>,
+    ) -> RValue<'gcc> {
+        let param = self.context.new_parameter(None, param_ty, "a");
+        let func = self.context.new_function(
+            None,
+            gccjit::FunctionType::Extern,
+            return_ty,
+            &[param],
+            name,
+            false,
+        );
+        self.context.new_call(self.location, func, &[value])
+    }
+
+    /// Widens an f16 `value` to `dest_ty` through a soft-float routine.
+    ///
+    /// Returns `None` when `value` is not an f16 or no routine is known for `dest_ty`, so that
+    /// callers can fall back to their regular code path.
+    fn f16_to_float_ext(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
+        if self.cx.type_kind(value.get_type()) != TypeKind::Half {
+            return None;
+        }
+
+        let func_name = self.f16_ext_fn_name(dest_ty)?;
+        // The routine is declared with the f16 ABI type as its parameter type.
+        let value = self.cx.bitcast_if_needed(value, self.cx.f16_abi_type);
+        Some(self.call_unary_fn(func_name, value, self.cx.f16_abi_type, dest_ty))
+    }
+
+    /// Narrows a float `value` to the f16 type `dest_ty` through a soft-float routine.
+    ///
+    /// Returns `None` when `dest_ty` is not an f16 or no routine is known for the type of
+    /// `value`.
+    fn float_to_f16_trunc(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
+        if self.cx.type_kind(dest_ty) != TypeKind::Half {
+            return None;
+        }
+
+        let value_type = value.get_type();
+        let func_name = self.f16_trunc_fn_name(value_type)?;
+        let value = self.call_unary_fn(func_name, value, value_type, self.cx.f16_abi_type);
+        Some(self.cx.bitcast_if_needed(value, dest_ty))
+    }
+
+    /// Converts an integer `value` to the f16 type `dest_ty` by converting it to f32 first and
+    /// then narrowing. Returns `None` when `dest_ty` is not an f16.
+    fn int_to_f16_trunc(
+        &self,
+        value: RValue<'gcc>,
+        dest_ty: Type<'gcc>,
+        signed: bool,
+    ) -> Option<RValue<'gcc>> {
+        if self.cx.type_kind(dest_ty) != TypeKind::Half {
+            return None;
+        }
+
+        let value = if signed {
+            self.gcc_int_to_float_cast(value, self.cx.type_f32())
+        } else {
+            self.gcc_uint_to_float_cast(value, self.cx.type_f32())
+        };
+        self.float_to_f16_trunc(value, dest_ty)
+    }
+
+    /// Runs the binary float operation `op` on f16 operands by widening both to f32 and
+    /// narrowing the result back to the type of `a`. Returns `None` when `a` is not an f16.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `a` is an f16 but `b` is not, or if the result of `op` cannot be narrowed.
+    fn f16_arithmetic_binary_op(
+        &mut self,
+        a: RValue<'gcc>,
+        b: RValue<'gcc>,
+        op: impl FnOnce(&mut Self, RValue<'gcc>, RValue<'gcc>) -> RValue<'gcc>,
+    ) -> Option<RValue<'gcc>> {
+        let dest_ty = a.get_type();
+        let a = self.f16_to_float_ext(a, self.cx.type_f32())?;
+        let b = self
+            .f16_to_float_ext(b, self.cx.type_f32())
+            .expect("f16 binary operands should have the same type");
+        let result = op(self, a, b);
+        Some(self.float_to_f16_trunc(result, dest_ty).expect("f32 should truncate to f16"))
+    }
+
+    /// Runs the unary float operation `op` on an f16 `value` by widening it to f32 and
+    /// narrowing the result back. Returns `None` when `value` is not an f16.
+    fn f16_arithmetic_unary_op(
+        &mut self,
+        value: RValue<'gcc>,
+        op: impl FnOnce(&mut Self, RValue<'gcc>) -> RValue<'gcc>,
+    ) -> Option<RValue<'gcc>> {
+        let dest_ty = value.get_type();
+        let value = self.f16_to_float_ext(value, self.cx.type_f32())?;
+        let result = op(self, value);
+        Some(self.float_to_f16_trunc(result, dest_ty).expect("f32 should truncate to f16"))
+    }
 }
 
 impl<'tcx> HasTyCtxt<'tcx> for Builder<'_, '_, 'tcx> {
@@ -695,6 +795,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(a, b, |this, a, b| this.assign_to_var(a + b))
+        {
+            return value; // f16 operands: the sum was computed in f32 and narrowed back
+        }
         self.assign_to_var(a + b)
     }
 
@@ -704,6 +809,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fsub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(a, b, |this, a, b| this.assign_to_var(a - b))
+        {
+            return value; // f16 operands: the difference was computed in f32 and narrowed back
+        }
         self.assign_to_var(a - b)
     }
 
@@ -712,6 +822,17 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fmul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_arithmetic_binary_op(a, b, |this, a, b| {
+            this.assign_to_var(this.cx.context.new_binary_op(
+                this.location,
+                BinaryOp::Mult,
+                a.get_type(),
+                a,
+                b,
+            ))
+        }) {
+            return value;
+        }
         self.assign_to_var(self.cx.context.new_binary_op(
             self.location,
             BinaryOp::Mult,
@@ -748,6 +869,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fdiv(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(a, b, |this, a, b| this.assign_to_var(a / b))
+        {
+            return value; // f16 operands: the quotient was computed in f32 and narrowed back
+        }
         self.assign_to_var(a / b)
     }
 
@@ -760,6 +886,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn frem(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_arithmetic_binary_op(a, b, |this, a, b| {
+            let fmodf = this.context.get_builtin_function("fmodf");
+            this.context.new_call(this.location, fmodf, &[a, b])
+        }) {
+            return value;
+        }
         // FIXME(antoyo): add check in libgccjit since using the binary operator % causes the following error:
         // during RTL pass: expand
         // libgccjit.so: error: in expmed_mode_index, at expmed.h:240
@@ -795,14 +927,6 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
         #[cfg(feature = "master")]
         match self.cx.type_kind(a_type) {
-            TypeKind::Half => {
-                let fmodf = self.context.get_builtin_function("fmodf");
-                let f32_type = self.type_f32();
-                let a = self.context.new_cast(self.location, a, f32_type);
-                let b = self.context.new_cast(self.location, b, f32_type);
-                let result = self.context.new_call(self.location, fmodf, &[a, b]);
-                return self.context.new_cast(self.location, result, a_type);
-            }
             TypeKind::Float => {
                 let fmodf = self.context.get_builtin_function("fmodf");
                 return self.context.new_call(self.location, fmodf, &[a, b]);
@@ -882,6 +1006,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fneg(&mut self, a: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_arithmetic_unary_op(a, |this, a| {
+            this.cx.context.new_unary_op(this.location, UnaryOp::Minus, a.get_type(), a)
+        }) {
+            return value;
+        }
         set_rvalue_location(
             self,
             self.cx.context.new_unary_op(self.location, UnaryOp::Minus, a.get_type(), a),
@@ -1282,27 +1411,41 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fptoui(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let value = self.f16_to_float_ext(value, self.cx.type_f32()).unwrap_or(value); // widen f16
         set_rvalue_location(self, self.gcc_float_to_uint_cast(value, dest_ty))
     }
 
     fn fptosi(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let value = self.f16_to_float_ext(value, self.cx.type_f32()).unwrap_or(value); // widen f16
         set_rvalue_location(self, self.gcc_float_to_int_cast(value, dest_ty))
     }
 
     fn uitofp(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.int_to_f16_trunc(value, dest_ty, false) {
+            return set_rvalue_location(self, value); // f16 target: converted via f32
+        }
         set_rvalue_location(self, self.gcc_uint_to_float_cast(value, dest_ty))
     }
 
     fn sitofp(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.int_to_f16_trunc(value, dest_ty, true) {
+            return set_rvalue_location(self, value); // f16 target: converted via f32
+        }
         set_rvalue_location(self, self.gcc_int_to_float_cast(value, dest_ty))
     }
 
     fn fptrunc(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.float_to_f16_trunc(value, dest_ty) {
+            return set_rvalue_location(self, value); // f16 target: soft-float narrowing call
+        }
         // FIXME(antoyo): make sure it truncates.
         set_rvalue_location(self, self.context.new_cast(self.location, value, dest_ty))
     }
 
     fn fpext(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_to_float_ext(value, dest_ty) {
+            return set_rvalue_location(self, value); // f16 source: soft-float widening call
+        }
         set_rvalue_location(self, self.context.new_cast(self.location, value, dest_ty))
     }
 
@@ -1368,6 +1511,10 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // LLVM has a concept of "unordered compares", where eg ULT returns true if either the two
         // arguments are unordered (i.e. either is NaN), or the lhs is less than the rhs. GCC does
         // not natively have this concept, so in some cases we must manually handle NaNs
+
+        let lhs = self.f16_to_float_ext(lhs, self.cx.type_f32()).unwrap_or(lhs);
+        let rhs = self.f16_to_float_ext(rhs, self.cx.type_f32()).unwrap_or(rhs);
+
         let must_handle_nan = match op {
             RealPredicate::RealPredicateFalse => unreachable!(),
             RealPredicate::RealOEQ => false,
@@ -1848,6 +1995,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         if scalar.is_bool() {
             return self.unchecked_utrunc(val, self.cx().type_i1());
         }
+        if let abi::Primitive::Float(abi::Float::F16) = scalar.primitive() {
+            return self.cx.bitcast_if_needed(val, self.cx.f16_abi_type);
+        }
         val
     }
 
@@ -1864,9 +2014,12 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     fn fptoint_sat(
         &mut self,
         signed: bool,
-        val: RValue<'gcc>,
+        mut val: RValue<'gcc>,
         dest_ty: Type<'gcc>,
     ) -> RValue<'gcc> {
+        if let Some(extended) = self.f16_to_float_ext(val, self.cx.type_f32()) {
+            val = extended;
+        }
         let src_ty = self.cx.val_ty(val);
         let (float_ty, int_ty) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
             assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
diff --git a/src/context.rs b/src/context.rs
index ed313859aea..647a4fa8078 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -59,6 +59,8 @@ pub struct CodegenCx<'gcc, 'tcx> {
     pub u128_type: Type<'gcc>,
     pub usize_type: Type<'gcc>,
 
+    pub f16_abi_type: Type<'gcc>,
+
     pub char_type: Type<'gcc>,
     pub uchar_type: Type<'gcc>,
     pub short_type: Type<'gcc>,
@@ -184,6 +186,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
         let u32_type = create_type(CType::UInt32t, tcx.types.u32);
         let u64_type = create_type(CType::UInt64t, tcx.types.u64);
 
+        let f16_abi_type = context.new_int_type(2, false);
+
         let (i128_type, u128_type) = if supports_128bit_integers {
             let i128_type = create_type(CType::Int128t, tcx.types.i128);
             let u128_type = create_type(CType::UInt128t, tcx.types.u128);
@@ -260,6 +264,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             u32_type,
             u64_type,
             u128_type,
+            f16_abi_type,
             char_type,
             uchar_type,
             short_type,
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index b19f63dd077..1344bee88fe 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -62,6 +62,25 @@ fn aes_output_type<'a, 'gcc, 'tcx>(
     (typ, field1, field2)
 }
 
+/// Builds a `vector_ty` rvalue with every lane set to `value`.
+///
+/// `value` is bitcast to the lane type first when its type differs from it.
+fn splat_scalar_to_vector<'a, 'gcc, 'tcx>(
+    builder: &Builder<'a, 'gcc, 'tcx>,
+    vector_ty: Type<'gcc>,
+    value: RValue<'gcc>,
+) -> RValue<'gcc> {
+    let vector = vector_ty.dyncast_vector().expect("vector type");
+    let lane_ty = vector.get_element_type();
+    let lane = if value.get_type() == lane_ty {
+        value
+    } else {
+        builder.context.new_bitcast(None, value, lane_ty)
+    };
+    let lanes = vec![lane; vector.get_num_units()];
+    builder.context.new_rvalue_from_vector(None, vector_ty, &lanes)
+}
+
 fn wide_aes_output_type<'a, 'gcc, 'tcx>(
     builder: &Builder<'a, 'gcc, 'tcx>,
 ) -> (Type<'gcc>, Field<'gcc>, Field<'gcc>) {
@@ -613,9 +632,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 let arg2_type = gcc_func.get_param_type(1);
                 let arg3_type = gcc_func.get_param_type(2);
                 let arg4_type = gcc_func.get_param_type(3);
-                let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 8]);
-                let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 8]);
-                let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 8]);
+                let a = splat_scalar_to_vector(builder, arg1_type, new_args[0]);
+                let b = splat_scalar_to_vector(builder, arg2_type, new_args[1]);
+                let c = splat_scalar_to_vector(builder, arg3_type, new_args[2]);
                 let arg4 = builder.context.new_rvalue_from_int(arg4_type, -1);
                 args = vec![a, b, c, arg4, new_args[3]].into();
             }
@@ -712,9 +731,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 let arg1_type = gcc_func.get_param_type(0);
                 let arg2_type = gcc_func.get_param_type(1);
                 let arg3_type = gcc_func.get_param_type(2);
-                let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 4]);
-                let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 4]);
-                let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 4]);
+                let a = splat_scalar_to_vector(builder, arg1_type, new_args[0]);
+                let b = splat_scalar_to_vector(builder, arg2_type, new_args[1]);
+                let c = splat_scalar_to_vector(builder, arg3_type, new_args[2]);
                 args = vec![a, b, c, new_args[3]].into();
             }
             "__builtin_ia32_vfmaddsd3_round" => {
@@ -722,9 +741,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 let arg1_type = gcc_func.get_param_type(0);
                 let arg2_type = gcc_func.get_param_type(1);
                 let arg3_type = gcc_func.get_param_type(2);
-                let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 2]);
-                let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 2]);
-                let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 2]);
+                let a = splat_scalar_to_vector(builder, arg1_type, new_args[0]);
+                let b = splat_scalar_to_vector(builder, arg2_type, new_args[1]);
+                let c = splat_scalar_to_vector(builder, arg3_type, new_args[2]);
                 args = vec![a, b, c, new_args[3]].into();
             }
             "__builtin_ia32_ldmxcsr" => {
@@ -838,6 +857,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
                 let zero = builder.context.new_rvalue_zero(builder.int_type);
                 return_value =
                     builder.context.new_vector_access(None, return_value, zero).to_rvalue();
+                let expected_type = orig_args[0].get_type();
+                if return_value.get_type() != expected_type {
+                    return_value = builder.context.new_bitcast(None, return_value, expected_type);
+                }
             }
         }
         "__builtin_ia32_addcarryx_u64"
diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs
index b85eb752991..df430b82693 100644
--- a/src/intrinsic/mod.rs
+++ b/src/intrinsic/mod.rs
@@ -145,17 +145,15 @@ fn generic_f16_builtin<'gcc, 'tcx>(
     name: Symbol,
     args: &[OperandRef<'tcx, RValue<'gcc>>],
 ) -> RValue<'gcc> {
-    let f32_type = cx.type_f32();
     let builtin_name = match name {
         sym::fabs => "fabsf",
         _ => unreachable!(),
     };
 
     let func = cx.context.get_builtin_function(builtin_name);
-    let args: Vec<_> =
-        args.iter().map(|arg| cx.context.new_cast(None, arg.immediate(), f32_type)).collect();
+    let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
     let result = cx.context.new_call(None, func, &args);
-    cx.context.new_cast(None, result, cx.type_f16())
+    f32_to_f16(cx, result, cx.f16_abi_type)
 }
 
 fn f16_builtin<'gcc, 'tcx>(
@@ -163,7 +161,6 @@ fn f16_builtin<'gcc, 'tcx>(
     name: Symbol,
     args: &[OperandRef<'tcx, RValue<'gcc>>],
 ) -> RValue<'gcc> {
-    let f32_type = cx.type_f32();
     let builtin_name = match name {
         sym::ceilf16 => "__builtin_ceilf",
         sym::copysignf16 => "__builtin_copysignf",
@@ -183,10 +180,43 @@ fn f16_builtin<'gcc, 'tcx>(
     };
 
     let func = cx.context.get_builtin_function(builtin_name);
-    let args: Vec<_> =
-        args.iter().map(|arg| cx.context.new_cast(None, arg.immediate(), f32_type)).collect();
+    let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
     let result = cx.context.new_call(None, func, &args);
-    cx.context.new_cast(None, result, cx.type_f16())
+    f32_to_f16(cx, result, cx.f16_abi_type)
+}
+
+/// Widens an f16 `value` to `f32` through the `__extendhfsf2` soft-float routine.
+fn f16_to_f32<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, value: RValue<'gcc>) -> RValue<'gcc> {
+    let arg = cx.bitcast_if_needed(value, cx.f16_abi_type);
+    let params = [cx.context.new_parameter(None, cx.f16_abi_type, "a")];
+    let extend = cx.context.new_function(
+        None,
+        FunctionType::Extern,
+        cx.type_f32(),
+        &params,
+        "__extendhfsf2",
+        false,
+    );
+    cx.context.new_call(None, extend, &[arg])
+}
+
+/// Narrows an `f32` `value` via `__truncsfhf2`, then runs `bitcast_if_needed` for `dest_ty`.
+fn f32_to_f16<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    value: RValue<'gcc>,
+    dest_ty: Type<'gcc>,
+) -> RValue<'gcc> {
+    let params = [cx.context.new_parameter(None, cx.type_f32(), "a")];
+    let truncate = cx.context.new_function(
+        None,
+        FunctionType::Extern,
+        cx.f16_abi_type,
+        &params,
+        "__truncsfhf2",
+        false,
+    );
+    let half = cx.context.new_call(None, truncate, &[value]);
+    cx.bitcast_if_needed(half, dest_ty)
+}
 
 impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
@@ -318,10 +348,10 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
             }
             sym::powif16 => {
                 let func = self.cx.context.get_builtin_function("__builtin_powif");
-                let arg0 = self.cx.context.new_cast(None, args[0].immediate(), self.cx.type_f32());
+                let arg0 = f16_to_f32(self.cx, args[0].immediate());
                 let args = [arg0, args[1].immediate()];
                 let result = self.cx.context.new_call(None, func, &args);
-                self.cx.context.new_cast(None, result, self.cx.type_f16())
+                f32_to_f16(self.cx, result, self.cx.f16_abi_type)
             }
             sym::powif128 => {
                 let f128_type = self.cx.type_f128();
diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs
index 82ef99703b2..a3ea8424da5 100644
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@@ -1,8 +1,8 @@
 use std::iter::FromIterator;
 
-use gccjit::{BinaryOp, RValue, ToRValue, Type};
 #[cfg(feature = "master")]
-use gccjit::{ComparisonOp, UnaryOp};
+use gccjit::ComparisonOp;
+use gccjit::{BinaryOp, RValue, ToRValue, Type, UnaryOp};
 use rustc_abi::{Align, Size};
 use rustc_codegen_ssa::base::compare_simd_types;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -795,12 +795,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         }
     }
 
+    #[expect(clippy::too_many_arguments)]
     fn simd_simple_float_intrinsic<'gcc, 'tcx>(
         name: Symbol,
         in_elem: Ty<'_>,
         in_ty: Ty<'_>,
         in_len: u64,
         bx: &mut Builder<'_, 'gcc, 'tcx>,
+        llret_ty: Type<'gcc>,
         span: Span,
         args: &[OperandRef<'tcx, RValue<'gcc>>],
     ) -> Result<RValue<'gcc>, ErrorGuaranteed> {
@@ -814,10 +816,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             return_error!(InvalidMonomorphization::BasicFloatType { span, name, ty: in_ty });
         };
         let elem_ty = bx.cx.type_float_from_ty(*f);
-        let (elem_ty_str, elem_ty, cast_type) = match f.bit_width() {
-            16 => ("", elem_ty, Some(bx.cx.double_type)),
-            32 => ("f", elem_ty, None),
-            64 => ("", elem_ty, None),
+        let is_f16 = f.bit_width() == 16;
+        let elem_ty_str = match f.bit_width() {
+            16 | 32 => "f",
+            64 => "",
             _ => {
                 return_error!(InvalidMonomorphization::FloatingPointVector {
                     span,
@@ -828,7 +830,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         };
 
-        let vec_ty = bx.cx.type_vector(elem_ty, in_len);
+        let vec_ty = if is_f16 { llret_ty } else { bx.cx.type_vector(elem_ty, in_len) };
+        let result_elem_ty =
+            vec_ty.unqualified().dyncast_vector().expect("vector return type").get_element_type();
 
         let intr_name = match name {
             sym::simd_ceil => "ceil",
@@ -862,14 +866,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 let mut element = bx.extract_element(arg.immediate(), index).to_rvalue();
                 // FIXME: it would probably be better to not have casts here and use the proper
                 // instructions.
-                if let Some(typ) = cast_type {
-                    element = bx.context.new_cast(None, element, typ);
+                if is_f16 {
+                    element = super::f16_to_f32(bx.cx, element);
                 }
                 arguments.push(element);
             }
             let mut result = bx.context.new_call(None, function, &arguments);
-            if cast_type.is_some() {
-                result = bx.context.new_cast(None, result, elem_ty);
+            if is_f16 {
+                result = super::f32_to_f16(bx.cx, result, result_elem_ty);
             }
             vector_elements.push(result);
         }
@@ -896,7 +900,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             | sym::simd_round_ties_even
             | sym::simd_trunc
     ) {
-        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
+        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, llret_ty, span, args);
     }
 
     #[cfg(feature = "master")]
@@ -1211,19 +1215,47 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         return Ok(bx.context.new_rvalue_zero(bx.i32_type));
     }
 
-    arith_binary! {
-        simd_add: Uint, Int => add, Float => fadd;
-        simd_sub: Uint, Int => sub, Float => fsub;
-        simd_mul: Uint, Int => mul, Float => fmul;
-        simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
-        simd_rem: Uint => urem, Int => srem, Float => frem;
-        simd_shl: Uint, Int => shl;
-        simd_shr: Uint => lshr, Int => ashr;
-        simd_and: Uint, Int => and;
-        simd_or: Uint, Int => or; // FIXME(antoyo): calling `or` might not work on vectors.
-        simd_xor: Uint, Int => xor;
-        simd_minimum_number_nsz: Float => vector_minimum_number_nsz;
-        simd_maximum_number_nsz: Float => vector_maximum_number_nsz;
+    /// Negates each f16 lane of `value` by widening it to f32, negating, and narrowing it back.
+    fn simd_f16_neg<'gcc, 'tcx>(
+        bx: &mut Builder<'_, 'gcc, 'tcx>,
+        value: RValue<'gcc>,
+        result_ty: Type<'gcc>,
+    ) -> RValue<'gcc> {
+        let vector = result_ty.unqualified().dyncast_vector().expect("vector result type");
+        let lane_ty = vector.get_element_type();
+        let f32_type = bx.cx.type_f32();
+        let mut lanes = Vec::with_capacity(vector.get_num_units());
+        for i in 0..vector.get_num_units() {
+            let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+            let lane = bx.extract_element(value, index).to_rvalue();
+            let wide = super::f16_to_f32(bx.cx, lane);
+            let negated = bx.context.new_unary_op(None, UnaryOp::Minus, f32_type, wide);
+            lanes.push(super::f32_to_f16(bx.cx, negated, lane_ty));
+        }
+        bx.context.new_rvalue_from_vector(None, result_ty, &lanes)
+    }
+
+    /// Applies `op` lane by lane to two f16 vectors, running each operation on f32 values.
+    fn simd_f16_binary_op<'gcc, 'tcx>(
+        bx: &mut Builder<'_, 'gcc, 'tcx>,
+        lhs: RValue<'gcc>,
+        rhs: RValue<'gcc>,
+        result_ty: Type<'gcc>,
+        op: impl Fn(&mut Builder<'_, 'gcc, 'tcx>, RValue<'gcc>, RValue<'gcc>) -> RValue<'gcc>,
+    ) -> RValue<'gcc> {
+        let vector = result_ty.unqualified().dyncast_vector().expect("vector result type");
+        let lane_ty = vector.get_element_type();
+        let mut lanes = Vec::with_capacity(vector.get_num_units());
+        for i in 0..vector.get_num_units() {
+            let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+            let a = bx.extract_element(lhs, index).to_rvalue();
+            let b = bx.extract_element(rhs, index).to_rvalue();
+            let a = super::f16_to_f32(bx.cx, a);
+            let b = super::f16_to_f32(bx.cx, b);
+            let wide = op(bx, a, b);
+            lanes.push(super::f32_to_f16(bx.cx, wide, lane_ty));
+        }
+        bx.context.new_rvalue_from_vector(None, result_ty, &lanes)
     }
 
     macro_rules! arith_unary {
@@ -1240,6 +1272,80 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         }
     }
 
+    if let ty::Float(ref f) = *in_elem.kind()
+        && f.bit_width() == 16
+    {
+        match name {
+            sym::simd_add => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |_, lhs, rhs| lhs + rhs,
+                ));
+            }
+            sym::simd_sub => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |_, lhs, rhs| lhs - rhs,
+                ));
+            }
+            sym::simd_mul => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |bx, lhs, rhs| {
+                        bx.context.new_binary_op(None, BinaryOp::Mult, lhs.get_type(), lhs, rhs)
+                    },
+                ));
+            }
+            sym::simd_div => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |_, lhs, rhs| lhs / rhs,
+                ));
+            }
+            sym::simd_rem => {
+                let fmodf = bx.context.get_builtin_function("fmodf"); // shared by every lane
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |bx, lhs, rhs| bx.context.new_call(None, fmodf, &[lhs, rhs]),
+                ));
+            }
+            sym::simd_neg => {
+                return Ok(simd_f16_neg(bx, args[0].immediate(), llret_ty));
+            }
+            _ => {} // every other intrinsic continues to the generic code below
+        }
+    }
+
+    arith_binary! {
+        simd_add: Uint, Int => add, Float => fadd;
+        simd_sub: Uint, Int => sub, Float => fsub;
+        simd_mul: Uint, Int => mul, Float => fmul;
+        simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
+        simd_rem: Uint => urem, Int => srem, Float => frem;
+        simd_shl: Uint, Int => shl;
+        simd_shr: Uint => lshr, Int => ashr;
+        simd_and: Uint, Int => and;
+        simd_or: Uint, Int => or; // FIXME(antoyo): calling `or` might not work on vectors.
+        simd_xor: Uint, Int => xor;
+        simd_minimum_number_nsz: Float => vector_minimum_number_nsz;
+        simd_maximum_number_nsz: Float => vector_maximum_number_nsz;
+    }
+
     arith_unary! {
         simd_neg: Int => neg, Float => fneg;
     }
diff --git a/src/type_.rs b/src/type_.rs
index 5252f93a92e..ba33fca7c39 100644
--- a/src/type_.rs
+++ b/src/type_.rs
@@ -153,7 +153,7 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
         if self.supports_f16_type {
             return self.context.new_c_type(CType::Float16);
         }
-        bug!("unsupported float width 16")
+        self.f16_abi_type
     }
 
     fn type_f32(&self) -> Type<'gcc> {
@@ -186,7 +186,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
 
     #[cfg(feature = "master")]
     fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
-        if self.is_int_type_or_bool(typ) {
+        if typ == self.f16_abi_type {
+            TypeKind::Half
+        } else if self.is_int_type_or_bool(typ) {
             TypeKind::Integer
         } else if typ.get_pointee().is_some() {
             TypeKind::Pointer
@@ -220,7 +222,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
 
     #[cfg(not(feature = "master"))]
     fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
-        if self.is_int_type_or_bool(typ) {
+        if typ == self.f16_abi_type {
+            TypeKind::Half
+        } else if self.is_int_type_or_bool(typ) {
             TypeKind::Integer
         } else if typ.is_compatible_with(self.float_type) {
             TypeKind::Float
@@ -270,6 +274,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
 
     #[cfg(feature = "master")]
     fn float_width(&self, typ: Type<'gcc>) -> usize {
+        if typ == self.f16_abi_type {
+            return 16;
+        }
         if typ.is_floating_point() {
             (typ.get_size() * u8::BITS).try_into().unwrap()
         } else {
@@ -281,7 +288,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
     fn float_width(&self, typ: Type<'gcc>) -> usize {
         let f32 = self.context.new_type::<f32>();
         let f64 = self.context.new_type::<f64>();
-        if typ.is_compatible_with(f32) {
+        if typ == self.f16_abi_type {
+            16
+        } else if typ.is_compatible_with(f32) {
             32
         } else if typ.is_compatible_with(f64) {
             64
diff --git a/src/type_of.rs b/src/type_of.rs
index 5b198eeaf01..cfbf797db7a 100644
--- a/src/type_of.rs
+++ b/src/type_of.rs
@@ -285,6 +285,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
         match scalar.primitive() {
             Int(i, true) => cx.type_from_integer(i),
             Int(i, false) => cx.type_from_unsigned_integer(i),
+            Float(abi::Float::F16) => cx.f16_abi_type,
             Float(f) => cx.type_from_float(f),
             Pointer(address_space) => {
                 // If we know the alignment, pick something better than i8.
diff --git a/tests/compile/f16-abi.rs b/tests/compile/f16-abi.rs
new file mode 100644
index 00000000000..49f399361ff
--- /dev/null
+++ b/tests/compile/f16-abi.rs
@@ -0,0 +1,49 @@
+// Compiler:
+
+#![crate_type = "lib"]
+#![feature(f16)]
+
+#[unsafe(no_mangle)]
+pub extern "C" fn f16_identity(a: f16) -> f16 {
+    a
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_f32(a: f16) -> f32 {
+    a as f32
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_f64(a: f16) -> f64 {
+    a as f64
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_i32(a: f16) -> i32 {
+    a as i32
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_u32(a: f16) -> u32 {
+    a as u32
+}
+
+#[unsafe(no_mangle)]
+pub fn i32_to_f16(a: i32) -> f16 {
+    a as f16
+}
+
+#[unsafe(no_mangle)]
+pub fn u32_to_f16(a: u32) -> f16 {
+    a as f16
+}
+
+#[unsafe(no_mangle)]
+pub fn f32_to_f16(a: f32) -> f16 {
+    a as f16
+}
+
+#[unsafe(no_mangle)]
+pub fn f64_to_f16(a: f64) -> f16 {
+    a as f16
+}
diff --git a/tests/lang_tests.rs b/tests/lang_tests.rs
index e3baf1e038f..14f878dabbb 100644
--- a/tests/lang_tests.rs
+++ b/tests/lang_tests.rs
@@ -223,7 +223,7 @@ fn run_tests(tempdir: PathBuf, current_dir: String) {
         "[DEBUG] lang run",
         "tests/run",
         TestMode::CompileAndRun,
-        &[],
+        &["f16.rs"],
     );
     build_test_runner(
         tempdir,
@@ -232,7 +232,7 @@ fn run_tests(tempdir: PathBuf, current_dir: String) {
         "[RELEASE] lang run",
         "tests/run",
         TestMode::CompileAndRun,
-        &[],
+        &["f16.rs"],
     );
 }
 
diff --git a/tests/run/f16.rs b/tests/run/f16.rs
new file mode 100644
index 00000000000..22582e518e2
--- /dev/null
+++ b/tests/run/f16.rs
@@ -0,0 +1,44 @@
+// Compiler:
+//
+// Run-time:
+//   status: 0
+
+#![feature(core_intrinsics, f16)]
+#![allow(internal_features)]
+
+use std::hint::black_box;
+use std::intrinsics::{fmaf16, powif16};
+
+fn assert_f16_bits(value: f16, bits: u16) {
+    assert_eq!(value.to_bits(), bits);
+}
+
+fn main() {
+    let one_and_half = black_box(f16::from_bits(0x3e00));
+    assert_eq!(one_and_half as f32, 1.5f32);
+    assert_eq!(one_and_half as f64, 1.5f64);
+
+    let three_and_three_quarters = black_box(f16::from_bits(0x4380));
+    assert_eq!(three_and_three_quarters as i32, 3);
+    assert_eq!(three_and_three_quarters as u32, 3);
+
+    let negative_two_and_half = black_box(f16::from_bits(0xc100));
+    assert_eq!(negative_two_and_half as i32, -2);
+    assert_eq!(negative_two_and_half as u32, 0);
+
+    assert_f16_bits(black_box(1.5f32) as f16, 0x3e00);
+    assert_f16_bits(black_box(-2.0f32) as f16, 0xc000);
+    assert_f16_bits(black_box(1.5f64) as f16, 0x3e00);
+    assert_f16_bits(black_box(42i32) as f16, 0x5140);
+    assert_f16_bits(black_box(42u32) as f16, 0x5140);
+
+    let one = black_box(1.0f16);
+    let two = black_box(2.0f16);
+    let three = black_box(3.0f16);
+    assert_f16_bits(one + two, 0x4200);
+    assert_f16_bits(two * three, 0x4600);
+    assert_f16_bits(two / one, 0x4000);
+    assert_f16_bits(-three, 0xc200);
+    assert_f16_bits(fmaf16(one, two, -three), 0xbc00);
+    assert_f16_bits(powif16(two, 3), 0x4800);
+}
diff --git a/tools/cspell_dicts/rustc_codegen_gcc.txt b/tools/cspell_dicts/rustc_codegen_gcc.txt
index 619221d5260..d35616771a0 100644
--- a/tools/cspell_dicts/rustc_codegen_gcc.txt
+++ b/tools/cspell_dicts/rustc_codegen_gcc.txt
@@ -13,6 +13,8 @@ ctlz
 ctpop
 cttz
 ctzll
+extendhfdf
+extendhfsf
 flto
 fmaximumf
 fmuladd
@@ -71,6 +73,8 @@ spir
 subo
 sysv
 tbaa
+truncdfhf
+truncsfhf
 uitofp
 unord
 uninlined

From 0f1220a932f931d7c9bf077cff4159d4acf27441 Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sat, 30 May 2026 00:01:33 +0800
Subject: [PATCH 2/8] Fix f16 SIMD float cast

---
 src/intrinsic/simd.rs | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs
index a3ea8424da5..87ccc619b5a 100644
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@@ -694,6 +694,32 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         };
 
         match (in_style, out_style) {
+            (Style::Float, Style::Float)
+                if matches!(in_elem.kind(), ty::Float(f) if f.bit_width() == 16)
+                    || matches!(out_elem.kind(), ty::Float(f) if f.bit_width() == 16) =>
+            {
+                let arg = args[0].immediate();
+                let result_elem_ty = llret_ty
+                    .unqualified()
+                    .dyncast_vector()
+                    .expect("vector return type")
+                    .get_element_type();
+                let mut elements = Vec::with_capacity(in_len as usize);
+                for i in 0..in_len {
+                    let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+                    let mut element = bx.extract_element(arg, index).to_rvalue();
+                    if matches!(in_elem.kind(), ty::Float(f) if f.bit_width() == 16) {
+                        element = super::f16_to_f32(bx.cx, element);
+                    }
+                    if matches!(out_elem.kind(), ty::Float(f) if f.bit_width() == 16) {
+                        element = super::f32_to_f16(bx.cx, element, result_elem_ty);
+                    } else {
+                        element = bx.context.new_cast(None, element, result_elem_ty);
+                    }
+                    elements.push(element);
+                }
+                return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
+            }
             (Style::Unsupported, Style::Unsupported) => {
                 require!(
                     false,

From f46d747e5f1a0fcb61b7c29597c08bb35f7e7d9c Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sat, 30 May 2026 01:00:09 +0800
Subject: [PATCH 3/8] Fix f16 algebraic float ops

---
 src/builder.rs | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/builder.rs b/src/builder.rs
index 184218d0928..06c553ba8fa 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -1054,21 +1054,41 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn fadd_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a + b))
+        {
+            return value;
+        }
         self.assign_to_var(lhs + rhs)
     }
 
     fn fsub_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a - b))
+        {
+            return value;
+        }
         self.assign_to_var(lhs - rhs)
     }
 
     fn fmul_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a * b))
+        {
+            return value;
+        }
         self.assign_to_var(lhs * rhs)
     }
 
     fn fdiv_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a / b))
+        {
+            return value;
+        }
         self.assign_to_var(lhs / rhs)
     }
 

From 423620d46acbd0ffe3419776811d4ad53b4d3e19 Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sat, 30 May 2026 15:49:07 +0800
Subject: [PATCH 4/8] Fix f16 algebraic ops and NaN-preserving negation

---
 src/builder.rs        | 68 +++++-------------------------------
 src/context.rs        | 80 ++++++++++++++++++++++++++++++++++++++++++-
 src/intrinsic/mod.rs  | 45 +++++++++---------------
 src/intrinsic/simd.rs | 65 ++++++++++++++++++++++++-----------
 tests/run/f16.rs      | 12 ++++++-
 5 files changed, 160 insertions(+), 110 deletions(-)

diff --git a/src/builder.rs b/src/builder.rs
index 06c553ba8fa..403e2daef25 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -455,60 +455,20 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         result.to_rvalue()
     }
 
-    fn f16_ext_fn_name(&self, dest_ty: Type<'gcc>) -> Option<&'static str> {
-        match self.cx.type_kind(dest_ty) {
-            TypeKind::Float => Some("__extendhfsf2"),
-            TypeKind::Double => Some("__extendhfdf2"),
-            _ => None,
-        }
-    }
-
-    fn f16_trunc_fn_name(&self, src_ty: Type<'gcc>) -> Option<&'static str> {
-        match self.cx.type_kind(src_ty) {
-            TypeKind::Float => Some("__truncsfhf2"),
-            TypeKind::Double => Some("__truncdfhf2"),
-            _ => None,
-        }
-    }
-
-    fn call_unary_fn(
-        &self,
-        name: &str,
-        value: RValue<'gcc>,
-        param_ty: Type<'gcc>,
-        return_ty: Type<'gcc>,
-    ) -> RValue<'gcc> {
-        let param = self.context.new_parameter(None, param_ty, "a");
-        let func = self.context.new_function(
-            None,
-            gccjit::FunctionType::Extern,
-            return_ty,
-            &[param],
-            name,
-            false,
-        );
-        self.context.new_call(self.location, func, &[value])
-    }
-
     fn f16_to_float_ext(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
-        if self.cx.type_kind(value.get_type()) != TypeKind::Half {
+        if !self.cx.is_f16_abi_storage_type(value.get_type()) {
             return None;
         }
 
-        let func_name = self.f16_ext_fn_name(dest_ty)?;
-        let value = self.cx.bitcast_if_needed(value, self.cx.f16_abi_type);
-        Some(self.call_unary_fn(func_name, value, self.cx.f16_abi_type, dest_ty))
+        self.cx.f16_to_float_libcall(value, dest_ty, self.location)
     }
 
     fn float_to_f16_trunc(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
-        if self.cx.type_kind(dest_ty) != TypeKind::Half {
+        if !self.cx.is_f16_abi_storage_type(dest_ty) {
             return None;
         }
 
-        let value_type = value.get_type();
-        let func_name = self.f16_trunc_fn_name(value_type)?;
-        let value = self.call_unary_fn(func_name, value, value_type, self.cx.f16_abi_type);
-        Some(self.cx.bitcast_if_needed(value, dest_ty))
+        self.cx.float_to_f16_libcall(value, dest_ty, self.location)
     }
 
     fn int_to_f16_trunc(
@@ -517,7 +477,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         dest_ty: Type<'gcc>,
         signed: bool,
     ) -> Option<RValue<'gcc>> {
-        if self.cx.type_kind(dest_ty) != TypeKind::Half {
+        if !self.cx.is_f16_abi_storage_type(dest_ty) {
             return None;
         }
 
@@ -543,17 +503,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let result = op(self, a, b);
         Some(self.float_to_f16_trunc(result, dest_ty).expect("f32 should truncate to f16"))
     }
-
-    fn f16_arithmetic_unary_op(
-        &mut self,
-        value: RValue<'gcc>,
-        op: impl FnOnce(&mut Self, RValue<'gcc>) -> RValue<'gcc>,
-    ) -> Option<RValue<'gcc>> {
-        let dest_ty = value.get_type();
-        let value = self.f16_to_float_ext(value, self.cx.type_f32())?;
-        let result = op(self, value);
-        Some(self.float_to_f16_trunc(result, dest_ty).expect("f32 should truncate to f16"))
-    }
 }
 
 impl<'tcx> HasTyCtxt<'tcx> for Builder<'_, '_, 'tcx> {
@@ -1006,10 +955,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fneg(&mut self, a: RValue<'gcc>) -> RValue<'gcc> {
-        if let Some(value) = self.f16_arithmetic_unary_op(a, |this, a| {
-            this.cx.context.new_unary_op(this.location, UnaryOp::Minus, a.get_type(), a)
-        }) {
-            return value;
+        let dest_ty = a.get_type();
+        if self.cx.is_f16_abi_storage_type(dest_ty) {
+            return self.cx.f16_neg(a, dest_ty);
         }
         set_rvalue_location(
             self,
diff --git a/src/context.rs b/src/context.rs
index 647a4fa8078..2cc06d4ad4c 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -1,9 +1,12 @@
 use std::cell::{Cell, RefCell};
 use std::collections::HashMap;
 
-use gccjit::{Block, CType, Context, Function, FunctionType, LValue, Location, RValue, Type};
+use gccjit::{
+    BinaryOp, Block, CType, Context, Function, FunctionType, LValue, Location, RValue, Type,
+};
 use rustc_abi::{Align, HasDataLayout, PointeeInfo, Size, TargetDataLayout, VariantIdx};
 use rustc_codegen_ssa::base::wants_msvc_seh;
+use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeCodegenMethods, MiscCodegenMethods};
 use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN};
@@ -376,6 +379,81 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             value
         }
     }
+
+    fn call_unary_fn(
+        &self,
+        name: &str,
+        value: RValue<'gcc>,
+        param_ty: Type<'gcc>,
+        return_ty: Type<'gcc>,
+        location: Option<Location<'gcc>>,
+    ) -> RValue<'gcc> {
+        let param = self.context.new_parameter(None, param_ty, "a");
+        let func =
+            self.context.new_function(None, FunctionType::Extern, return_ty, &[param], name, false);
+        self.context.new_call(location, func, &[value])
+    }
+
+    fn f16_ext_fn_name(&self, dest_ty: Type<'gcc>) -> Option<&'static str> {
+        match self.type_kind(dest_ty) {
+            TypeKind::Float => Some("__extendhfsf2"),
+            TypeKind::Double => Some("__extendhfdf2"),
+            _ => None,
+        }
+    }
+
+    fn f16_trunc_fn_name(&self, src_ty: Type<'gcc>) -> Option<&'static str> {
+        match self.type_kind(src_ty) {
+            TypeKind::Float => Some("__truncsfhf2"),
+            TypeKind::Double => Some("__truncdfhf2"),
+            _ => None,
+        }
+    }
+
+    pub fn is_f16_abi_storage_type(&self, typ: Type<'gcc>) -> bool {
+        // Callers use this only for Rust f16 operations. The compatibility arm handles
+        // GCC versions that hand back an equivalent u16 storage type instead of the exact handle.
+        self.type_kind(typ) == TypeKind::Half
+            || (!typ.is_floating_point()
+                && typ.get_size() == 2
+                && typ.is_compatible_with(self.f16_abi_type))
+    }
+
+    pub fn f16_to_float_libcall(
+        &self,
+        value: RValue<'gcc>,
+        dest_ty: Type<'gcc>,
+        location: Option<Location<'gcc>>,
+    ) -> Option<RValue<'gcc>> {
+        let name = self.f16_ext_fn_name(dest_ty)?;
+        let value = self.bitcast_if_needed(value, self.f16_abi_type);
+        Some(self.call_unary_fn(name, value, self.f16_abi_type, dest_ty, location))
+    }
+
+    pub fn float_to_f16_libcall(
+        &self,
+        value: RValue<'gcc>,
+        dest_ty: Type<'gcc>,
+        location: Option<Location<'gcc>>,
+    ) -> Option<RValue<'gcc>> {
+        let value_type = value.get_type();
+        let name = self.f16_trunc_fn_name(value_type)?;
+        let value = self.call_unary_fn(name, value, value_type, self.f16_abi_type, location);
+        Some(self.bitcast_if_needed(value, dest_ty))
+    }
+
+    pub fn f16_neg(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let value = self.bitcast_if_needed(value, self.f16_abi_type);
+        let sign_bit = self.gcc_uint(self.f16_abi_type, 0x8000);
+        let value = self.context.new_binary_op(
+            None,
+            BinaryOp::BitwiseXor,
+            self.f16_abi_type,
+            value,
+            sign_bit,
+        );
+        self.bitcast_if_needed(value, dest_ty)
+    }
 }
 
 impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {
diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs
index df430b82693..a31e2744424 100644
--- a/src/intrinsic/mod.rs
+++ b/src/intrinsic/mod.rs
@@ -153,7 +153,7 @@ fn generic_f16_builtin<'gcc, 'tcx>(
     let func = cx.context.get_builtin_function(builtin_name);
     let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
     let result = cx.context.new_call(None, func, &args);
-    f32_to_f16(cx, result, cx.f16_abi_type)
+    float_to_f16(cx, result, cx.f16_abi_type)
 }
 
 fn f16_builtin<'gcc, 'tcx>(
@@ -182,41 +182,30 @@ fn f16_builtin<'gcc, 'tcx>(
     let func = cx.context.get_builtin_function(builtin_name);
     let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
     let result = cx.context.new_call(None, func, &args);
-    f32_to_f16(cx, result, cx.f16_abi_type)
+    float_to_f16(cx, result, cx.f16_abi_type)
+}
+
+fn f16_to_float<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    value: RValue<'gcc>,
+    dest_ty: Type<'gcc>,
+) -> RValue<'gcc> {
+    cx.f16_to_float_libcall(value, dest_ty, None)
+        .unwrap_or_else(|| bug!("cannot extend f16 to {:?}", cx.type_kind(dest_ty)))
 }
 
 fn f16_to_f32<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, value: RValue<'gcc>) -> RValue<'gcc> {
-    let value = cx.bitcast_if_needed(value, cx.f16_abi_type);
-    let f32_type = cx.type_f32();
-    let param = cx.context.new_parameter(None, cx.f16_abi_type, "a");
-    let func = cx.context.new_function(
-        None,
-        FunctionType::Extern,
-        f32_type,
-        &[param],
-        "__extendhfsf2",
-        false,
-    );
-    cx.context.new_call(None, func, &[value])
+    f16_to_float(cx, value, cx.type_f32())
 }
 
-fn f32_to_f16<'gcc, 'tcx>(
+fn float_to_f16<'gcc, 'tcx>(
     cx: &CodegenCx<'gcc, 'tcx>,
     value: RValue<'gcc>,
     dest_ty: Type<'gcc>,
 ) -> RValue<'gcc> {
-    let f32_type = cx.type_f32();
-    let param = cx.context.new_parameter(None, f32_type, "a");
-    let func = cx.context.new_function(
-        None,
-        FunctionType::Extern,
-        cx.f16_abi_type,
-        &[param],
-        "__truncsfhf2",
-        false,
-    );
-    let value = cx.context.new_call(None, func, &[value]);
-    cx.bitcast_if_needed(value, dest_ty)
+    let value_ty = value.get_type();
+    cx.float_to_f16_libcall(value, dest_ty, None)
+        .unwrap_or_else(|| bug!("cannot truncate {:?} to f16", cx.type_kind(value_ty)))
 }
 
 impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
@@ -351,7 +340,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
                 let arg0 = f16_to_f32(self.cx, args[0].immediate());
                 let args = [arg0, args[1].immediate()];
                 let result = self.cx.context.new_call(None, func, &args);
-                f32_to_f16(self.cx, result, self.cx.f16_abi_type)
+                float_to_f16(self.cx, result, self.cx.f16_abi_type)
             }
             sym::powif128 => {
                 let f128_type = self.cx.type_f128();
diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs
index 87ccc619b5a..c8e8c49052a 100644
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@@ -693,26 +693,21 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             _ => Style::Unsupported,
         };
 
+        let in_is_f16 = matches!(in_elem.kind(), ty::Float(f) if f.bit_width() == 16);
+        let out_is_f16 = matches!(out_elem.kind(), ty::Float(f) if f.bit_width() == 16);
+
         match (in_style, out_style) {
-            (Style::Float, Style::Float)
-                if matches!(in_elem.kind(), ty::Float(f) if f.bit_width() == 16)
-                    || matches!(out_elem.kind(), ty::Float(f) if f.bit_width() == 16) =>
-            {
+            (Style::Float, Style::Float) if in_is_f16 || out_is_f16 => {
                 let arg = args[0].immediate();
-                let result_elem_ty = llret_ty
-                    .unqualified()
-                    .dyncast_vector()
-                    .expect("vector return type")
-                    .get_element_type();
+                let result_elem_ty = bx.element_type(llret_ty);
                 let mut elements = Vec::with_capacity(in_len as usize);
                 for i in 0..in_len {
                     let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
                     let mut element = bx.extract_element(arg, index).to_rvalue();
-                    if matches!(in_elem.kind(), ty::Float(f) if f.bit_width() == 16) {
-                        element = super::f16_to_f32(bx.cx, element);
-                    }
-                    if matches!(out_elem.kind(), ty::Float(f) if f.bit_width() == 16) {
-                        element = super::f32_to_f16(bx.cx, element, result_elem_ty);
+                    if in_is_f16 {
+                        element = super::f16_to_float(bx.cx, element, result_elem_ty);
+                    } else if out_is_f16 {
+                        element = super::float_to_f16(bx.cx, element, result_elem_ty);
                     } else {
                         element = bx.context.new_cast(None, element, result_elem_ty);
                     }
@@ -720,6 +715,39 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 }
                 return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
             }
+            (Style::Int, Style::Float) if out_is_f16 => {
+                let arg = args[0].immediate();
+                let result_elem_ty = bx.element_type(llret_ty);
+                let mut elements = Vec::with_capacity(in_len as usize);
+                for i in 0..in_len {
+                    let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+                    let element = bx.extract_element(arg, index).to_rvalue();
+                    let element = match *in_elem.kind() {
+                        ty::Int(_) => bx.sitofp(element, result_elem_ty),
+                        ty::Uint(_) => bx.uitofp(element, result_elem_ty),
+                        _ => unreachable!(),
+                    };
+                    elements.push(element);
+                }
+                return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
+            }
+            (Style::Float, Style::Int) if in_is_f16 => {
+                let arg = args[0].immediate();
+                let result_elem_ty = bx.element_type(llret_ty);
+                let mut elements = Vec::with_capacity(in_len as usize);
+                for i in 0..in_len {
+                    let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+                    let element = bx.extract_element(arg, index).to_rvalue();
+                    let element = super::f16_to_f32(bx.cx, element);
+                    let element = match *out_elem.kind() {
+                        ty::Int(_) => bx.fptosi(element, result_elem_ty),
+                        ty::Uint(_) => bx.fptoui(element, result_elem_ty),
+                        _ => unreachable!(),
+                    };
+                    elements.push(element);
+                }
+                return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
+            }
             (Style::Unsupported, Style::Unsupported) => {
                 require!(
                     false,
@@ -899,7 +927,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
             let mut result = bx.context.new_call(None, function, &arguments);
             if is_f16 {
-                result = super::f32_to_f16(bx.cx, result, result_elem_ty);
+                result = super::float_to_f16(bx.cx, result, result_elem_ty);
             }
             vector_elements.push(result);
         }
@@ -1248,14 +1276,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     ) -> RValue<'gcc> {
         let vector_type = result_ty.unqualified().dyncast_vector().expect("vector result type");
         let elem_ty = vector_type.get_element_type();
-        let f32_type = bx.cx.type_f32();
         let elements = (0..vector_type.get_num_units())
             .map(|i| {
                 let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
                 let value = bx.extract_element(value, index).to_rvalue();
-                let value = super::f16_to_f32(bx.cx, value);
-                let result = bx.context.new_unary_op(None, UnaryOp::Minus, f32_type, value);
-                super::f32_to_f16(bx.cx, result, elem_ty)
+                bx.cx.f16_neg(value, elem_ty)
             })
             .collect::<Vec<_>>();
         bx.context.new_rvalue_from_vector(None, result_ty, &elements)
@@ -1278,7 +1303,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 let lhs = super::f16_to_f32(bx.cx, lhs);
                 let rhs = super::f16_to_f32(bx.cx, rhs);
                 let result = op(bx, lhs, rhs);
-                super::f32_to_f16(bx.cx, result, elem_ty)
+                super::float_to_f16(bx.cx, result, elem_ty)
             })
             .collect::<Vec<_>>();
         bx.context.new_rvalue_from_vector(None, result_ty, &elements)
diff --git a/tests/run/f16.rs b/tests/run/f16.rs
index 22582e518e2..0c416add4fc 100644
--- a/tests/run/f16.rs
+++ b/tests/run/f16.rs
@@ -3,9 +3,10 @@
 // Run-time:
 //   status: 0
 
-#![feature(core_intrinsics, f16)]
+#![feature(core_intrinsics, f16, float_algebraic)]
 #![allow(internal_features)]
 
+use std::cmp::Ordering;
 use std::hint::black_box;
 use std::intrinsics::{fmaf16, powif16};
 
@@ -41,4 +42,13 @@ fn main() {
     assert_f16_bits(-three, 0xc200);
     assert_f16_bits(fmaf16(one, two, -three), 0xbc00);
     assert_f16_bits(powif16(two, 3), 0x4800);
+
+    assert_f16_bits(black_box(123.0f16).algebraic_add(black_box(456.0f16)), 0x6086);
+    assert_f16_bits(black_box(123.0f16).algebraic_rem(black_box(17.0f16)), 0x4400);
+
+    let q_nan = f16::from_bits(0x7e00);
+    let s_nan = f16::from_bits(0x7c2a);
+    assert_f16_bits(-q_nan, 0xfe00);
+    assert_f16_bits(-s_nan, 0xfc2a);
+    assert_eq!(f16::total_cmp(&-q_nan, &-s_nan), Ordering::Less);
 }

From 14ac6f1333b51c8a8eb2dde8e6409503a58663ac Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sat, 30 May 2026 15:55:42 +0800
Subject: [PATCH 5/8] Make clippy happy

---
 src/context.rs        | 5 +++--
 src/intrinsic/simd.rs | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/context.rs b/src/context.rs
index 2cc06d4ad4c..db3f66ee64d 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -413,8 +413,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     pub fn is_f16_abi_storage_type(&self, typ: Type<'gcc>) -> bool {
         // Callers use this only for Rust f16 operations. The compatibility arm handles
         // GCC versions that hand back an equivalent u16 storage type instead of the exact handle.
-        self.type_kind(typ) == TypeKind::Half
-            || (!typ.is_floating_point()
+        let kind = self.type_kind(typ);
+        kind == TypeKind::Half
+            || (kind == TypeKind::Integer
                 && typ.get_size() == 2
                 && typ.is_compatible_with(self.f16_abi_type))
     }
diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs
index c8e8c49052a..6fb0782fe3b 100644
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@@ -1,8 +1,8 @@
 use std::iter::FromIterator;
 
+use gccjit::{BinaryOp, RValue, ToRValue, Type};
 #[cfg(feature = "master")]
-use gccjit::ComparisonOp;
-use gccjit::{BinaryOp, RValue, ToRValue, Type, UnaryOp};
+use gccjit::{ComparisonOp, UnaryOp};
 use rustc_abi::{Align, Size};
 use rustc_codegen_ssa::base::compare_simd_types;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};

From 9cb2a63c23dff3410468413a95564ac671cbdf4a Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sat, 30 May 2026 21:23:12 +0800
Subject: [PATCH 6/8] Support fma

---
 src/intrinsic/llvm.rs | 13 +------------
 src/intrinsic/mod.rs  | 29 +++++++++++++++--------------
 tests/run/f16.rs      |  8 +++++++-
 3 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index 1344bee88fe..395a21d9ded 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 
-use gccjit::{CType, Context, Field, Function, FunctionPtrType, RValue, ToRValue, Type};
+use gccjit::{Context, Field, Function, FunctionPtrType, RValue, ToRValue, Type};
 use rustc_codegen_ssa::traits::BuilderMethods;
 
 use crate::builder::Builder;
@@ -806,13 +806,6 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 ]
                 .into();
             }
-            "fma" => {
-                let mut new_args = args.to_vec();
-                new_args[0] = builder.context.new_cast(None, new_args[0], builder.double_type);
-                new_args[1] = builder.context.new_cast(None, new_args[1], builder.double_type);
-                new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
-                args = new_args.into();
-            }
             "__builtin_ia32_sqrtsh_mask_round"
             | "__builtin_ia32_vcvtss2sh_mask_round"
             | "__builtin_ia32_vcvtsd2sh_mask_round"
@@ -933,10 +926,6 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
                 &[first_mask, second_mask],
             );
         }
-        "fma" => {
-            let f16_type = builder.context.new_c_type(CType::Float16);
-            return_value = builder.context.new_cast(None, return_value, f16_type);
-        }
         "__builtin_ia32_encodekey128_u32" => {
             // The builtin __builtin_ia32_encodekey128_u32 writes the result in its pointer argument while
             // llvm.x86.encodekey128 returns a value.
diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs
index a31e2744424..1eb207c4600 100644
--- a/src/intrinsic/mod.rs
+++ b/src/intrinsic/mod.rs
@@ -150,10 +150,7 @@ fn generic_f16_builtin<'gcc, 'tcx>(
         _ => unreachable!(),
     };
 
-    let func = cx.context.get_builtin_function(builtin_name);
-    let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
-    let result = cx.context.new_call(None, func, &args);
-    float_to_f16(cx, result, cx.f16_abi_type)
+    call_f32_builtin_for_f16(cx, builtin_name, args)
 }
 
 fn f16_builtin<'gcc, 'tcx>(
@@ -179,6 +176,14 @@ fn f16_builtin<'gcc, 'tcx>(
         _ => unreachable!(),
     };
 
+    call_f32_builtin_for_f16(cx, builtin_name, args)
+}
+
+fn call_f32_builtin_for_f16<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    builtin_name: &str,
+    args: &[OperandRef<'tcx, RValue<'gcc>>],
+) -> RValue<'gcc> {
     let func = cx.context.get_builtin_function(builtin_name);
     let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
     let result = cx.context.new_call(None, func, &args);
@@ -617,24 +622,20 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
         args: &[OperandRef<'tcx, Self::Value>],
         is_cleanup: bool,
     ) -> Self::Value {
+        let sym = self.tcx.symbol_name(instance).name;
+        if sym == "llvm.fma.f16" {
+            return call_f32_builtin_for_f16(self.cx, "fmaf", args);
+        }
+
         let func = if let Some(&func) = self.intrinsic_instances.borrow().get(&instance) {
             func
         } else {
-            let sym = self.tcx.symbol_name(instance).name;
-
             let func = if let Some(func) = self.intrinsics.borrow().get(sym) {
                 *func
             } else {
                 self.linkage.set(FunctionType::Extern);
 
-                let func = match sym {
-                    "llvm.fma.f16" => {
-                        // fma is not a target builtin, but a normal builtin, so we handle it differently
-                        // here.
-                        self.context.get_builtin_function("fma")
-                    }
-                    _ => llvm::intrinsic(sym, self),
-                };
+                let func = llvm::intrinsic(sym, self);
 
                 self.intrinsics.borrow_mut().insert(sym.to_string(), func);
 
diff --git a/tests/run/f16.rs b/tests/run/f16.rs
index 0c416add4fc..2b5d8d577aa 100644
--- a/tests/run/f16.rs
+++ b/tests/run/f16.rs
@@ -3,13 +3,18 @@
 // Run-time:
 //   status: 0
 
-#![feature(core_intrinsics, f16, float_algebraic)]
+#![feature(core_intrinsics, f16, float_algebraic, link_llvm_intrinsics)]
 #![allow(internal_features)]
 
 use std::cmp::Ordering;
 use std::hint::black_box;
 use std::intrinsics::{fmaf16, powif16};
 
+unsafe extern "C" {
+    #[link_name = "llvm.fma.f16"]
+    fn llvm_fma_f16(a: f16, b: f16, c: f16) -> f16;
+}
+
 fn assert_f16_bits(value: f16, bits: u16) {
     assert_eq!(value.to_bits(), bits);
 }
@@ -41,6 +46,7 @@ fn main() {
     assert_f16_bits(two / one, 0x4000);
     assert_f16_bits(-three, 0xc200);
     assert_f16_bits(fmaf16(one, two, -three), 0xbc00);
+    assert_f16_bits(unsafe { llvm_fma_f16(one, two, -three) }, 0xbc00);
     assert_f16_bits(powif16(two, 3), 0x4800);
 
     assert_f16_bits(black_box(123.0f16).algebraic_add(black_box(456.0f16)), 0x6086);

From 7bfd3a766ea1e133eee7cf427b78c63f2369d832 Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sat, 30 May 2026 21:34:55 +0800
Subject: [PATCH 7/8] Support f16 in the f*_fast intrinsics

---
 src/builder.rs   | 20 ++++++++++++++++++++
 tests/run/f16.rs |  6 +++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/builder.rs b/src/builder.rs
index 403e2daef25..4aa1087f897 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -971,24 +971,44 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn fadd_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs + rhs))
+        {
+            return value;
+        }
         let result = set_rvalue_location(self, lhs + rhs);
         self.assign_to_var(result)
     }
 
     fn fsub_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs - rhs))
+        {
+            return value;
+        }
         let result = set_rvalue_location(self, lhs - rhs);
         self.assign_to_var(result)
     }
 
     fn fmul_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs * rhs))
+        {
+            return value;
+        }
         let result = set_rvalue_location(self, lhs * rhs);
         self.assign_to_var(result)
     }
 
     fn fdiv_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs / rhs))
+        {
+            return value;
+        }
         let result = set_rvalue_location(self, lhs / rhs);
         self.assign_to_var(result)
     }
diff --git a/tests/run/f16.rs b/tests/run/f16.rs
index 2b5d8d577aa..040c442ce78 100644
--- a/tests/run/f16.rs
+++ b/tests/run/f16.rs
@@ -8,7 +8,7 @@
 
 use std::cmp::Ordering;
 use std::hint::black_box;
-use std::intrinsics::{fmaf16, powif16};
+use std::intrinsics::{fadd_fast, fdiv_fast, fmaf16, fmul_fast, fsub_fast, powif16};
 
 unsafe extern "C" {
     #[link_name = "llvm.fma.f16"]
@@ -45,6 +45,10 @@ fn main() {
     assert_f16_bits(two * three, 0x4600);
     assert_f16_bits(two / one, 0x4000);
     assert_f16_bits(-three, 0xc200);
+    assert_f16_bits(unsafe { fadd_fast(one, two) }, 0x4200);
+    assert_f16_bits(unsafe { fsub_fast(two, one) }, 0x3c00);
+    assert_f16_bits(unsafe { fmul_fast(two, three) }, 0x4600);
+    assert_f16_bits(unsafe { fdiv_fast(three, two) }, 0x3e00);
     assert_f16_bits(fmaf16(one, two, -three), 0xbc00);
     assert_f16_bits(unsafe { llvm_fma_f16(one, two, -three) }, 0xbc00);
     assert_f16_bits(powif16(two, 3), 0x4800);

From 7b0471ce9626ce69d749be4271dc7732f3c41525 Mon Sep 17 00:00:00 2001
From: cijiugechu <cijiugechu@gmail.com>
Date: Sun, 31 May 2026 15:22:01 +0800
Subject: [PATCH 8/8] Force checkout rust test repo

---
 build_system/src/test.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/build_system/src/test.rs b/build_system/src/test.rs
index 3f02df83995..b649fe98609 100644
--- a/build_system/src/test.rs
+++ b/build_system/src/test.rs
@@ -515,7 +515,6 @@ fn setup_rustc(env: &mut Env, args: &TestArg) -> Result<PathBuf, String> {
     // If the repository was already cloned, command will fail, so doesn't matter.
     let _ = git_clone("https://github.com/rust-lang/rust.git", Some(&rust_dir_path), false);
     let rust_dir: Option<&Path> = Some(&rust_dir_path);
-    run_command(&[&"git", &"checkout", &"--", &"tests/"], rust_dir)?;
     run_command_with_output_and_env(&[&"git", &"fetch"], rust_dir, Some(env))?;
     let rustc_commit = match rustc_version_info(env.get("RUSTC").map(|s| s.as_str()))?.commit_hash {
         Some(commit_hash) => commit_hash,
@@ -523,12 +522,12 @@ fn setup_rustc(env: &mut Env, args: &TestArg) -> Result<PathBuf, String> {
     };
     if rustc_commit != "unknown" {
         run_command_with_output_and_env(
-            &[&"git", &"checkout", &rustc_commit],
+            &[&"git", &"checkout", &"--force", &rustc_commit],
             rust_dir,
             Some(env),
         )?;
     } else {
-        run_command_with_output_and_env(&[&"git", &"checkout"], rust_dir, Some(env))?;
+        run_command_with_output_and_env(&[&"git", &"checkout", &"--force"], rust_dir, Some(env))?;
     }
 
     let cargo = String::from_utf8(