llvm · farzonl · Mar 12, 2025 · Feb 25, 2025 · Feb 26, 2025 · Feb 27, 2025
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1237,40 +1237,6 @@ float3 floor(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
 float4 floor(float4);
 
-//===----------------------------------------------------------------------===//
-// fmod builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T fmod(T x, T y)
-/// \brief Returns the linear interpolation of x to y.
-/// \param x [in] The dividend.
-/// \param y [in] The divisor.
-///
-/// Return the floating-point remainder of the x parameter divided by the y
-/// parameter.
-
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-half fmod(half, half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-half2 fmod(half2, half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-half3 fmod(half3, half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-half4 fmod(half4, half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-float fmod(float, float);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-float2 fmod(float2, float2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-float3 fmod(float3, float3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
-float4 fmod(float4, float4);
-
 //===----------------------------------------------------------------------===//
 // frac builtins
 //===----------------------------------------------------------------------===//

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -57,6 +57,32 @@ constexpr vector<T, L> reflect_vec_impl(vector<T, L> I, vector<T, L> N) {
   return I - 2 * N * dot(I, N);
 #endif
 }
+
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+fmod_impl(T X, T Y) {
+#if !defined(__DIRECTX__)
+  return __builtin_elementwise_fmod(X, Y);
+#else
+  T div = X / Y;
+  bool ge = div >= 0;
+  T frc = frac(abs(div));
+  return select<T>(ge, frc, -frc) * Y;
+#endif
+}
+
+template <typename T, int N>
+constexpr vector<T, N> fmod_vec_impl(vector<T, N> X, vector<T, N> Y) {
+#if !defined(__DIRECTX__)
+  return __builtin_elementwise_fmod(X, Y);
+#else
+  vector<T, N> div = X / Y;
+  vector<bool, N> ge = div >= 0;
+  vector<T, N> frc = frac(abs(div));
+  return select<T>(ge, frc, -frc) * Y;
+#endif
+}
+
 } // namespace __detail
 } // namespace hlsl
 

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -117,6 +117,34 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR<float, N> X,
   return __detail::distance_vec_impl(X, Y);
 }
 
+//===----------------------------------------------------------------------===//
+// fmod builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T fmod(T x, T y)
+/// \brief Returns the linear interpolation of x to y.
+/// \param x [in] The dividend.
+/// \param y [in] The divisor.
+///
+/// Return the floating-point remainder of the x parameter divided by the y
+/// parameter.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half fmod(half X, half Y) { return __detail::fmod_impl(X, Y); }
+
+const inline float fmod(float X, float Y) { return __detail::fmod_impl(X, Y); }
+
+template <int N>
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline vector<half, N> fmod(vector<half, N> X, vector<half, N> Y) {
+  return __detail::fmod_vec_impl(X, Y);
+}
+
+template <int N>
+const inline vector<float, N> fmod(vector<float, N> X, vector<float, N> Y) {
+  return __detail::fmod_vec_impl(X, Y);
+}
+
 //===----------------------------------------------------------------------===//
 // length builtins
 //===----------------------------------------------------------------------===//

diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl
@@ -4,16 +4,16 @@
 //
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=half
+// RUN:   -emit-llvm -o - | FileCheck %s -DFNATTRS="noundef nofpclass(nan inf)" \
+// RUN:   -DTYPE=half -DINT_TYPE=f16 --check-prefixes=DXCHECK
 
 //
 // ---------- No Native Half support test -----------
 //
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s \
-// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=float
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm \
+// RUN:   -o - | FileCheck %s -DFNATTRS="noundef nofpclass(nan inf)" \
+// RUN:   -DTYPE=float -DINT_TYPE=f32 --check-prefixes=DXCHECK
 
 
 // Spirv target:
@@ -22,56 +22,128 @@
 //
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   -emit-llvm -o - | FileCheck %s \
 // RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half
 
 //
 // ---------- No Native Half support test -----------
 //
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm \
 // RUN:   -o - | FileCheck %s \
 // RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float
 
 
 
+// DXCHECK: define [[FNATTRS]] [[TYPE]] @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge [[TYPE]] %{{.*}}, 0
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.[[INT_TYPE]]([[TYPE]] %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.[[INT_TYPE]]([[TYPE]] %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, [[TYPE]] %{{.*}}, [[TYPE]] %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn [[TYPE]] %hlsl.select.i, %{{.*}}
+// DXCHECK: ret [[TYPE]] %mul.i
 // CHECK: define [[FNATTRS]] [[TYPE]] @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]]
-// CHECK: ret [[TYPE]] %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn [[TYPE]]
+// CHECK: ret [[TYPE]] %fmod.i
 half test_fmod_half(half p0, half p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x [[TYPE]]> %{{.*}}, zeroinitializer
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2[[INT_TYPE]](<2 x [[TYPE]]> %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2[[INT_TYPE]](<2 x [[TYPE]]> %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %{{.*}}, <2 x [[TYPE]]> %{{.*}}, <2 x [[TYPE]]> %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %hlsl.select.i, %{{.*}}
+// DXCHECK: ret <2 x [[TYPE]]> %mul.i
 // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]>
-// CHECK: ret <2 x [[TYPE]]> %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]>
+// CHECK: ret <2 x [[TYPE]]> %fmod.i
 half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x [[TYPE]]> %{{.*}}, zeroinitializer
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.fabs.v3[[INT_TYPE]](<3 x [[TYPE]]> %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.dx.frac.v3[[INT_TYPE]](<3 x [[TYPE]]> %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %{{.*}}, <3 x [[TYPE]]> %{{.*}}, <3 x [[TYPE]]> %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %hlsl.select.i, %{{.*}}
+// DXCHECK: ret <3 x [[TYPE]]> %mul.i
 // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]>
-// CHECK: ret <3 x [[TYPE]]> %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]>
+// CHECK: ret <3 x [[TYPE]]> %fmod.i
 half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x [[TYPE]]> %{{.*}}, zeroinitializer
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.fabs.v4[[INT_TYPE]](<4 x [[TYPE]]> %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.dx.frac.v4[[INT_TYPE]](<4 x [[TYPE]]> %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %{{.*}}, <4 x [[TYPE]]> %{{.*}}, <4 x [[TYPE]]> %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %hlsl.select.i, %{{.*}}
+// DXCHECK: ret <4 x [[TYPE]]> %mul.i
 // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]>
-// CHECK: ret <4 x [[TYPE]]> %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]>
+// CHECK: ret <4 x [[TYPE]]> %fmod.i
 half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] float @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn float %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge float %{{.*}}, 0.000000e+00
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn float @llvm.dx.frac.f32(float %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn float %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float %{{.*}}, float %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float %hlsl.select.i, %{{.*}}
+// DXCHECK: ret float %mul.i
 // CHECK: define [[FNATTRS]] float @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float
-// CHECK: ret float %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn float
+// CHECK: ret float %fmod.i
 float test_fmod_float(float p0, float p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] <2 x float> @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x float> %{{.*}}, zeroinitializer
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32(<2 x float> %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.frac.v2f32(<2 x float> %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x float> %hlsl.select.i, %{{.*}}
+// DXCHECK: ret <2 x float> %mul.i
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x float>
-// CHECK: ret <2 x float> %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x float>
+// CHECK: ret <2 x float> %fmod.i
 float2 test_fmod_float2(float2 p0, float2 p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] <3 x float> @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x float> %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x float> %{{.*}}, zeroinitializer
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32(<3 x float> %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.frac.v3f32(<3 x float> %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x float> %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x float> %hlsl.select.i, %{{.*}}
+// DXCHECK: ret <3 x float> %mul.i
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x float>
-// CHECK: ret <3 x float> %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <3 x float>
+// CHECK: ret <3 x float> %fmod.i
 float3 test_fmod_float3(float3 p0, float3 p1) { return fmod(p0, p1); }
 
+// DXCHECK: define [[FNATTRS]] <4 x float> @
+// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x float> %{{.*}}, %{{.*}}
+// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x float> %{{.*}}, zeroinitializer
+// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}})
+// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.frac.v4f32(<4 x float> %elt.abs.i)
+// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x float> %{{.*}}
+// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %fneg.i
+// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %hlsl.select.i, %{{.*}}
+// DXCHECK: ret <4 x float> %mul.i
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x float>
-// CHECK: ret <4 x float> %fmod
+// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <4 x float>
+// CHECK: ret <4 x float> %fmod.i
 float4 test_fmod_float4(float4 p0, float4 p1) { return fmod(p0, p1); }
 
diff --git a/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float test_no_second_arg(float2 p0) {
+  return fmod(p0);
+  // expected-error@-1 {{no matching function for call to 'fmod'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+}
+
+float test_too_many_arg(float2 p0) {
+  return fmod(p0, p0, p0);
+  // expected-error@-1 {{no matching function for call to 'fmod'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+}
+
+float test_double_inputs(double p0, double p1) {
+  return fmod(p0, p1);
+  // expected-error@-1  {{call to 'fmod' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float test_int_inputs(int p0, int p1) {
+  return fmod(p0, p1);
+  // expected-error@-1  {{call to 'fmod' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}