Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the fmod intrinsic #130320

Merged
merged 11 commits into from
Mar 12, 2025
34 changes: 0 additions & 34 deletions clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1237,40 +1237,6 @@ float3 floor(float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
float4 floor(float4);

//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//

/// \fn T fmod(T x, T y)
/// \brief Returns the linear interpolation of x to y.
/// \param x [in] The dividend.
/// \param y [in] The divisor.
///
/// Return the floating-point remainder of the x parameter divided by the y
/// parameter.

_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
half fmod(half, half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
half2 fmod(half2, half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
half3 fmod(half3, half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
half4 fmod(half4, half4);

_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
float fmod(float, float);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
float2 fmod(float2, float2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
float3 fmod(float3, float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_fmod)
float4 fmod(float4, float4);

//===----------------------------------------------------------------------===//
// frac builtins
//===----------------------------------------------------------------------===//
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,32 @@ constexpr vector<T, L> reflect_vec_impl(vector<T, L> I, vector<T, L> N) {
return I - 2 * N * dot(I, N);
#endif
}

template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
fmod_impl(T X, T Y) {
#if !defined(__DIRECTX__)
return __builtin_elementwise_fmod(X, Y);
#else
T div = X / Y;
bool ge = div >= 0;
T frc = frac(abs(div));
return select<T>(ge, frc, -frc) * Y;
#endif
}

template <typename T, int N>
constexpr vector<T, N> fmod_vec_impl(vector<T, N> X, vector<T, N> Y) {
#if !defined(__DIRECTX__)
return __builtin_elementwise_fmod(X, Y);
#else
vector<T, N> div = X / Y;
vector<bool, N> ge = div >= 0;
vector<T, N> frc = frac(abs(div));
return select<T>(ge, frc, -frc) * Y;
#endif
}

} // namespace __detail
} // namespace hlsl

Expand Down
28 changes: 28 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,34 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR<float, N> X,
return __detail::distance_vec_impl(X, Y);
}

//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//

/// \fn T fmod(T x, T y)
/// \brief Returns the linear interpolation of x to y.
/// \param x [in] The dividend.
/// \param y [in] The divisor.
///
/// Return the floating-point remainder of the x parameter divided by the y
/// parameter.

_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline half fmod(half X, half Y) { return __detail::fmod_impl(X, Y); }

const inline float fmod(float X, float Y) { return __detail::fmod_impl(X, Y); }

template <int N>
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline vector<half, N> fmod(vector<half, N> X, vector<half, N> Y) {
return __detail::fmod_vec_impl(X, Y);
}

template <int N>
const inline vector<float, N> fmod(vector<float, N> X, vector<float, N> Y) {
return __detail::fmod_vec_impl(X, Y);
}

//===----------------------------------------------------------------------===//
// length builtins
//===----------------------------------------------------------------------===//
Expand Down
118 changes: 95 additions & 23 deletions clang/test/CodeGenHLSL/builtins/fmod.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
//
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=half
// RUN: -emit-llvm -o - | FileCheck %s -DFNATTRS="noundef nofpclass(nan inf)" \
// RUN: -DTYPE=half -DINT_TYPE=f16 --check-prefixes=DXCHECK

//
// ---------- No Native Half support test -----------
//
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s \
// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=float
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm \
// RUN: -o - | FileCheck %s -DFNATTRS="noundef nofpclass(nan inf)" \
// RUN: -DTYPE=float -DINT_TYPE=f32 --check-prefixes=DXCHECK


// Spirv target:
Expand All @@ -22,56 +22,128 @@
//
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: -emit-llvm -o - | FileCheck %s \
// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half

//
// ---------- No Native Half support test -----------
//
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \
// RUN: -o - | FileCheck %s \
// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float



// DXCHECK: define [[FNATTRS]] [[TYPE]] @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge [[TYPE]] %{{.*}}, 0
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.[[INT_TYPE]]([[TYPE]] %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.[[INT_TYPE]]([[TYPE]] %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, [[TYPE]] %{{.*}}, [[TYPE]] %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn [[TYPE]] %hlsl.select.i, %{{.*}}
// DXCHECK: ret [[TYPE]] %mul.i
// CHECK: define [[FNATTRS]] [[TYPE]] @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]]
// CHECK: ret [[TYPE]] %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn [[TYPE]]
// CHECK: ret [[TYPE]] %fmod.i
half test_fmod_half(half p0, half p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x [[TYPE]]> %{{.*}}, zeroinitializer
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2[[INT_TYPE]](<2 x [[TYPE]]> %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2[[INT_TYPE]](<2 x [[TYPE]]> %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %{{.*}}, <2 x [[TYPE]]> %{{.*}}, <2 x [[TYPE]]> %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %hlsl.select.i, %{{.*}}
// DXCHECK: ret <2 x [[TYPE]]> %mul.i
// CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]>
// CHECK: ret <2 x [[TYPE]]> %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]>
// CHECK: ret <2 x [[TYPE]]> %fmod.i
half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x [[TYPE]]> %{{.*}}, zeroinitializer
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.fabs.v3[[INT_TYPE]](<3 x [[TYPE]]> %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> @llvm.dx.frac.v3[[INT_TYPE]](<3 x [[TYPE]]> %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %{{.*}}, <3 x [[TYPE]]> %{{.*}}, <3 x [[TYPE]]> %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> %hlsl.select.i, %{{.*}}
// DXCHECK: ret <3 x [[TYPE]]> %mul.i
// CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]>
// CHECK: ret <3 x [[TYPE]]> %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]>
// CHECK: ret <3 x [[TYPE]]> %fmod.i
half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x [[TYPE]]> %{{.*}}, zeroinitializer
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.fabs.v4[[INT_TYPE]](<4 x [[TYPE]]> %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> @llvm.dx.frac.v4[[INT_TYPE]](<4 x [[TYPE]]> %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %{{.*}}, <4 x [[TYPE]]> %{{.*}}, <4 x [[TYPE]]> %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> %hlsl.select.i, %{{.*}}
// DXCHECK: ret <4 x [[TYPE]]> %mul.i
// CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]>
// CHECK: ret <4 x [[TYPE]]> %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]>
// CHECK: ret <4 x [[TYPE]]> %fmod.i
half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] float @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn float %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge float %{{.*}}, 0.000000e+00
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn float @llvm.dx.frac.f32(float %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn float %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float %{{.*}}, float %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float %hlsl.select.i, %{{.*}}
// DXCHECK: ret float %mul.i
// CHECK: define [[FNATTRS]] float @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float
// CHECK: ret float %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn float
// CHECK: ret float %fmod.i
float test_fmod_float(float p0, float p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] <2 x float> @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x float> %{{.*}}, zeroinitializer
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32(<2 x float> %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.frac.v2f32(<2 x float> %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x float> %hlsl.select.i, %{{.*}}
// DXCHECK: ret <2 x float> %mul.i
// CHECK: define [[FNATTRS]] <2 x float> @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x float>
// CHECK: ret <2 x float> %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x float>
// CHECK: ret <2 x float> %fmod.i
float2 test_fmod_float2(float2 p0, float2 p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] <3 x float> @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x float> %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <3 x float> %{{.*}}, zeroinitializer
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32(<3 x float> %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.frac.v3f32(<3 x float> %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x float> %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <3 x i1> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <3 x float> %hlsl.select.i, %{{.*}}
// DXCHECK: ret <3 x float> %mul.i
// CHECK: define [[FNATTRS]] <3 x float> @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x float>
// CHECK: ret <3 x float> %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <3 x float>
// CHECK: ret <3 x float> %fmod.i
float3 test_fmod_float3(float3 p0, float3 p1) { return fmod(p0, p1); }

// DXCHECK: define [[FNATTRS]] <4 x float> @
// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <4 x float> %{{.*}}, %{{.*}}
// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <4 x float> %{{.*}}, zeroinitializer
// DXCHECK: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}})
// DXCHECK: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.frac.v4f32(<4 x float> %elt.abs.i)
// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <4 x float> %{{.*}}
// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %fneg.i
// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %hlsl.select.i, %{{.*}}
// DXCHECK: ret <4 x float> %mul.i
// CHECK: define [[FNATTRS]] <4 x float> @
// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x float>
// CHECK: ret <4 x float> %fmod
// CHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <4 x float>
// CHECK: ret <4 x float> %fmod.i
float4 test_fmod_float4(float4 p0, float4 p1) { return fmod(p0, p1); }

33 changes: 33 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify

float test_no_second_arg(float2 p0) {
return fmod(p0);
// expected-error@-1 {{no matching function for call to 'fmod'}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
}

float test_too_many_arg(float2 p0) {
return fmod(p0, p0, p0);
// expected-error@-1 {{no matching function for call to 'fmod'}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
}

float test_double_inputs(double p0, double p1) {
return fmod(p0, p1);
// expected-error@-1 {{call to 'fmod' is ambiguous}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
}

float test_int_inputs(int p0, int p1) {
return fmod(p0, p1);
// expected-error@-1 {{call to 'fmod' is ambiguous}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
Loading