-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][FMV] Add missing feature dependencies and detect at runtime. #119231
Conversation
i8mm -> simd fp16fml -> simd frintts -> fp bf16 -> simd sme -> fp16 Approved in ACLE as ARM-software/acle#368
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-aarch64 Author: Alexandros Lamprineas (labrinea) Changesi8mm -> simd Approved in ACLE as ARM-software/acle#368 Patch is 63.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119231.diff 15 Files Affected:
diff --git a/clang/test/CodeGen/AArch64/cpu-supports.c b/clang/test/CodeGen/AArch64/cpu-supports.c
index 76fcea0be31581..406201781d4803 100644
--- a/clang/test/CodeGen/AArch64/cpu-supports.c
+++ b/clang/test/CodeGen/AArch64/cpu-supports.c
@@ -18,8 +18,8 @@
// CHECK-NEXT: br label [[RETURN:%.*]]
// CHECK: if.end:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17867063951360
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17867063951360
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17936857268992
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17936857268992
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[IF_THEN1:%.*]], label [[IF_END2:%.*]]
// CHECK: if.then1:
@@ -27,8 +27,8 @@
// CHECK-NEXT: br label [[RETURN]]
// CHECK: if.end2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171136785840078848
-// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171136785840078848
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171141184020873984
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171141184020873984
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[IF_THEN3:%.*]], label [[IF_END4:%.*]]
// CHECK: if.then3:
diff --git a/clang/test/CodeGen/AArch64/fmv-dependencies.c b/clang/test/CodeGen/AArch64/fmv-dependencies.c
index f74b7aa32c7dca..3a524b89496e04 100644
--- a/clang/test/CodeGen/AArch64/fmv-dependencies.c
+++ b/clang/test/CodeGen/AArch64/fmv-dependencies.c
@@ -183,10 +183,10 @@ int caller() {
// CHECK: attributes #[[sha2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+v8a"
// CHECK: attributes #[[sha3]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+sha3,+v8a"
// CHECK: attributes #[[sm4]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sm4,+v8a"
-// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+v8a"
-// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a"
-// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a"
-// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a"
+// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+v8a"
+// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a"
+// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a"
+// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme2,+v8a"
// CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a"
// CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
// CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a"
diff --git a/clang/test/CodeGen/AArch64/mixed-target-attributes.c b/clang/test/CodeGen/AArch64/mixed-target-attributes.c
index bb6fb7eb8862a2..1ccb0c6177c8c5 100644
--- a/clang/test/CodeGen/AArch64/mixed-target-attributes.c
+++ b/clang/test/CodeGen/AArch64/mixed-target-attributes.c
@@ -66,24 +66,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @explicit_default._Mjscvt
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
// CHECK-NEXT: ret ptr @explicit_default._Mrdm
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16
-// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
@@ -140,24 +140,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @implicit_default._Mjscvt
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
// CHECK-NEXT: ret ptr @implicit_default._Mrdm
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16
-// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
@@ -207,16 +207,16 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @default_def_with_version_decls._Mjscvt
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 784
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 784
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 961279424754d5..6b7acbbd4fc597 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -64,16 +64,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 32896
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 32896
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @ftc._MaesMlse
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 68719476736
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
@@ -100,16 +100,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186049280
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186049280
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @ftc_def._MmemtagMsha2
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4864
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4864
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
@@ -129,8 +129,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4864
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4864
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
@@ -157,8 +157,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1808
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1808
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
@@ -310,16 +310,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 619551195904
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 619551195904
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65792
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65792
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
@@ -360,8 +360,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014673387388928
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014673387388928
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014743180706560
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014743180706560
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
@@ -376,8 +376,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_inline1._MpredresMrcpc
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513
-// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 769
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 769
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
@@ -411,8 +411,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
@@ -521,16 +521,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI-NEXT: resolver_entry:
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 32896
-// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 32896
+// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664
+// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK-MTE-BTI: resolver_return:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse
// CHECK-MTE-BTI: resolver_else:
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736
-// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 68719476736
+// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352
+// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK-MTE-BTI: resolver_return1:
@@ -557,16 +557,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI-NEXT: resolver_entry:
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512
-// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512
+// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186049280
+// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186049280
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK-MTE-BTI: resolver_return:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_def._MmemtagMsha2
// CHECK-MTE-BTI: resolver_else:
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096
-// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096
+// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4864
+// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4864
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK-MTE-BTI: resolver_return1:
@@ -586,8 +586,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI-NEXT: resolver_entry:
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096
-// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096
+// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4864
+// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4864
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK-MTE-BTI: resolver_return:
@@ -614,8 +614,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "d...
[truncated]
|
@sdesmalen-arm the dependencies were agreed internally with @paulwalker-arm. Let us know if you have any reservations. |
* inlined ExtensionSet::toCpuSupportsMask inside AArch64::getCpuSupportsMask * added explanatory comments for AArch64::getCpuSupportsMask * replaced auto in range-based loops with explicit type
@sdesmalen-arm thanks for reviewing. I have addressed your comments. Do the llvm tests look okay? |
i8mm -> simd
fp16fml -> simd
frintts -> fp
bf16 -> simd
sme -> fp16
Approved in ACLE as ARM-software/acle#368