@@ -2641,6 +2641,165 @@ DeleteDeadIFuncs(Module &M,
2641
2641
return Changed;
2642
2642
}
2643
2643
2644
+ // Follows the use-def chain of \p V backwards until it finds a Function,
2645
+ // in which case it collects in \p Versions. Return true on successful
2646
+ // use-def chain traversal, false otherwise.
2647
+ static bool collectVersions (TargetTransformInfo &TTI, Value *V,
2648
+ SmallVectorImpl<Function *> &Versions) {
2649
+ if (auto *F = dyn_cast<Function>(V)) {
2650
+ if (!TTI.isMultiversionedFunction (*F))
2651
+ return false ;
2652
+ Versions.push_back (F);
2653
+ } else if (auto *Sel = dyn_cast<SelectInst>(V)) {
2654
+ if (!collectVersions (TTI, Sel->getTrueValue (), Versions))
2655
+ return false ;
2656
+ if (!collectVersions (TTI, Sel->getFalseValue (), Versions))
2657
+ return false ;
2658
+ } else if (auto *Phi = dyn_cast<PHINode>(V)) {
2659
+ for (unsigned I = 0 , E = Phi->getNumIncomingValues (); I != E; ++I)
2660
+ if (!collectVersions (TTI, Phi->getIncomingValue (I), Versions))
2661
+ return false ;
2662
+ } else {
2663
+ // Unknown instruction type. Bail.
2664
+ return false ;
2665
+ }
2666
+ return true ;
2667
+ }
2668
+
2669
+ // Bypass the IFunc Resolver of MultiVersioned functions when possible. To
2670
+ // deduce whether the optimization is legal we need to compare the target
2671
+ // features between caller and callee versions. The criteria for bypassing
2672
+ // the resolver are the following:
2673
+ //
2674
+ // * If the callee's feature set is a subset of the caller's feature set,
2675
+ // then the callee is a candidate for direct call.
2676
+ //
2677
+ // * Among such candidates the one of highest priority is the best match
2678
+ // and it shall be picked, unless there is a version of the callee with
2679
+ // higher priority than the best match which cannot be picked from a
2680
+ // higher priority caller (directly or through the resolver).
2681
+ //
2682
+ // * For every higher priority callee version than the best match, there
2683
+ // is a higher priority caller version whose feature set availability
2684
+ // is implied by the callee's feature set.
2685
+ //
2686
+ static bool OptimizeNonTrivialIFuncs (
2687
+ Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2688
+ bool Changed = false ;
2689
+
2690
+ // Cache containing the mask constructed from a function's target features.
2691
+ DenseMap<Function *, uint64_t > FeatureMask;
2692
+
2693
+ for (GlobalIFunc &IF : M.ifuncs ()) {
2694
+ if (IF.isInterposable ())
2695
+ continue ;
2696
+
2697
+ Function *Resolver = IF.getResolverFunction ();
2698
+ if (!Resolver)
2699
+ continue ;
2700
+
2701
+ if (Resolver->isInterposable ())
2702
+ continue ;
2703
+
2704
+ TargetTransformInfo &TTI = GetTTI (*Resolver);
2705
+
2706
+ // Discover the callee versions.
2707
+ SmallVector<Function *> Callees;
2708
+ if (any_of (*Resolver, [&TTI, &Callees](BasicBlock &BB) {
2709
+ if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator ()))
2710
+ if (!collectVersions (TTI, Ret->getReturnValue (), Callees))
2711
+ return true ;
2712
+ return false ;
2713
+ }))
2714
+ continue ;
2715
+
2716
+ assert (!Callees.empty () && " Expecting successful collection of versions" );
2717
+
2718
+ // Cache the feature mask for each callee.
2719
+ for (Function *Callee : Callees) {
2720
+ auto [It, Inserted] = FeatureMask.try_emplace (Callee);
2721
+ if (Inserted)
2722
+ It->second = TTI.getFeatureMask (*Callee);
2723
+ }
2724
+
2725
+ // Sort the callee versions in decreasing priority order.
2726
+ sort (Callees, [&](auto *LHS, auto *RHS) {
2727
+ return FeatureMask[LHS] > FeatureMask[RHS];
2728
+ });
2729
+
2730
+ // Find the callsites and cache the feature mask for each caller.
2731
+ SmallVector<Function *> Callers;
2732
+ DenseMap<Function *, SmallVector<CallBase *>> CallSites;
2733
+ for (User *U : IF.users ()) {
2734
+ if (auto *CB = dyn_cast<CallBase>(U)) {
2735
+ if (CB->getCalledOperand () == &IF) {
2736
+ Function *Caller = CB->getFunction ();
2737
+ auto [FeatIt, FeatInserted] = FeatureMask.try_emplace (Caller);
2738
+ if (FeatInserted)
2739
+ FeatIt->second = TTI.getFeatureMask (*Caller);
2740
+ auto [CallIt, CallInserted] = CallSites.try_emplace (Caller);
2741
+ if (CallInserted)
2742
+ Callers.push_back (Caller);
2743
+ CallIt->second .push_back (CB);
2744
+ }
2745
+ }
2746
+ }
2747
+
2748
+ // Sort the caller versions in decreasing priority order.
2749
+ sort (Callers, [&](auto *LHS, auto *RHS) {
2750
+ return FeatureMask[LHS] > FeatureMask[RHS];
2751
+ });
2752
+
2753
+ auto implies = [](uint64_t A, uint64_t B) { return (A & B) == B; };
2754
+
2755
+ // Index to the highest priority candidate.
2756
+ unsigned I = 0 ;
2757
+ // Now try to redirect calls starting from higher priority callers.
2758
+ for (Function *Caller : Callers) {
2759
+ assert (I < Callees.size () && " Found callers of equal priority" );
2760
+
2761
+ Function *Callee = Callees[I];
2762
+ uint64_t CallerBits = FeatureMask[Caller];
2763
+ uint64_t CalleeBits = FeatureMask[Callee];
2764
+
2765
+ // In the case of FMV callers, we know that all higher priority callers
2766
+ // than the current one did not get selected at runtime, which helps
2767
+ // reason about the callees (if they have versions that mandate presence
2768
+ // of the features which we already know are unavailable on this target).
2769
+ if (TTI.isMultiversionedFunction (*Caller)) {
2770
+ // If the feature set of the caller implies the feature set of the
2771
+ // highest priority candidate then it shall be picked. In case of
2772
+ // identical sets advance the candidate index one position.
2773
+ if (CallerBits == CalleeBits)
2774
+ ++I;
2775
+ else if (!implies (CallerBits, CalleeBits)) {
2776
+ // Keep advancing the candidate index as long as the caller's
2777
+ // features are a subset of the current candidate's.
2778
+ while (implies (CalleeBits, CallerBits)) {
2779
+ if (++I == Callees.size ())
2780
+ break ;
2781
+ CalleeBits = FeatureMask[Callees[I]];
2782
+ }
2783
+ continue ;
2784
+ }
2785
+ } else {
2786
+ // We can't reason much about non-FMV callers. Just pick the highest
2787
+ // priority callee if it matches, otherwise bail.
2788
+ if (I > 0 || !implies (CallerBits, CalleeBits))
2789
+ continue ;
2790
+ }
2791
+ auto &Calls = CallSites[Caller];
2792
+ for (CallBase *CS : Calls)
2793
+ CS->setCalledOperand (Callee);
2794
+ Changed = true ;
2795
+ }
2796
+ if (IF.use_empty () ||
2797
+ all_of (IF.users (), [](User *U) { return isa<GlobalAlias>(U); }))
2798
+ NumIFuncsResolved++;
2799
+ }
2800
+ return Changed;
2801
+ }
2802
+
2644
2803
static bool
2645
2804
optimizeGlobalsInModule (Module &M, const DataLayout &DL,
2646
2805
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2707,6 +2866,9 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
2707
2866
// Optimize IFuncs whose callees are statically known.
2708
2867
LocalChange |= OptimizeStaticIFuncs (M);
2709
2868
2869
+ // Optimize IFuncs based on the target features of the caller.
2870
+ LocalChange |= OptimizeNonTrivialIFuncs (M, GetTTI);
2871
+
2710
2872
// Remove any IFuncs that are now dead.
2711
2873
LocalChange |= DeleteDeadIFuncs (M, NotDiscardableComdats);
2712
2874
0 commit comments