0019-RISCV-Implement-lowering-of-ISD-SELECT.patch

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Subject: [RISCV] Implement lowering of ISD::SELECT

Although ISD::SELECT_CC is a more natural match for RISCVISD::SELECT_CC (and
ultimately the integer RISC-V conditional branch instructions), we choose to
expand ISD::SELECT_CC and lower ISD::SELECT. The appropriate compare+branch
will be created when the condition value of an ISD::SELECT is produced by an
ISD::SETCC node that operates on XLen types. Other datatypes such as floating
point don't have conditional branch instructions, and lowering ISD::SELECT
allows more flexibility for handling these cases.
---
 lib/Target/RISCV/RISCVISelLowering.cpp | 152 +++++++++++++++++++++++++++++++++
 lib/Target/RISCV/RISCVISelLowering.h   |   8 +-
 lib/Target/RISCV/RISCVInstrInfo.td     |  15 ++++
 test/CodeGen/RISCV/bare-select.ll      |  18 ++++
 test/CodeGen/RISCV/select-cc.ll        | 100 ++++++++++++++++++++++
 5 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/RISCV/bare-select.ll
 create mode 100644 test/CodeGen/RISCV/select-cc.ll

diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 98f7aa16e2e..606365ea59b 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -56,6 +56,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
 
   setOperationAction(ISD::BR_CC, XLenVT, Expand);
+  setOperationAction(ISD::SELECT, XLenVT, Custom);
+  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
+
   setBooleanContents(ZeroOrOneBooleanContent);
 
   // Function alignments (log2).
@@ -63,6 +66,45 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setPrefFunctionAlignment(3);
 }
 
+// Changes the condition code and swaps the operands in place when CC is
+// SETGT/SETLE/SETUGT/SETULE, so the SetCC operation matches one of the
+// comparisons supported directly in the RISC-V ISA. Other codes are unchanged.
+static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
+  switch (CC) {
+  default:
+    break;
+  case ISD::SETGT:
+  case ISD::SETLE:
+  case ISD::SETUGT:
+  case ISD::SETULE:
+    CC = ISD::getSetCCSwappedOperands(CC); // e.g. SETGT becomes SETLT.
+    std::swap(LHS, RHS);
+    break;
+  }
+}
+
+// Return the RISC-V branch opcode that matches the given DAG integer
+// condition code. The CondCode must be one of those supported by the RISC-V
+// ISA (see normaliseSetCC); any other value reaches llvm_unreachable.
+static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
+  switch (CC) {
+  default:
+    llvm_unreachable("Unsupported CondCode");
+  case ISD::SETEQ:
+    return RISCV::BEQ;
+  case ISD::SETNE:
+    return RISCV::BNE;
+  case ISD::SETLT:
+    return RISCV::BLT;
+  case ISD::SETGE:
+    return RISCV::BGE;
+  case ISD::SETULT:
+    return RISCV::BLTU;
+  case ISD::SETUGE:
+    return RISCV::BGEU;
+  }
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -70,6 +112,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     report_fatal_error("unimplemented operand");
   case ISD::GlobalAddress:
     return lowerGlobalAddress(Op, DAG);
+  case ISD::SELECT:
+    return lowerSELECT(Op, DAG);
   }
 }
 
@@ -95,6 +139,112 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
   }
 }
 
+SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+  SDValue CondV = Op.getOperand(0);
+  SDValue TrueV = Op.getOperand(1);
+  SDValue FalseV = Op.getOperand(2);
+  SDLoc DL(Op);
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // If the result type is XLenVT and CondV is the output of a SETCC node
+  // which also operated on XLenVT inputs, then merge the SETCC node into the
+  // lowered RISCVISD::SELECT_CC to take advantage of the integer
+  // compare+branch instructions. i.e.:
+  // (select (setcc lhs, rhs, cc), truev, falsev)
+  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
+  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
+      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
+    SDValue LHS = CondV.getOperand(0);
+    SDValue RHS = CondV.getOperand(1);
+    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
+    ISD::CondCode CCVal = CC->get();
+
+    normaliseSetCC(LHS, RHS, CCVal); // May swap LHS/RHS and update CCVal.
+
+    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
+    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
+    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
+  }
+
+  // Otherwise, compare the condition value itself against zero:
+  // (select condv, truev, falsev)
+  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
+  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
+
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
+
+  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
+}
+
+MachineBasicBlock *
+RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  assert(MI.getOpcode() == RISCV::Select_GPR_Using_CC_GPR &&
+         "Unexpected instr type to insert");
+
+  // To "insert" a SELECT instruction, we actually have to insert the triangle
+  // control-flow pattern.  The incoming instruction knows the destination vreg
+  // to set, the two registers to compare, the condcode selecting the branch
+  // to use, and the true/false values to select between.
+  //
+  // We produce the following control flow:
+  //     HeadMBB
+  //     |  \
+  //     |  IfFalseMBB
+  //     | /
+  //    TailMBB
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator I = ++BB->getIterator();
+
+  MachineBasicBlock *HeadMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+  F->insert(I, IfFalseMBB);
+  F->insert(I, TailMBB);
+  // Move all remaining instructions to TailMBB.
+  TailMBB->splice(TailMBB->begin(), HeadMBB,
+                  std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the new block which will contain the Phi node for the select.
+  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
+  // Set the successors for HeadMBB.
+  HeadMBB->addSuccessor(IfFalseMBB);
+  HeadMBB->addSuccessor(TailMBB);
+
+  // Insert appropriate branch.
+  unsigned LHS = MI.getOperand(1).getReg();
+  unsigned RHS = MI.getOperand(2).getReg();
+  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
+
+  BuildMI(HeadMBB, DL, TII.get(Opcode))
+    .addReg(LHS)
+    .addReg(RHS)
+    .addMBB(TailMBB); // Branch target when the comparison holds.
+
+  // IfFalseMBB just falls through to TailMBB.
+  IfFalseMBB->addSuccessor(TailMBB);
+
+  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
+  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
+          MI.getOperand(0).getReg())
+      .addReg(MI.getOperand(4).getReg())
+      .addMBB(HeadMBB)
+      .addReg(MI.getOperand(5).getReg())
+      .addMBB(IfFalseMBB);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return TailMBB;
+}
+
 // Calling Convention Implementation.
 #include "RISCVGenCallingConv.inc"
 
@@ -326,6 +476,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "RISCVISD::RET_FLAG";
   case RISCVISD::CALL:
     return "RISCVISD::CALL";
+  case RISCVISD::SELECT_CC:
+    return "RISCVISD::SELECT_CC";
   }
   return nullptr;
 }
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 471cd84fdcd..4d7b4697fa7 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -25,7 +25,8 @@ namespace RISCVISD {
 enum NodeType : unsigned {
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
   RET_FLAG,
-  CALL
+  CALL,
+  SELECT_CC
 };
 }
 
@@ -42,6 +43,10 @@ public:
   // This method returns the name of a target specific DAG node.
   const char *getTargetNodeName(unsigned Opcode) const override;
 
+  MachineBasicBlock *
+  EmitInstrWithCustomInserter(MachineInstr &MI,
+                              MachineBasicBlock *BB) const override;
+
 private:
   // Lower incoming arguments, copy physregs into vregs
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
@@ -60,6 +65,7 @@ private:
     return true;
   }
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
 };
 }
 
diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td
index 6f9067ae839..72abe14fd71 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/lib/Target/RISCV/RISCVInstrInfo.td
@@ -22,6 +22,9 @@ def SDT_RISCVCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
                                             SDTCisVT<1, i32>]>;
 def SDT_RISCVCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
                                           SDTCisVT<1, i32>]>;
+def SDT_RISCVSelectCC     : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
+                                                 SDTCisSameAs<0, 4>,
+                                                 SDTCisSameAs<4, 5>]>;
 
 
 def Call         : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
@@ -33,6 +36,8 @@ def CallSeqEnd   : SDNode<"ISD::CALLSEQ_END", SDT_RISCVCallSeqEnd,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def RetFlag      : SDNode<"RISCVISD::RET_FLAG", SDTNone,
                           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def SelectCC     : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC,
+                          [SDNPInGlue]>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -100,6 +105,9 @@ def simm21_lsb0 : Operand<OtherVT> {
   let DecoderMethod = "decodeSImmOperandAndLsl1<21>";
 }
 
+// An XLenVT immediate operand, an alternative to i32imm/i64imm from Target.td.
+def ixlenimm : Operand<XLenVT>;
+
 // Standalone (codegen-only) immleaf patterns.
 def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
 
@@ -320,6 +328,13 @@ def : PatGprSimm12<setlt, SLTI>;
 def : PatGprGpr<setult, SLTU>;
 def : PatGprSimm12<setult, SLTIU>;
 
+let usesCustomInserter = 1 in // Expanded in EmitInstrWithCustomInserter.
+def Select_GPR_Using_CC_GPR
+    : Pseudo<(outs GPR:$dst),
+             (ins GPR:$lhs, GPR:$rhs, ixlenimm:$imm, GPR:$src, GPR:$src2),
+             [(set XLenVT:$dst, (SelectCC GPR:$lhs, GPR:$rhs,
+              (XLenVT imm:$imm), GPR:$src, GPR:$src2))]>;
+
 /// Branches and jumps
 
 // Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch
diff --git a/test/CodeGen/RISCV/bare-select.ll b/test/CodeGen/RISCV/bare-select.ll
new file mode 100644
index 00000000000..016e3a7a9cd
--- /dev/null
+++ b/test/CodeGen/RISCV/bare-select.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+
+define i32 @bare_select(i1 %a, i32 %b, i32 %c) {
+; RV32I-LABEL: bare_select:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    addi a3, zero, 0
+; RV32I-NEXT:    bne a0, a3, .LBB0_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    addi a1, a2, 0
+; RV32I-NEXT:  .LBB0_2:
+; RV32I-NEXT:    addi a0, a1, 0
+; RV32I-NEXT:    jalr zero, ra, 0
+  %1 = select i1 %a, i32 %b, i32 %c
+  ret i32 %1
+}
diff --git a/test/CodeGen/RISCV/select-cc.ll b/test/CodeGen/RISCV/select-cc.ll
new file mode 100644
index 00000000000..ddc5983525e
--- /dev/null
+++ b/test/CodeGen/RISCV/select-cc.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+
+define i32 @foo(i32 %a, i32 *%b) {
+; RV32I-LABEL: foo:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    beq a0, a2, .LBB0_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_2:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    bne a0, a2, .LBB0_4
+; RV32I-NEXT:  # %bb.3:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_4:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    bltu a2, a0, .LBB0_6
+; RV32I-NEXT:  # %bb.5:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_6:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    bgeu a0, a2, .LBB0_8
+; RV32I-NEXT:  # %bb.7:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_8:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    bltu a0, a2, .LBB0_10
+; RV32I-NEXT:  # %bb.9:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_10:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    bgeu a2, a0, .LBB0_12
+; RV32I-NEXT:  # %bb.11:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_12:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    blt a2, a0, .LBB0_14
+; RV32I-NEXT:  # %bb.13:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_14:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    bge a0, a2, .LBB0_16
+; RV32I-NEXT:  # %bb.15:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_16:
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    blt a0, a2, .LBB0_18
+; RV32I-NEXT:  # %bb.17:
+; RV32I-NEXT:    addi a0, a2, 0
+; RV32I-NEXT:  .LBB0_18:
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    bge a1, a0, .LBB0_20
+; RV32I-NEXT:  # %bb.19:
+; RV32I-NEXT:    addi a0, a1, 0
+; RV32I-NEXT:  .LBB0_20:
+; RV32I-NEXT:    jalr zero, ra, 0
+  %val1 = load volatile i32, i32* %b
+  %tst1 = icmp eq i32 %a, %val1
+  %val2 = select i1 %tst1, i32 %a, i32 %val1
+
+  %val3 = load volatile i32, i32* %b
+  %tst2 = icmp ne i32 %val2, %val3
+  %val4 = select i1 %tst2, i32 %val2, i32 %val3
+
+  %val5 = load volatile i32, i32* %b
+  %tst3 = icmp ugt i32 %val4, %val5
+  %val6 = select i1 %tst3, i32 %val4, i32 %val5
+
+  %val7 = load volatile i32, i32* %b
+  %tst4 = icmp uge i32 %val6, %val7
+  %val8 = select i1 %tst4, i32 %val6, i32 %val7
+
+  %val9 = load volatile i32, i32* %b
+  %tst5 = icmp ult i32 %val8, %val9
+  %val10 = select i1 %tst5, i32 %val8, i32 %val9
+
+  %val11 = load volatile i32, i32* %b
+  %tst6 = icmp ule i32 %val10, %val11
+  %val12 = select i1 %tst6, i32 %val10, i32 %val11
+
+  %val13 = load volatile i32, i32* %b
+  %tst7 = icmp sgt i32 %val12, %val13
+  %val14 = select i1 %tst7, i32 %val12, i32 %val13
+
+  %val15 = load volatile i32, i32* %b
+  %tst8 = icmp sge i32 %val14, %val15
+  %val16 = select i1 %tst8, i32 %val14, i32 %val15
+
+  %val17 = load volatile i32, i32* %b
+  %tst9 = icmp slt i32 %val16, %val17
+  %val18 = select i1 %tst9, i32 %val16, i32 %val17
+
+  %val19 = load volatile i32, i32* %b
+  %tst10 = icmp sle i32 %val18, %val19
+  %val20 = select i1 %tst10, i32 %val18, i32 %val19
+
+  ret i32 %val20
+}
-- 
2.16.2