Skip to content

Commit

Permalink
[RISCV][VLOPT] Add support for Widening Floating-Point Fused Multiply-Add Instructions (#126485)
Browse files Browse the repository at this point in the history

We already had getOperandInfo support, so this marks the instructions as
supported in isCandidate. It also adds support for vfwmaccbf16.v{v,f}
from the zvfbfwma extension.
  • Loading branch information
lukel97 authored Feb 10, 2025
1 parent 71ee257 commit 771f6b9
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 4 deletions.
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,8 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VFWMSAC_VV:
case RISCV::VFWNMSAC_VF:
case RISCV::VFWNMSAC_VV:
case RISCV::VFWMACCBF16_VV:
case RISCV::VFWMACCBF16_VF:
// Vector Widening Floating-Point Add/Subtract Instructions
// Dest EEW=2*SEW. Source EEW=SEW.
case RISCV::VFWADD_VV:
Expand Down Expand Up @@ -1050,6 +1052,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VFMSUB_VF:
case RISCV::VFNMSUB_VV:
case RISCV::VFNMSUB_VF:
// Vector Widening Floating-Point Fused Multiply-Add Instructions
case RISCV::VFWMACC_VV:
case RISCV::VFWMACC_VF:
case RISCV::VFWNMACC_VV:
case RISCV::VFWNMACC_VF:
case RISCV::VFWMSAC_VV:
case RISCV::VFWMSAC_VF:
case RISCV::VFWNMSAC_VV:
case RISCV::VFWNMSAC_VF:
case RISCV::VFWMACCBF16_VV:
case RISCV::VFWMACCBF16_VF:
// Vector Floating-Point MIN/MAX Instructions
case RISCV::VFMIN_VF:
case RISCV::VFMIN_VV:
Expand Down
218 changes: 214 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT

; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer

Expand Down Expand Up @@ -4351,3 +4351,213 @@ define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vsca
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
ret <vscale x 4 x float> %2
}

; vfwmacc.vv (widening FP fused multiply-add, vector-vector): without the VL
; optimizer the FMA runs at VLMAX (vsetvli a1, zero); with it enabled, the AVL
; is shrunk to %vl (vsetvli zero, a0), the demand of the consuming vfadd.
define <vscale x 4 x double> @vfwmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwmacc_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwmacc_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwmacc.vv v8, v12, v14
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwmacc.vf (widening FP fused multiply-add, vector-scalar): the VL optimizer
; replaces the VLMAX AVL (vsetvli a1, zero) with %vl (vsetvli zero, a0), since
; only %vl elements of the FMA result are read by the vfadd.
define <vscale x 4 x double> @vfwmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwmacc_vf:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwmacc.vf v8, fa0, v12
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwmacc_vf:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwmacc.vf v8, fa0, v12
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwnmacc.vv (widening negated FP fused multiply-add, vector-vector): with the
; VL optimizer, the FMA's AVL drops from VLMAX (vsetvli a1, zero) to the %vl
; (vsetvli zero, a0) demanded by the trailing vfadd.
define <vscale x 4 x double> @vfwnmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwnmacc_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwnmacc.vv v8, v12, v14
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwnmacc_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwnmacc.vv v8, v12, v14
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwnmacc.vf (widening negated FP fused multiply-add, vector-scalar): the VL
; optimizer reduces the AVL from VLMAX (vsetvli a1, zero) to %vl
; (vsetvli zero, a0), the element count actually consumed downstream.
define <vscale x 4 x double> @vfwnmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwnmacc_vf:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwnmacc.vf v8, fa0, v12
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwnmacc_vf:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwnmacc.vf v8, fa0, v12
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwmsac.vv (widening FP fused multiply-subtract, vector-vector): checks the
; VL optimizer narrows the FMA's AVL from VLMAX (vsetvli a1, zero) to %vl
; (vsetvli zero, a0) required by the consuming vfadd.
define <vscale x 4 x double> @vfwmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwmsac_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwmsac.vv v8, v12, v14
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwmsac_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwmsac.vv v8, v12, v14
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwmsac.vf (widening FP fused multiply-subtract, vector-scalar): with the VL
; optimizer, the AVL drops from VLMAX (vsetvli a1, zero) to %vl
; (vsetvli zero, a0), matching the demand of the vfadd user.
define <vscale x 4 x double> @vfwmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwmsac_vf:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwmsac.vf v8, fa0, v12
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwmsac_vf:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwmsac.vf v8, fa0, v12
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwnmsac.vv (widening negated FP fused multiply-subtract, vector-vector):
; verifies the VL optimizer rewrites the FMA's AVL from VLMAX
; (vsetvli a1, zero) to %vl (vsetvli zero, a0).
define <vscale x 4 x double> @vfwnmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwnmsac_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwnmsac.vv v8, v12, v14
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwnmsac_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwnmsac.vv v8, v12, v14
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwnmsac.vf (widening negated FP fused multiply-subtract, vector-scalar):
; the VL optimizer narrows the AVL from VLMAX (vsetvli a1, zero) to %vl
; (vsetvli zero, a0), since only %vl elements are read by the vfadd.
define <vscale x 4 x double> @vfwnmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwnmsac_vf:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vfwnmsac.vf v8, fa0, v12
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwnmsac_vf:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vfwnmsac.vf v8, fa0, v12
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v16
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x double> %2
}

; vfwmaccbf16.vv (bf16 widening FMA from zvfbfwma, vector-vector; bf16 source,
; f32 destination): with the VL optimizer the AVL is reduced from VLMAX
; (vsetvli a1, zero) to %vl (vsetvli zero, a0) demanded by the vfadd.
define <vscale x 4 x float> @vfwmaccbf16_vv(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwmaccbf16_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwmaccbf16_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; VLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v12
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}

; vfwmaccbf16.vf (bf16 widening FMA from zvfbfwma, vector-scalar; bf16 source,
; f32 destination): the VL optimizer rewrites the AVL from VLMAX
; (vsetvli a1, zero) to %vl (vsetvli zero, a0) consumed by the vfadd.
define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
; NOVLOPT-LABEL: vfwmaccbf16_vf:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfwmaccbf16_vf:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; VLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v12
; VLOPT-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}

0 comments on commit 771f6b9

Please sign in to comment.