Skip to content

[NVPTX] Cleanup ISel code after float register removal, use BasicNVPTXInst #141711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into llvm:main from AlexMaclean:dev/amaclean/upstream/misc-cleanup
May 29, 2025

Conversation

AlexMaclean
Copy link
Member

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 28, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Patch is 398.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141711.diff

19 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp (-6)
  • (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+46-72)
  • (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+529-728)
  • (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+453-656)
  • (modified) llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp (+4-6)
  • (modified) llvm/test/CodeGen/NVPTX/bf16-instructions.ll (+299-299)
  • (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll (+25-25)
  • (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+2-4)
  • (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+49-59)
  • (modified) llvm/test/CodeGen/NVPTX/fexp2.ll (+60-60)
  • (modified) llvm/test/CodeGen/NVPTX/flog2.ll (+40-40)
  • (modified) llvm/test/CodeGen/NVPTX/fma-relu-contract.ll (+298-298)
  • (modified) llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll (+230-230)
  • (modified) llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll (+478-478)
  • (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+4-6)
  • (modified) llvm/test/CodeGen/NVPTX/inline-asm.ll (+3-3)
  • (modified) llvm/test/CodeGen/NVPTX/math-intrins.ll (+124-124)
  • (modified) llvm/test/CodeGen/NVPTX/param-add.ll (+11-11)
  • (modified) llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir (+7-7)
diff --git a/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp b/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp
index 008209785a683..cd404819cb837 100644
--- a/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp
@@ -53,16 +53,10 @@ static bool traverseMoveUse(MachineInstr &U, const MachineRegisterInfo &MRI,
                             SmallVectorImpl<MachineInstr *> &RemoveList,
                             SmallVectorImpl<MachineInstr *> &LoadInsts) {
   switch (U.getOpcode()) {
-  case NVPTX::LD_f32:
-  case NVPTX::LD_f64:
   case NVPTX::LD_i16:
   case NVPTX::LD_i32:
   case NVPTX::LD_i64:
   case NVPTX::LD_i8:
-  case NVPTX::LDV_f32_v2:
-  case NVPTX::LDV_f32_v4:
-  case NVPTX::LDV_f64_v2:
-  case NVPTX::LDV_f64_v4:
   case NVPTX::LDV_i16_v2:
   case NVPTX::LDV_i16_v4:
   case NVPTX::LDV_i32_v2:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index b05a4713e6340..b1f653f9c3aed 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1015,33 +1015,29 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
 
 // Helper function template to reduce amount of boilerplate code for
 // opcode selection.
-static std::optional<unsigned> pickOpcodeForVT(
-    MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i8,
-    std::optional<unsigned> Opcode_i16, std::optional<unsigned> Opcode_i32,
-    std::optional<unsigned> Opcode_i64, std::optional<unsigned> Opcode_f32,
-    std::optional<unsigned> Opcode_f64) {
+static std::optional<unsigned>
+pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i8,
+                std::optional<unsigned> Opcode_i16,
+                std::optional<unsigned> Opcode_i32,
+                std::optional<unsigned> Opcode_i64) {
   switch (VT) {
   case MVT::i1:
   case MVT::i8:
     return Opcode_i8;
-  case MVT::i16:
-    return Opcode_i16;
-  case MVT::i32:
-    return Opcode_i32;
-  case MVT::i64:
-    return Opcode_i64;
   case MVT::f16:
+  case MVT::i16:
   case MVT::bf16:
     return Opcode_i16;
   case MVT::v2f16:
   case MVT::v2bf16:
   case MVT::v2i16:
   case MVT::v4i8:
-    return Opcode_i32;
+  case MVT::i32:
   case MVT::f32:
-    return Opcode_f32;
+    return Opcode_i32;
+  case MVT::i64:
   case MVT::f64:
-    return Opcode_f64;
+    return Opcode_i64;
   default:
     return std::nullopt;
   }
@@ -1101,9 +1097,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
                    Chain};
 
   const MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
-  const std::optional<unsigned> Opcode =
-      pickOpcodeForVT(TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32,
-                      NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64);
+  const std::optional<unsigned> Opcode = pickOpcodeForVT(
+      TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32, NVPTX::LD_i64);
   if (!Opcode)
     return false;
 
@@ -1203,22 +1198,19 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
   default:
     return false;
   case NVPTXISD::LoadV2:
-    Opcode =
-        pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2,
-                        NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2,
-                        NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2);
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2,
+                             NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2,
+                             NVPTX::LDV_i64_v2);
     break;
   case NVPTXISD::LoadV4:
-    Opcode =
-        pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4,
-                        NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, NVPTX::LDV_i64_v4,
-                        NVPTX::LDV_f32_v4, NVPTX::LDV_f64_v4);
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4,
+                             NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4,
+                             NVPTX::LDV_i64_v4);
     break;
   case NVPTXISD::LoadV8:
     Opcode =
         pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, {/* no v8i8 */},
-                        {/* no v8i16 */}, NVPTX::LDV_i32_v8, {/* no v8i64 */},
-                        NVPTX::LDV_f32_v8, {/* no v8f64 */});
+                        {/* no v8i16 */}, NVPTX::LDV_i32_v8, {/* no v8i64 */});
     break;
   }
   if (!Opcode)
@@ -1286,48 +1278,42 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
     Opcode = pickOpcodeForVT(
         EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8,
         NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
-        NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
-        NVPTX::INT_PTX_LDG_GLOBAL_f64);
+        NVPTX::INT_PTX_LDG_GLOBAL_i64);
     break;
   case ISD::INTRINSIC_W_CHAIN:
     Opcode = pickOpcodeForVT(
         EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8,
         NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
-        NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
-        NVPTX::INT_PTX_LDU_GLOBAL_f64);
+        NVPTX::INT_PTX_LDU_GLOBAL_i64);
     break;
   case NVPTXISD::LoadV2:
     Opcode = pickOpcodeForVT(
         EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE,
         NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
-        NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
-        NVPTX::INT_PTX_LDG_G_v2f64_ELE);
+        NVPTX::INT_PTX_LDG_G_v2i64_ELE);
     break;
   case NVPTXISD::LDUV2:
     Opcode = pickOpcodeForVT(
         EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE,
         NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
-        NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
-        NVPTX::INT_PTX_LDU_G_v2f64_ELE);
+        NVPTX::INT_PTX_LDU_G_v2i64_ELE);
     break;
   case NVPTXISD::LoadV4:
     Opcode = pickOpcodeForVT(
         EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE,
         NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
-        NVPTX::INT_PTX_LDG_G_v4i64_ELE, NVPTX::INT_PTX_LDG_G_v4f32_ELE,
-        NVPTX::INT_PTX_LDG_G_v4f64_ELE);
+        NVPTX::INT_PTX_LDG_G_v4i64_ELE);
     break;
   case NVPTXISD::LDUV4:
-    Opcode = pickOpcodeForVT(
-        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE,
-        NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
-        {/* no v4i64 */}, NVPTX::INT_PTX_LDU_G_v4f32_ELE, {/* no v4f64 */});
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                             NVPTX::INT_PTX_LDU_G_v4i8_ELE,
+                             NVPTX::INT_PTX_LDU_G_v4i16_ELE,
+                             NVPTX::INT_PTX_LDU_G_v4i32_ELE, {/* no v4i64 */});
     break;
   case NVPTXISD::LoadV8:
     Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, {/* no v8i8 */},
                              {/* no v8i16 */}, NVPTX::INT_PTX_LDG_G_v8i32_ELE,
-                             {/* no v8i64 */}, NVPTX::INT_PTX_LDG_G_v8f32_ELE,
-                             {/* no v8f64 */});
+                             {/* no v8i64 */});
     break;
   }
   if (!Opcode)
@@ -1421,9 +1407,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
 
   const MVT::SimpleValueType SourceVT =
       Value.getNode()->getSimpleValueType(0).SimpleTy;
-  const std::optional<unsigned> Opcode =
-      pickOpcodeForVT(SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32,
-                      NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64);
+  const std::optional<unsigned> Opcode = pickOpcodeForVT(
+      SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
   if (!Opcode)
     return false;
 
@@ -1486,22 +1471,19 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
   default:
     return false;
   case NVPTXISD::StoreV2:
-    Opcode =
-        pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2,
-                        NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2,
-                        NVPTX::STV_f32_v2, NVPTX::STV_f64_v2);
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2,
+                             NVPTX::STV_i16_v2, NVPTX::STV_i32_v2,
+                             NVPTX::STV_i64_v2);
     break;
   case NVPTXISD::StoreV4:
-    Opcode =
-        pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4,
-                        NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, NVPTX::STV_i64_v4,
-                        NVPTX::STV_f32_v4, NVPTX::STV_f64_v4);
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4,
+                             NVPTX::STV_i16_v4, NVPTX::STV_i32_v4,
+                             NVPTX::STV_i64_v4);
     break;
   case NVPTXISD::StoreV8:
     Opcode =
         pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, {/* no v8i8 */},
-                        {/* no v8i16 */}, NVPTX::STV_i32_v8, {/* no v8i64 */},
-                        NVPTX::STV_f32_v8, {/* no v8f64 */});
+                        {/* no v8i16 */}, NVPTX::STV_i32_v8, {/* no v8i64 */});
     break;
   }
 
@@ -1550,21 +1532,18 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
   case 1:
     Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                              NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
-                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
-                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
+                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64);
     break;
   case 2:
     Opcode =
         pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
                         NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
-                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
-                        NVPTX::LoadParamMemV2F64);
+                        NVPTX::LoadParamMemV2I64);
     break;
   case 4:
     Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                              NVPTX::LoadParamMemV4I8, NVPTX::LoadParamMemV4I16,
-                             NVPTX::LoadParamMemV4I32, {/* no v4i64 */},
-                             NVPTX::LoadParamMemV4F32, {/* no v4f64 */});
+                             NVPTX::LoadParamMemV4I32, {/* no v4i64 */});
     break;
   }
   if (!Opcode)
@@ -1628,8 +1607,7 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
   case 1:
     Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                              NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
-                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
-                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
+                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64);
     if (Opcode == NVPTX::StoreRetvalI8) {
       // Fine tune the opcode depending on the size of the operand.
       // This helps to avoid creating redundant COPY instructions in
@@ -1649,14 +1627,12 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
   case 2:
     Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                              NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
-                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
-                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
+                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64);
     break;
   case 4:
     Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                              NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
-                             NVPTX::StoreRetvalV4I32, {/* no v4i64 */},
-                             NVPTX::StoreRetvalV4F32, {/* no v4f64 */});
+                             NVPTX::StoreRetvalV4I32, {/* no v4i64 */});
     break;
   }
   if (!Opcode)
@@ -1827,14 +1803,12 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
         // Use immediate version of store param
         Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreParamI8_i,
                                  NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
-                                 NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
-                                 NVPTX::StoreParamF64_i);
+                                 NVPTX::StoreParamI64_i);
       } else
         Opcode =
             pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
                             NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
-                            NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
-                            NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
+                            NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r);
       if (Opcode == NVPTX::StoreParamI8_r) {
         // Fine tune the opcode depending on the size of the operand.
         // This helps to avoid creating redundant COPY instructions in
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index da50c1fa68b69..883a2ddf80d4c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -208,6 +208,9 @@ class ValueToRegClass<ValueType T> {
 
 class OneUse1<SDPatternOperator operator>
     : PatFrag<(ops node:$A), (operator node:$A), [{ return N->hasOneUse(); }]>;
+class OneUse2<SDPatternOperator operator>
+    : PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>;
+
 
 class fpimm_pos_inf<ValueType vt>
     : FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
@@ -282,22 +285,20 @@ class BasicNVPTXInst<dag outs, dag insv, string asmstr, list<dag> pattern = []>
 
 multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
                   bit commutative, list<Predicate> requires = []> {
-  defvar asmstr = op_str # " \t$dst, $a, $b;";
-
   def rr :
-    NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
-              asmstr,
+    BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
+              op_str,
               [(set t.Ty:$dst, (op_node t.Ty:$a, t.Ty:$b))]>,
               Requires<requires>;
   def ri :
-    NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
-              asmstr,
+    BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
+              op_str,
               [(set t.Ty:$dst, (op_node t.Ty:$a, (t.Ty imm:$b)))]>,
               Requires<requires>;
   if !not(commutative) then
     def ir :
-      NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
-                asmstr,
+      BasicNVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
+                op_str,
                 [(set t.Ty:$dst, (op_node (t.Ty imm:$a), t.Ty:$b))]>,
                 Requires<requires>;
 }
@@ -310,8 +311,8 @@ multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
 }
 
 class I16x2<string OpcStr, SDNode OpNode> :
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
-              !strconcat(OpcStr, "16x2 \t$dst, $a, $b;"),
+  BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+              OpcStr # "16x2",
               [(set v2i16:$dst, (OpNode v2i16:$a, v2i16:$b))]>,
               Requires<[hasPTX<80>, hasSM<90>]>;
 
@@ -332,74 +333,74 @@ multiclass ADD_SUB_INT_CARRY<string op_str, SDNode op_node, bit commutative> {
 multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
   if !not(NaN) then {
    def f64rr :
-     NVPTXInst<(outs Float64Regs:$dst),
+     BasicNVPTXInst<(outs Float64Regs:$dst),
                (ins Float64Regs:$a, Float64Regs:$b),
-               !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+               OpcStr # ".f64",
                [(set f64:$dst, (OpNode f64:$a, f64:$b))]>;
    def f64ri :
-     NVPTXInst<(outs Float64Regs:$dst),
+     BasicNVPTXInst<(outs Float64Regs:$dst),
                (ins Float64Regs:$a, f64imm:$b),
-               !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+               OpcStr # ".f64",
                [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>;
   }
    def f32rr_ftz :
-     NVPTXInst<(outs Float32Regs:$dst),
+     BasicNVPTXInst<(outs Float32Regs:$dst),
                (ins Float32Regs:$a, Float32Regs:$b),
-               !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+               OpcStr # ".ftz.f32",
                [(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
                Requires<[doF32FTZ]>;
    def f32ri_ftz :
-     NVPTXInst<(outs Float32Regs:$dst),
+     BasicNVPTXInst<(outs Float32Regs:$dst),
                (ins Float32Regs:$a, f32imm:$b),
-               !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+               OpcStr # ".ftz.f32",
                [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
                Requires<[doF32FTZ]>;
    def f32rr :
-     NVPTXInst<(outs Float32Regs:$dst),
+     BasicNVPTXInst<(outs Float32Regs:$dst),
                (ins Float32Regs:$a, Float32Regs:$b),
-               !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+               OpcStr # ".f32",
                [(set f32:$dst, (OpNode f32:$a, f32:$b))]>;
    def f32ri :
-     NVPTXInst<(outs Float32Regs:$dst),
+     BasicNVPTXInst<(outs Float32Regs:$dst),
                (ins Float32Regs:$a, f32imm:$b),
-               !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+               OpcStr # ".f32",
                [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>;
 
    def f16rr_ftz :
-     NVPTXInst<(outs Int16Regs:$dst),
+     BasicNVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b),
-               !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
+               OpcStr # ".ftz.f16",
                [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
                Requires<[useFP16Math, doF32FTZ]>;
    def f16rr :
-     NVPTXInst<(outs Int16Regs:$dst),
+     BasicNVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b),
-               !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
+               OpcStr # ".f16",
                [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
                Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
 
    def f16x2rr_ftz :
-     NVPTXInst<(outs Int32Regs:$dst),
+     BasicNVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
-               !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
+               OpcStr # ".ftz.f16x2",
                [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
                Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>;
    def f16x2rr :
-     NVPTXInst<(outs Int32Regs:$dst),
+     BasicNVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
-               !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
+               OpcStr # ".f16x2",
                [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
                Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
    def bf16rr :
-     NVPTXInst<(outs Int16Regs:$dst),
+     BasicNVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b),
-               !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"),
+               OpcStr # ".bf16",
                [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
                Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
    def bf16x2rr :
-     NVPTXInst<(outs Int32Regs:$dst),
+     BasicNVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
-               !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"),
+               OpcStr # ".bf16x2",
                [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
                Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
 }
@@ -415,74 +416,74 @@ multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
 // just like the non ".rn" op, but prevents ptxas from creating FMAs.
 multiclass F3<string op_str, SDPatternOperator op_pat> {
   def f64rr :
-    NVPTXInst<(outs Float64Regs:$dst),
+    BasicNVPTXInst<(outs Float64Regs:$dst),
               (ins Float64Regs:$a, Float64Regs:$b),
-              op_str # ".f64 \t$dst, $a, $b;",
+              op_str # ".f64",
               [(set f64:$dst, (op_pat f64:$a, f64:$b))]>;
   def f64ri :
-    NVPTXInst<(outs Float64Regs:$dst),
+    BasicNVPTXInst<(outs Float64Regs:$dst),
               (ins Float64Regs:$a, f64imm:$b),
-              op_str # ".f64 \t$dst, $a, ...
[truncated]

@AlexMaclean AlexMaclean force-pushed the dev/amaclean/upstream/misc-cleanup branch from cee85fa to e96fee7 Compare May 28, 2025 02:47
@AlexMaclean AlexMaclean merged commit c3bbc06 into llvm:main May 29, 2025
9 of 11 checks passed
google-yfyang pushed a commit to google-yfyang/llvm-project that referenced this pull request May 29, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants