Skip to content

Commit 0873035

Browse files
fangliu2020 authored and igcbot committed
Changes in code.
1 parent c80648e commit 0873035

File tree

1 file changed

+58
-11
lines changed

1 file changed

+58
-11
lines changed

visa/HWConformityPro.cpp

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2077,10 +2077,22 @@ void HWConformityPro::fixMadw(INST_LIST_ITER it, G4_BB *bb) {
20772077
mullhDst = builder.duplicateOperand(dst);
20782078
tmpType = dstType;
20792079
} else {
2080-
// If src2 is not 0, then madw will convert to gen mullh + addc + add:
2081-
// mullh (16) mullh_dst<1>:d src0<1;1,0>:d src1<1;1,0>:d
2082-
// addc (16) dst_lo32<1>:d mullh_dst_lo32<1;1,0>:d src2<1;1,0>:d
2083-
// add (16) dst_hi32<1>:d acc0.0<1;1,0>:d mullh_dst_hi32<1;1,0>:d
2080+
// If src2 is not 0 and has unsigned datatype, then madw will convert to gen
2081+
// mullh + addc + add:
2082+
// madw (16) dst<1>:ud src0<1;1,0>:ud src1<1;1,0>:ud src2<1;1,0>:ud
2083+
// =>
2084+
// mullh (16) mullh_dst<1>:ud src0<1;1,0>:ud src1<1;1,0>:ud
2085+
// addc (16) dst_lo32<1>:ud mullh_dst_lo32<1;1,0>:ud src2<1;1,0>:ud
2086+
// add (16) dst_hi32<1>:ud acc0.0<1;1,0>:ud mullh_dst_hi32<1;1,0>:ud
2087+
// If src2 is not 0 and has signed datatype, then madw will convert to gen
2088+
// mullh + addc + mov + add3:
2089+
// madw (16) dst<1>:d src0<1;1,0>:ud src1<1;1,0>:ud src2<1;1,0>:d
2090+
// =>
2091+
// mullh (16) mullh_dst<1>:d src0<1;1,0>:ud src1<1;1,0>:ud
2092+
// addc (16) dst_lo32<1>:ud mullh_dst_lo32<1;1,0>:ud src2<1;1,0>:ud
2093+
// mov (16) signExt<1>:q src2<1;1,0>:d
2094+
// add3 (16) dst_hi32<1>:d signExt.1<2;1,0>:d acc0.0<1;1,0>:d
2095+
// mullh_dst_hi32<1;1,0>:d
20842096
tmpType =
20852097
(IS_UNSIGNED_INT(src0->getType()) && IS_UNSIGNED_INT(src1->getType()) &&
20862098
IS_UNSIGNED_INT(src2->getType()))
@@ -2133,8 +2145,27 @@ void HWConformityPro::fixMadw(INST_LIST_ITER it, G4_BB *bb) {
21332145
addcInst->setOptionOn(InstOpt_AccWrCtrl);
21342146
auto insertIter = bb->insertAfter(it, addcInst);
21352147

2136-
// Create add instruction:
2148+
// If src2 has a signed datatype, we need to sign-extend src2, since it
2149+
// is the addend used when computing the upper 32 bits of the result:
2150+
// mov (16) signExt<1>:q src2<1;1,0>:d
2151+
G4_Declare *signExtDclQword = nullptr;
2152+
if (src2->getType() == Type_D) {
2153+
signExtDclQword = builder.createTempVar(
2154+
builder.numEltPerGRF(Type_Q) * execSize, Type_Q, builder.getGRFAlign());
2155+
auto movDst = builder.createDstRegRegion(signExtDclQword, 1);
2156+
auto movInst = builder.createMov(
2157+
execSize, movDst, builder.duplicateOperand(src2), origOptions, false);
2158+
movInst->setPredicate(builder.duplicateOperand(origPredicate));
2159+
movInst->setOptionOff(InstOpt_AccWrCtrl);
2160+
insertIter = bb->insertAfter(insertIter, movInst);
2161+
}
2162+
2163+
// Create add or add3 instruction:
2164+
// If src2 has an unsigned datatype:
21372165
// add (16) dst_hi32<1>:d acc0.0<1;1,0>:d mullh_dst_hi32<1;1,0>:d
2166+
// Otherwise:
2167+
// add3 (16) dst_hi32<1>:d signExt.1<2;1,0>:d acc0.0<1;1,0>:d
2168+
// mullh_dst_hi32<1;1,0>:d
21382169
int DstHiRegOffset = (int)std::ceil(
21392170
(float)(execSize * dst->getExecTypeSize()) / builder.getGRFSize());
21402171
auto *dstHi32 =
@@ -2145,19 +2176,35 @@ void HWConformityPro::fixMadw(INST_LIST_ITER it, G4_BB *bb) {
21452176
tmpType, builder.getGRFAlign());
21462177
mullhTmpDclHi->setAliasDeclare(mullhTmpDcl,
21472178
mullhDstLowGRFNum * builder.getGRFSize());
2148-
auto src1Add = builder.createSrcRegRegion(
2179+
auto srcAdd = builder.createSrcRegRegion(
21492180
mullhTmpDclHi, execSize == g4::SIMD1 ? builder.getRegionScalar()
21502181
: builder.getRegionStride1());
21512182
auto accSrcOpnd =
21522183
builder.createSrc(builder.phyregpool.getAcc0Reg(), 0, 0,
21532184
execSize == g4::SIMD1 ? builder.getRegionScalar()
21542185
: builder.getRegionStride1(),
21552186
tmpType);
2156-
auto addInst = builder.createBinOp(G4_add, execSize, dstHi32, accSrcOpnd,
2157-
src1Add, origOptions, false);
2158-
addInst->setPredicate(builder.duplicateOperand(origPredicate));
2159-
addInst->setOptionOff(InstOpt_AccWrCtrl);
2160-
bb->insertAfter(insertIter, addInst);
2187+
G4_INST *addOrAdd3Inst = nullptr;
2188+
if (src2->getType() == Type_D) {
2189+
G4_Declare *signExtDclDword =
2190+
builder.createTempVar(builder.numEltPerGRF(tmpType) * execSize * 2,
2191+
tmpType, builder.getGRFAlign());
2192+
signExtDclDword->setAliasDeclare(signExtDclQword, 0);
2193+
auto src0Add3 =
2194+
builder.createSrc(signExtDclDword->getRegVar(), 0, 1,
2195+
execSize == g4::SIMD1 ? builder.getRegionScalar()
2196+
: builder.getRegionStride2(),
2197+
tmpType);
2198+
addOrAdd3Inst = builder.createInternalInst(
2199+
nullptr, G4_add3, nullptr, g4::NOSAT, execSize, dstHi32, src0Add3,
2200+
accSrcOpnd, srcAdd, origOptions);
2201+
} else {
2202+
addOrAdd3Inst = builder.createBinOp(G4_add, execSize, dstHi32, accSrcOpnd,
2203+
srcAdd, origOptions, false);
2204+
}
2205+
addOrAdd3Inst->setPredicate(builder.duplicateOperand(origPredicate));
2206+
addOrAdd3Inst->setOptionOff(InstOpt_AccWrCtrl);
2207+
bb->insertAfter(insertIter, addOrAdd3Inst);
21612208
}
21622209

21632210
// Restrictions for fcvt instruction:

0 commit comments

Comments
 (0)