@@ -314,8 +314,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_MOVRELS_B32(inst);
     case Opcode::V_MOVRELD_B32:
         return V_MOVRELD_B32(inst);
-    // case Opcode::V_MOVRELSD_B32:
-    //     return V_MOVRELSD_B32(inst);
+    case Opcode::V_MOVRELSD_B32:
+        return V_MOVRELSD_B32(inst);
     case Opcode::V_NOP:
         return;
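
Note for context (my summary of the GCN ISA, not text from the patch): the MOVREL family indexes VGPRs relative to M0, roughly:

    // V_MOVRELS_B32  : VGPR[dst]      = VGPR[src + M0]   (source indexed by M0)
    // V_MOVRELD_B32  : VGPR[dst + M0] = VGPR[src]        (destination indexed by M0)
    // V_MOVRELSD_B32 : VGPR[dst + M0] = VGPR[src + M0]   (both indexed by M0)

Since V_MOVRELSD_B32 combines the other two, the hunks below implement it by reusing a read helper (VMovRelSHelper) and a write helper (VMovRelDHelper).
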
@@ -996,27 +996,21 @@ void Translator::V_FFBH_U32(const GcnInst& inst) {
     SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
 }
 
-// void Translator::VMovRelHelper(const IR::U32 src_idx, const IR::U32 dst_idx) {}
-
-void Translator::V_MOVRELS_B32(const GcnInst& inst) {
-    const IR::U32 m0 = ir.GetM0();
-    ASSERT(inst.src[0].field == OperandField::VectorGPR);
-    u32 src_vgrpno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
+// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
+// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to
 
+IR::U32 Translator::VMovRelSHelper(u32 src_vgprno, const IR::U32 m0) {
+    // Read from VGPR0 by default when src_vgprno + m0 > num_allocated_vgprs
     IR::U32 src_val = ir.GetVectorReg<IR::U32>(IR::VectorReg::V0);
-    for (u32 i = src_vgrpno; i < info.num_allocated_vgprs; i++) {
-        const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgrpno));
+    for (u32 i = src_vgprno; i < info.num_allocated_vgprs; i++) {
+        const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgprno));
         src_val =
             IR::U32{ir.Select(cond, ir.GetVectorReg<IR::U32>(IR::VectorReg::V0 + i), src_val)};
     }
-    SetDst(inst.dst[0], src_val);
+    return src_val;
 }
 
-void Translator::V_MOVRELD_B32(const GcnInst& inst) {
-    const IR::U32 src_val{GetSrc(inst.src[0])};
-    IR::U32 m0 = ir.GetM0();
-    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
-
+void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0) {
     for (u32 i = dst_vgprno; i < info.num_allocated_vgprs; i++) {
         const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - dst_vgprno));
         const IR::U32 dst_val =
@@ -1025,12 +1019,29 @@ void Translator::V_MOVRELD_B32(const GcnInst& inst) {
     }
 }
 
-// void Translator::V_MOVRELSD_B32(const GcnInst& inst) {
-//     IR::U32 src_idx{GetSrc(inst.src[0])};
-//     IR::U32 dst_idx{GetSrc(inst.dst[0])};
-//     src_idx = ir.IAdd(src_idx, ir.GetM0());
-//     dst_idx = ir.IAdd(dst_idx, ir.GetM0());
-//     VMovRelHelper(src_idx, dst_idx);
-// }
+void Translator::V_MOVRELS_B32(const GcnInst& inst) {
+    u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
+    const IR::U32 m0 = ir.GetM0();
+
+    const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
+    SetDst(inst.dst[0], src_val);
+}
+
+void Translator::V_MOVRELD_B32(const GcnInst& inst) {
+    const IR::U32 src_val{GetSrc(inst.src[0])};
+    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
+    IR::U32 m0 = ir.GetM0();
+
+    VMovRelDHelper(dst_vgprno, src_val, m0);
+}
+
+void Translator::V_MOVRELSD_B32(const GcnInst& inst) {
+    u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
+    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
+    IR::U32 m0 = ir.GetM0();
+
+    const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
+    VMovRelDHelper(dst_vgprno, src_val, m0);
+}
 
} // namespace Shader::Gcn
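
An aside on the lowering technique: the helpers above emulate regs[base + M0] with a linear chain of selects over every allocated register rather than a computed index. A minimal standalone sketch of the same idea in plain C++ (the names SelectChainRead, SelectChainWrite, regs, and NUM_VGPRS are illustrative stand-ins, not from the codebase):

    #include <array>
    #include <cstdint>
    #include <iostream>

    constexpr uint32_t NUM_VGPRS = 8; // stand-in for info.num_allocated_vgprs

    // Read-side analogue of VMovRelSHelper: yields regs[base + m0], falling
    // back to regs[0] when base + m0 lands past the last register.
    uint32_t SelectChainRead(const std::array<uint32_t, NUM_VGPRS>& regs,
                             uint32_t base, uint32_t m0) {
        uint32_t val = regs[0]; // default when no select condition matches
        for (uint32_t i = base; i < NUM_VGPRS; i++) {
            val = (m0 == i - base) ? regs[i] : val; // one ir.Select per register
        }
        return val;
    }

    // Write-side analogue of VMovRelDHelper: conditionally overwrites at most
    // one register and leaves the rest intact.
    void SelectChainWrite(std::array<uint32_t, NUM_VGPRS>& regs,
                          uint32_t base, uint32_t m0, uint32_t src_val) {
        for (uint32_t i = base; i < NUM_VGPRS; i++) {
            regs[i] = (m0 == i - base) ? src_val : regs[i];
        }
    }

    int main() {
        std::array<uint32_t, NUM_VGPRS> regs{10, 11, 12, 13, 14, 15, 16, 17};
        std::cout << SelectChainRead(regs, 2, 3) << '\n'; // regs[2 + 3] -> 15
        SelectChainWrite(regs, 1, 4, 99);                 // regs[1 + 4] = 99
        std::cout << regs[5] << '\n';                     // 99
    }

The chain costs O(num_allocated_vgprs) selects per access, which is what the TODO about a range analysis pass bounding m0 aims to shrink.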