Skip to content

Commit 1e82d04

Browse files
committed
[AMDGPU] Allow allocation of lo128 registers from all banks
We can encode 16-bit operands in a short form for VGPRs [0..127]. When we have 1K registers available we can in fact allocate 4 times more from all 4 banks. That, however, requires an allocatable class for these operands. While for most instructions it will result in the longer VOP3 form, for V_FMAAMK/FMADAK_F16 it will simply prohibit the encoding because these do not have VOP3 forms. A straightforward solution would be to create a register class with all registers having bit 8 of the encoding zero, i.e. to create a register class with holes punched in it: [0-127, 256-383, 512-639, 768-895]. LLVM, however, does not like register classes with punched holes when they also have subregisters. The cross-product of all classes explodes and some combinations of a 'class having a common subreg with another' become impossible. Just doing so explodes our register info to 4+ GB, which is uncompilable too. The proposed solution is to define the _lo128 RC with 896 contiguous VGPRs, but with an allocation order that hides the prohibited registers. That keeps the generated register info at a reasonable size (+~50%). The same is needed for VGPR_16_Lo128 in true16 mode. In general we could later reuse VGPR_32 with AltOrderSelect, but we would need to pass the operand type there and deal with the AsmParser. One other consideration is that we can allocate a register leaving a hole of a whole 128 registers, but a subsequent patch can fix that, i.e. by the time of the RA we really know the estimated register pressure and can further limit the allocation order. In short: without it we either have spilling when we have VGPRs available, or outright 'run out of registers' when we have a lot of 16-bit registers used and a lot of budget, but we cannot encode it.
1 parent ab665f2 commit 1e82d04

File tree

45 files changed

+1018
-747
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1018
-747
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
426426
return isRegOrInline(RCID, type) && !hasModifiers();
427427
}
428428

429+
bool isVGPR32_Lo128() const;
430+
429431
bool isSCSrcB16() const {
430432
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
431433
}
@@ -2243,7 +2245,19 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
22432245
}
22442246

22452247
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2246-
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2248+
if (!isRegKind() ||
2249+
!AsmParser->getMRI()->getRegClass(RCID).contains(getReg()))
2250+
return false;
2251+
if (RCID == AMDGPU::VGPR_32_Lo128RegClassID ||
2252+
RCID == AMDGPU::VS_32_Lo128RegClassID)
2253+
return !AMDGPU::isHi128VGPR32(getReg());
2254+
return true;
2255+
}
2256+
2257+
bool AMDGPUOperand::isVGPR32_Lo128() const {
2258+
if (!isRegKind())
2259+
return false;
2260+
return AMDGPU::isLo128VGPR32(getReg());
22472261
}
22482262

22492263
bool AMDGPUOperand::isVRegWithInputMods() const {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,11 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
813813
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
814814
const MCRegisterClass &RC = MRI.getRegClass(RCID);
815815
auto Reg = mc2PseudoReg(Op.getReg());
816-
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
816+
bool Err = !RC.contains(Reg) && !isInlineValue(Reg);
817+
if (!Err && (RCID == AMDGPU::VGPR_32_Lo128RegClassID ||
818+
RCID == AMDGPU::VS_32_Lo128RegClassID))
819+
Err = AMDGPU::isHi128VGPR32(Reg);
820+
if (Err) {
817821
bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
818822
(OpInfo.RegClass == AMDGPU::SReg_1 ||
819823
OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);

llvm/lib/Target/AMDGPU/SIInstrFormats.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,12 @@ def CPolBit {
323323

324324
class VOPDstOperand<RegisterClassLike rc> : RegisterOperand<rc, "printVOPDst">;
325325

326+
def Vgpr32Lo128 : AsmOperandClass {
327+
let Name = "Vgpr32Lo128";
328+
let PredicateMethod = "isVGPR32_Lo128";
329+
let RenderMethod = "addRegOperands";
330+
}
331+
326332
def VOPDstOperand_t16 : VOPDstOperand <VGPR_16> {
327333
let EncoderMethod = "getMachineOpValueT16";
328334
let DecoderMethod = "DecodeVGPR_16RegisterClass";
@@ -333,12 +339,27 @@ def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
333339
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
334340
}
335341

342+
def VOPDstOperand_Vgpr32Lo128 : VOPDstOperand <VGPR_32_Lo128> {
343+
let ParserMatchClass = Vgpr32Lo128;
344+
}
345+
336346
// Source-encoded destination operand for instructions like v_swap_b16.
337347
def VOPSrcEncodedDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
338348
let EncoderMethod = VSrcT_b16_Lo128.EncoderMethod;
339349
let DecoderMethod = VSrcT_b16_Lo128.DecoderMethod;
340350
}
341351

352+
353+
def VGPROp_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
354+
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
355+
let EncoderMethod = "getMachineOpValueT16Lo128";
356+
}
357+
358+
def VGPROp_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
359+
let DecoderMethod = "DecodeVGPR_32RegisterClass";
360+
let ParserMatchClass = Vgpr32Lo128;
361+
}
362+
342363
class VINTRPe <bits<2> op> : Enc32 {
343364
bits<8> vdst;
344365
bits<8> vsrc;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1825,7 +1825,7 @@ class getVALUDstForVT_fake16<ValueType VT> {
18251825
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
18261826
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128_AlignTarget>,
18271827
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64_AlignTarget>,
1828-
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
1828+
!if(!eq(VT.Size, 16), VOPDstOperand_Vgpr32Lo128,
18291829
VOPDstS64orS32)))); // else VT == i1
18301830
}
18311831

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -632,13 +632,20 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
632632
let BaseClassOrder = 32;
633633
}
634634

635-
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
635+
// Identical to VGPR_32 except only the low 128 (Lo128) registers in each
636+
// register bank are allocatable.
636637
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
637-
(add (sequence "VGPR%u", 0, 127))> {
638+
(add (sequence "VGPR%u", 0, 895))> {
639+
let AltOrders = [(add (sequence "VGPR%u", 0, 127),
640+
(sequence "VGPR%u", 256, 383),
641+
(sequence "VGPR%u", 512, 639),
642+
(sequence "VGPR%u", 768, 895))];
643+
let AltOrderSelect = [{ return 1; }];
638644
let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
639645
let GeneratePressureSet = 0;
640646
let Size = 32;
641647
let Weight = 1;
648+
let BaseClassOrder = 33;
642649
}
643650

644651
// Identical to VGPR_32 except it only contains the low 256 (Lo256) registers.
@@ -1487,15 +1494,6 @@ foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "320", "35
14871494
def VGPROp_#size#_Align2 : RegisterOperand<!cast<RegisterClassLike>("VReg_"#size#_Align2)>;
14881495
}
14891496

1490-
def VGPROp_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
1491-
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
1492-
let EncoderMethod = "getMachineOpValueT16Lo128";
1493-
}
1494-
1495-
def VGPROp_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
1496-
let DecoderMethod = "DecodeVGPR_32RegisterClass";
1497-
}
1498-
14991497
//===----------------------------------------------------------------------===//
15001498
// ASrc_* Operands with an AccVGPR
15011499
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
157157
assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
158158
"True16 Instructions post-RA");
159159
if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
160-
!AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
160+
!llvm::is_contained(
161+
AMDGPU::VGPR_32_Lo128RegClass.getRawAllocationOrder(*MF), Reg))
161162
return false;
162163

163164
if (AMDGPU::VGPR_16RegClass.contains(Reg) &&

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3632,6 +3632,17 @@ bool isPackedFP32Inst(unsigned Opc) {
36323632
}
36333633
}
36343634

3635+
/// Returns true when \p R lies in the inclusive range [VGPR0, VGPR127].
bool isLo128VGPR32(MCPhysReg R) {
  // Reject anything numbered below the first VGPR up front.
  if (R < AMDGPU::VGPR0)
    return false;
  return R <= AMDGPU::VGPR127;
}
3638+
3639+
/// Returns true when \p R is a 32-bit VGPR whose index has bit 8 set, i.e. it
/// falls in one of [v128-v255], [v384-v511], [v640-v767] or [v896-v1023].
///
/// The range checks rely on the VGPR register enumerators being numbered
/// consecutively, as the rest of this comparison already does.
bool isHi128VGPR32(MCPhysReg R) {
  return ((R >= AMDGPU::VGPR128 && R < AMDGPU::VGPR256) ||
          (R >= AMDGPU::VGPR384 && R < AMDGPU::VGPR512) ||
          (R >= AMDGPU::VGPR640 && R < AMDGPU::VGPR768) ||
          // The final range is closed with an inclusive bound: v1023 itself
          // has bit 8 set and must be reported as Hi128. An exclusive
          // `< VGPR1023` would wrongly leave it out.
          (R >= AMDGPU::VGPR896 && R <= AMDGPU::VGPR1023));
}
3645+
36353646
const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
36363647
assert(isFixedDims() && "expect kind to be FixedDims");
36373648
return Dims;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1828,6 +1828,13 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
18281828
/// must be defined in terms of bytes.
18291829
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
18301830

1831+
/// @return true if register \p R belongs to the range of [v0-v127].
1832+
bool isLo128VGPR32(MCPhysReg R);
1833+
1834+
/// @return true if register \p R belongs to the range of [v128-v255] or is any
1835+
/// other VGPR with bit 8 of its address equal to 1, for example [v384-v511].
1836+
bool isHi128VGPR32(MCPhysReg R);
1837+
18311838
class ClusterDimsAttr {
18321839
public:
18331840
enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
6666
define i32 @asm_vgpr_early_clobber() {
6767
; CHECK-LABEL: name: asm_vgpr_early_clobber
6868
; CHECK: bb.1 (%ir-block.0):
69-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1245195 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1245195 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
69+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1376267 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1376267 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
7070
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
7171
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
7272
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
@@ -94,7 +94,7 @@ entry:
9494
define i32 @test_single_vgpr_output() nounwind {
9595
; CHECK-LABEL: name: test_single_vgpr_output
9696
; CHECK: bb.1.entry:
97-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8
97+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8
9898
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
9999
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
100100
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -106,7 +106,7 @@ entry:
106106
define i32 @test_single_sgpr_output_s32() nounwind {
107107
; CHECK-LABEL: name: test_single_sgpr_output_s32
108108
; CHECK: bb.1.entry:
109-
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8
109+
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
110110
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
111111
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
112112
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -119,7 +119,7 @@ entry:
119119
define float @test_multiple_register_outputs_same() #0 {
120120
; CHECK-LABEL: name: test_multiple_register_outputs_same
121121
; CHECK: bb.1 (%ir-block.0):
122-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8, 1245194 /* regdef:VGPR_32 */, def %9
122+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8, 1376266 /* regdef:VGPR_32 */, def %9
123123
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
124124
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
125125
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
@@ -136,7 +136,7 @@ define float @test_multiple_register_outputs_same() #0 {
136136
define double @test_multiple_register_outputs_mixed() #0 {
137137
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
138138
; CHECK: bb.1 (%ir-block.0):
139-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %8, 2818058 /* regdef:VReg_64 */, def %9
139+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %8, 2883594 /* regdef:VReg_64 */, def %9
140140
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
141141
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9
142142
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
@@ -171,7 +171,7 @@ define amdgpu_kernel void @test_input_vgpr_imm() {
171171
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
172172
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
173173
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32)
174-
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY1]]
174+
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1376265 /* reguse:VGPR_32 */, [[COPY1]]
175175
; CHECK-NEXT: S_ENDPGM 0
176176
call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42)
177177
ret void
@@ -185,7 +185,7 @@ define amdgpu_kernel void @test_input_sgpr_imm() {
185185
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
186186
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
187187
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
188-
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:SReg_32 */, [[COPY1]]
188+
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:SReg_32 */, [[COPY1]]
189189
; CHECK-NEXT: S_ENDPGM 0
190190
call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42)
191191
ret void
@@ -212,7 +212,7 @@ define float @test_input_vgpr(i32 %src) nounwind {
212212
; CHECK-NEXT: {{ $}}
213213
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
214214
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
215-
; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %9, 1245193 /* reguse:VGPR_32 */, [[COPY1]]
215+
; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %9, 1376265 /* reguse:VGPR_32 */, [[COPY1]]
216216
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
217217
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
218218
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -227,7 +227,7 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind {
227227
; CHECK-NEXT: liveins: $vgpr0
228228
; CHECK-NEXT: {{ $}}
229229
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
230-
; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1245194 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
230+
; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1376266 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
231231
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
232232
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
233233
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -244,7 +244,7 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
244244
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
245245
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
246246
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
247-
; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1245194 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
247+
; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1376266 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
248248
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11
249249
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
250250
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -256,13 +256,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
256256
define i32 @test_sgpr_matching_constraint() nounwind {
257257
; CHECK-LABEL: name: test_sgpr_matching_constraint
258258
; CHECK: bb.1.entry:
259-
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8
259+
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
260260
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
261-
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %10
261+
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %10
262262
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10
263263
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
264264
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
265-
; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %12, 1835017 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
265+
; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %12, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
266266
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12
267267
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
268268
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -285,7 +285,7 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
285285
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
286286
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
287287
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
288-
; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1245194 /* regdef:VGPR_32 */, def %11, 1245194 /* regdef:VGPR_32 */, def %12, 1245194 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
288+
; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1376266 /* regdef:VGPR_32 */, def %11, 1376266 /* regdef:VGPR_32 */, def %12, 1376266 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
289289
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11
290290
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12
291291
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13
@@ -306,10 +306,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
306306
define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
307307
; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
308308
; CHECK: bb.1.entry:
309-
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:SReg_32 */, def %8
309+
; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
310310
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
311311
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
312-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
312+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
313313
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
314314
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
315315
; CHECK-NEXT: SI_RETURN implicit $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-ignore-copies-crash.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ body: |
2424
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
2525
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
2626
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
27-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %5(s32)
27+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %5(s32)
2828
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
2929
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[FMUL]], %5, [[COPY2]]
3030
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
@@ -33,7 +33,7 @@ body: |
3333
%2:vgpr(s32) = COPY %1(s32)
3434
%3:vgpr(s32) = G_FMUL %0, %2
3535
%4:sgpr(s32) = G_FCONSTANT float 1.000000e+00
36-
INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1245194 /* regdef:VGPR_32 */, def %5:vgpr_32
36+
INLINEASM &"v_mov_b32 $0, 0", 0 /* attdialect */, 1376266 /* regdef:VGPR_32 */, def %5:vgpr_32
3737
%6:vgpr(s32) = COPY %4(s32)
3838
%7:vgpr(s32) = nnan G_AMDGPU_FMED3 %3(s32), %5(s32), %6(s32)
3939
$vgpr0 = COPY %7(s32)

0 commit comments

Comments
 (0)