Skip to content

Commit 534b6fe

Browse files
committed
[AMDGPU] Limit allocation of lo128 registers for occupancy
The parent change allows allocation of lo128 VGPRs from all 4 banks. That may result in an undesired allocation that leaves a hole of up to 128 registers, for example when v0-v127 are allocated and v128-v255 are free. Limit the available allocation order according to the occupancy. Both the hard occupancy limits and the occupancy achieved during scheduling are considered. In this case it is better to spill a register than to drop occupancy.
1 parent 1e82d04 commit 534b6fe

File tree

3 files changed

+141
-4
lines changed

3 files changed

+141
-4
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -639,8 +639,20 @@ def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg1
639639
let AltOrders = [(add (sequence "VGPR%u", 0, 127),
640640
(sequence "VGPR%u", 256, 383),
641641
(sequence "VGPR%u", 512, 639),
642-
(sequence "VGPR%u", 768, 895))];
643-
let AltOrderSelect = [{ return 1; }];
642+
(sequence "VGPR%u", 768, 895)),
643+
(add (sequence "VGPR%u", 0, 127),
644+
(sequence "VGPR%u", 256, 383),
645+
(sequence "VGPR%u", 512, 639)),
646+
(add (sequence "VGPR%u", 0, 127),
647+
(sequence "VGPR%u", 256, 383)),
648+
(add (sequence "VGPR%u", 0, 127))];
649+
let AltOrderSelect = [{
650+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
651+
unsigned N = ST.getMaxNumVGPRs(MF);
652+
unsigned SchedWaves = MF.getInfo<SIMachineFunctionInfo>()->getMinAllowedOccupancy();
653+
N = std::min(N, ST.getMaxNumVGPRs(SchedWaves, 0));
654+
return N > 768 ? 1 : N > 512 ? 2 : N > 256 ? 3 : 4;
655+
}];
644656
let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
645657
let GeneratePressureSet = 0;
646658
let Size = 32;

0 commit comments

Comments
 (0)