mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 18:08:40 +02:00
nv50/ir: fix DCE to not generate 96-bit loads
A situation where there's a 128-bit load where the last component gets
DCE'd causes a 96-bit load to be generated, which no GPU can actually
emit. Avoid generating such instructions by scaling back to 64-bit on
the first load when splitting.
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 49692f86a1)
This commit is contained in:
parent
aff9f8a6f7
commit
4ae9142f8b
1 changed file with 31 additions and 1 deletion
|
|
@ -2962,6 +2962,16 @@ DeadCodeElim::visit(BasicBlock *bb)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Each load can go into up to 4 destinations, any of which might potentially
|
||||
// be dead (i.e. a hole). These can always be split into 2 loads, independent
|
||||
// of where the holes are. We find the first contiguous region, put it into
|
||||
// the first load, and then put the second contiguous region into the second
|
||||
// load. There can be at most 2 contiguous regions.
|
||||
//
|
||||
// Note that there are some restrictions, for example it's not possible to do
|
||||
// a 64-bit load that's not 64-bit aligned, so such a load has to be split
|
||||
// up. Also hardware doesn't support 96-bit loads, so those also have to be
|
||||
// split into a 64-bit and 32-bit load.
|
||||
void
|
||||
DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
{
|
||||
|
|
@ -2982,6 +2992,8 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
|||
addr1 = ld1->getSrc(0)->reg.data.offset;
|
||||
n1 = n2 = 0;
|
||||
size1 = size2 = 0;
|
||||
|
||||
// Compute address/width for first load
|
||||
for (d = 0; ld1->defExists(d); ++d) {
|
||||
if (mask & (1 << d)) {
|
||||
if (size1 && (addr1 & 0x7))
|
||||
|
|
@ -2995,16 +3007,34 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Scale back the size of the first load until it can be loaded. This
|
||||
// typically happens for TYPE_B96 loads.
|
||||
while (n1 &&
|
||||
!prog->getTarget()->isAccessSupported(ld1->getSrc(0)->reg.file,
|
||||
typeOfSize(size1))) {
|
||||
size1 -= def1[--n1]->reg.size;
|
||||
d--;
|
||||
}
|
||||
|
||||
// Compute address/width for second load
|
||||
for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
|
||||
if (mask & (1 << d)) {
|
||||
assert(!size2 || !(addr2 & 0x7));
|
||||
def2[n2] = ld1->getDef(d);
|
||||
size2 += def2[n2++]->reg.size;
|
||||
} else {
|
||||
} else if (!n2) {
|
||||
assert(!n2);
|
||||
addr2 += ld1->getDef(d)->reg.size;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure that we've processed all the values
|
||||
for (; ld1->defExists(d); ++d)
|
||||
assert(!(mask & (1 << d)));
|
||||
|
||||
updateLdStOffset(ld1, addr1, func);
|
||||
ld1->setType(typeOfSize(size1));
|
||||
for (d = 0; d < 4; ++d)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue