nvc0/ir: fix second tex argument after levelZero optimization

We used to pre-set a bunch of extra arguments to a texture instruction
in order to force the RA to allocate a register at the boundary of 4.
However, with the levelZero optimization, which removes a LOD argument
when it's uniformly equal to zero, we undid that logic by removing an
extra argument. As a result, we could end up with insufficient alignment
on the second wide texture argument.

Instead, we switch to a different method of achieving the same result.
The logic runs during the constraint analysis of the RA, and adds unset
sources as necessary right before the sources are merged into a wide
argument.

Fixes MISALIGNED_REG errors in Hitman when run with bindless textures
enabled on a GK208.

Fixes: 9145873b15 ("nvc0/ir: use levelZero flag when the lod is set to 0")
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: 19.0 <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 5de5beedf2)
This commit is contained in:
Ilia Mirkin 2019-02-02 02:56:48 -05:00 committed by Dylan Baker
parent 94f0908216
commit 36d99d9ad0
2 changed files with 24 additions and 25 deletions

View file

@ -1063,22 +1063,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
}
}
if (chipset >= NVISA_GK104_CHIPSET) {
//
// If TEX requires more than 4 sources, the 2nd register tuple must be
// aligned to 4, even if it consists of just a single 4-byte register.
//
// XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case.
//
int s = i->srcCount(0xff, true);
if (s > 4 && s < 7) {
if (i->srcExists(s)) // move potential predicate out of the way
i->moveSources(s, 7 - s);
while (s < 7)
i->setSrc(s++, bld.loadImm(NULL, 0));
}
}
return true;
}

View file

@ -2341,9 +2341,19 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
if (!tex->tex.target.isArray() && tex->tex.useOffsets)
s++;
}
n = tex->srcCount(0xff) - s;
n = tex->srcCount(0xff, true) - s;
// TODO: Is this necessary? Perhaps just has to be aligned to the
// level that the first arg is, not necessarily to 4. This
// requirement has not been rigorously verified, as it has been on
// Kepler.
if (n > 0 && n < 3) {
if (tex->srcExists(n + s)) // move potential predicate out of the way
tex->moveSources(n + s, 3 - n);
while (n < 3)
tex->setSrc(s + n++, new_LValue(func, FILE_GPR));
}
} else {
s = tex->srcCount(0xff);
s = tex->srcCount(0xff, true);
n = 0;
}
@ -2366,14 +2376,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
} else
if (isTextureOp(tex->op)) {
int n = tex->srcCount(0xff, true);
if (n > 4) {
condenseSrcs(tex, 0, 3);
if (n > 5) // NOTE: first call modified positions already
condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
} else
if (n > 1) {
condenseSrcs(tex, 0, n - 1);
int s = n > 4 ? 4 : n;
if (n > 4 && n < 7) {
if (tex->srcExists(n)) // move potential predicate out of the way
tex->moveSources(n, 7 - n);
while (n < 7)
tex->setSrc(n++, new_LValue(func, FILE_GPR));
}
if (s > 1)
condenseSrcs(tex, 0, s - 1);
if (n > 4)
condenseSrcs(tex, 1, n - s);
}
}
@ -2510,6 +2524,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s)
assert(cst->getSrc(s)->defs.size() == 1); // still SSA
Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
bool imm = defi->op == OP_MOV &&
defi->src(0).getFile() == FILE_IMMEDIATE;
bool load = defi->op == OP_LOAD &&