freedreno/ir3: fix fixing-up register footprint

It isn't just vertex shaders that need to fix up the register footprint
for inputs that are populated before the shader starts.

This problem showed up with compute shaders.  If you have (for example)
a localregid sysval, but only the .x component is used, the hw still
writes the .yz components, which could overflow into other threads,
causing corruption.  This showed up in the CL CTS test
'basic/test_basic intmath_int', but in theory the same problem could
crop up elsewhere.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2018-02-28 17:33:29 -05:00
parent 9a62536108
commit 175d1b4372
2 changed files with 27 additions and 18 deletions

View file

@ -3418,7 +3418,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
so->varying_in++;
so->inputs[i].compmask = (1 << maxcomp) - 1;
inloc += maxcomp;
} else {
} else if (!so->inputs[i].sysval){
so->inputs[i].compmask = compmask;
}
so->inputs[i].regid = regid;

View file

@ -70,26 +70,35 @@ delete_variant(struct ir3_shader_variant *v)
static void
fixup_regfootprint(struct ir3_shader_variant *v)
{
if (v->type == SHADER_VERTEX) {
unsigned i;
for (i = 0; i < v->inputs_count; i++) {
/* skip frag inputs fetch via bary.f since their reg's are
* not written by gpu before shader starts (and in fact the
* regid's might not even be valid)
*/
if (v->inputs[i].bary)
continue;
unsigned i;
if (v->inputs[i].compmask) {
int32_t regid = (v->inputs[i].regid + 3) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
}
for (i = 0; i < v->outputs_count; i++) {
int32_t regid = (v->outputs[i].regid + 3) >> 2;
for (i = 0; i < v->inputs_count; i++) {
/* skip frag inputs fetch via bary.f since their reg's are
* not written by gpu before shader starts (and in fact the
* regid's might not even be valid)
*/
if (v->inputs[i].bary)
continue;
/* ignore high regs that are global to all threads in a warp
* (they exist by default) (a5xx+)
*/
if (v->inputs[i].regid >= regid(48,0))
continue;
if (v->inputs[i].compmask) {
unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
int32_t regid = (v->inputs[i].regid + n) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
} else if (v->type == SHADER_FRAGMENT) {
}
for (i = 0; i < v->outputs_count; i++) {
int32_t regid = (v->outputs[i].regid + 3) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
if (v->type == SHADER_FRAGMENT) {
/* NOTE: not sure how to turn pos_regid off.. but this could
* be, for example, r1.x while max reg used by the shader is
* r0.*, in which case we need to fixup the reg footprint: