broadcom: disable perquad tmu loads after discards

Otherwise we may emit a load from an invalid offset from
a lane that was discarded.

This prevents a simulator assert from triggering when
executing:
dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_null_pointer_load

That test emits a conditional kill and then a buffer load
which would have invalid offsets for the lanes killed. Since
the buffer load is in uniform control flow we were incorrectly
emitting a full quad load, including disabled lanes which would
prompt the simulator to assert on invalid offsets being loaded
coming from the lanes that had been killed in the shader.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26683>
This commit is contained in:
Iago Toral Quiroga 2023-12-14 09:47:48 +01:00 committed by Marge Bot
parent 2b04fb410f
commit 716847a77d
2 changed files with 15 additions and 4 deletions

View file

@ -648,10 +648,14 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
v3d_tmu_get_type_from_op(tmu_op, !is_load) ==
V3D_TMU_OP_TYPE_ATOMIC;
/* Only load per-quad if we can be certain that all
* lanes in the quad are active.
*/
uint32_t perquad =
is_load && !vir_in_nonuniform_control_flow(c)
? GENERAL_TMU_LOOKUP_PER_QUAD
: GENERAL_TMU_LOOKUP_PER_PIXEL;
is_load && !vir_in_nonuniform_control_flow(c) &&
!c->emitted_discard ?
GENERAL_TMU_LOOKUP_PER_QUAD :
GENERAL_TMU_LOOKUP_PER_PIXEL;
config = 0xffffff00 | tmu_op << 3 | perquad;
if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
@ -3436,6 +3440,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_SETMSF_dest(c, vir_nop_reg(),
vir_uniform_ui(c, 0));
}
c->emitted_discard = true;
break;
case nir_intrinsic_discard_if: {
@ -3456,7 +3461,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
vir_uniform_ui(c, 0)), cond);
c->emitted_discard = true;
break;
}

View file

@ -914,6 +914,12 @@ struct v3d_compile {
bool tmu_dirty_rcl;
bool has_global_address;
/* If we have processed a discard/terminate instruction. This may
* cause some lanes to be inactive even during uniform control
* flow.
*/
bool emitted_discard;
};
struct v3d_uniform_list {