r300: pad short vertex shaders to avoid R3xx hangs
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Vertex shaders shorter than four instructions can hard-lock R3xx GPUs.
The hang seems to happen in combination with a small vertex count. It was
seen before, most notably with dummy shaders, but the earlier fix only
removed those dummy shaders, so some occurrences could still slip
through the cracks. Pad all vertex shaders to four instructions on R3xx.

Reviewed-by: Filip Gawin <filip@gawin.net>
Fixes: c6aa639ba9 ("r300: skip draws instead of using a dummy vertex shader")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/337
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40331>
This commit is contained in:
Pavel Ondračka 2026-03-10 20:33:42 +01:00 committed by Marge Bot
parent 5a84a6b775
commit 9b12664b72
2 changed files with 44 additions and 0 deletions

View file

@ -350,6 +350,42 @@ ei_pow(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsi
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
/**
 * Encode a vector instruction whose three source slots are all filled with
 * the same "source 0, zero swizzle" operand word.
 *
 * \param vp         vertex program code being emitted; not referenced by name
 *                   in this body.
 * \param vpi        sub-instruction being translated; not referenced by name
 *                   in this body.
 * \param hw_opcode  hardware opcode placed in the opcode/destination word.
 * \param inst       output; receives the four dwords inst[0]..inst[3].
 *
 * NOTE(review): __CONST is a macro defined outside this view; it presumably
 * picks up `vp` and `vpi` from the enclosing scope, which would be why both
 * are parameters here -- confirm against the macro definition.
 */
static void
ei_vector0(struct r300_vertex_program_code *vp,
           struct rc_sub_instruction *vpi,
           unsigned int hw_opcode,
           unsigned int *inst)
{
   /* Opcode/destination word: temporary register file, all other fields 0. */
   inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, 0, 0, PVS_DST_REG_TEMPORARY, 0);

   /* Build the source word once and replicate it into all three slots. */
   const unsigned int zero_operand = __CONST(0, RC_SWIZZLE_ZERO);
   for (unsigned int slot = 1; slot <= 3; slot++) {
      inst[slot] = zero_operand;
   }
}
/**
 * Make sure the program holds at least four instructions by adding NOPs.
 *
 * Walks the instruction list of \p c to count its entries. If there are
 * fewer than four, new instructions with opcode RC_OPCODE_NOP and
 * SrcReg[0].File set to RC_FILE_NONE are inserted until the count reaches
 * four, and a debug message reporting the old and new counts is emitted.
 * Programs that already have four or more instructions are left untouched.
 *
 * \param c  compiler whose Program.Instructions list is counted and padded.
 */
static void
pad_vertex_program_instructions(struct radeon_compiler *c)
{
   const unsigned required_len = 4;

   /* Count the instructions currently in the list (the list head itself is
    * the sentinel that terminates the walk). */
   unsigned original_len = 0;
   struct rc_instruction *cursor = c->Program.Instructions.Next;
   while (cursor != &c->Program.Instructions) {
      original_len++;
      cursor = cursor->Next;
   }

   /* Already long enough: nothing to insert, nothing to report. */
   if (original_len >= required_len)
      return;

   unsigned padded_len = original_len;
   do {
      /* Each NOP is inserted relative to the current list tail
       * (Instructions.Prev), which is re-read on every iteration. */
      struct rc_instruction *nop =
         rc_insert_new_instruction(c, c->Program.Instructions.Prev);

      nop->U.I.Opcode = RC_OPCODE_NOP;
      nop->U.I.SrcReg[0].File = RC_FILE_NONE;
      padded_len++;
   } while (padded_len < required_len);

   rc_debug(c, "r300: padded tiny VS from %u to %u instructions\n",
            original_len, padded_len);
}
static void
translate_vertex_program(struct radeon_compiler *c, void *user)
{
@ -369,6 +405,10 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
compiler->SetHwInputOutput(compiler);
/* Vertex shaders shorter than 4 instructions can hard-lock r3xx GPUs. */
if (!c->is_r400 && !c->is_r500)
pad_vertex_program_instructions(c);
for (rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions;
rci = rci->Next) {
struct rc_sub_instruction *vpi = &rci->U.I;
@ -396,6 +436,9 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
(vpi->Opcode != RC_OPCODE_SEQ && vpi->Opcode != RC_OPCODE_SNE));
switch (vpi->Opcode) {
case RC_OPCODE_NOP:
ei_vector0(compiler->code, vpi, VECTOR_NO_OP, inst);
break;
case RC_OPCODE_ADD:
ei_vector2(compiler->code, VE_ADD, vpi, inst);
break;

View file

@ -183,6 +183,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.code = &vs->code;
compiler.UserData = vs;
compiler.Base.debug = &r300->context.debug;
compiler.Base.is_r400 = r300->screen->caps.is_r400;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
/* Only R500 has few IEEE math opcodes. */