From 9b12664b72bb71cf8697ef3dc764f4e04a89c03f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?=
Date: Tue, 10 Mar 2026 20:33:42 +0100
Subject: [PATCH] r300: pad short vertex shaders to avoid R3xx hangs

Vertex shaders shorter than four instructions can hard-lock R3xx GPUs.
This seems to happen in combination with a small vertex count. This was
seen before, most notably with dummy shaders, but the earlier fix only
removed those dummy shaders, so some occurrences could still slip
through the cracks.

Pad all vertex shaders to four instructions on R3xx.

Reviewed-by: Filip Gawin
Fixes: c6aa639ba9b ("r300: skip draws instead of using a dummy vertex shader")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/337
Part-of:
---
Reviewer notes (commentary only: text between the "---" separator and the
first "diff --git" header is not part of the commit message or the patch):

- ei_vector0() fills one four-dword instruction: dword 0 is built with
  PVS_OP_DST_OPERAND() for the given hw_opcode, and the three source
  dwords all hold the same __CONST(0, RC_SWIZZLE_ZERO) value.
- pad_vertex_program_instructions() walks c->Program.Instructions to count
  the instructions, then adds RC_OPCODE_NOP instructions through
  rc_insert_new_instruction(c, c->Program.Instructions.Prev) until the
  count reaches four (presumably appending at the end of the list --
  confirm against rc_insert_new_instruction). When padding happened it
  reports the old and new counts through rc_debug().
- translate_vertex_program() calls the padding helper only when neither
  c->is_r400 nor c->is_r500 is set, and translates RC_OPCODE_NOP through
  ei_vector0() with VECTOR_NO_OP.
- r300_vs.c now copies caps.is_r400 into compiler.Base.is_r400; presumably
  this is the field the new !c->is_r400 check reads -- confirm.
- NOTE(review): in this copy the From:/Reviewed-by: lines carry no e-mail
  address and "Part-of:" has no value; they look stripped. Confirm against
  the original submission before applying with git-am.
- NOTE(review): this copy had lost its line breaks; leading whitespace of
  context and added lines was reconstructed assuming 3-space indentation.
  Verify against the target tree (or apply with --ignore-whitespace).

 .../drivers/r300/compiler/r3xx_vertprog.c     | 43 +++++++++++++++++++
 src/gallium/drivers/r300/r300_vs.c            |  1 +
 2 files changed, 44 insertions(+)

diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
index 554e593cd17..19b0ce7e87b 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -350,6 +350,42 @@ ei_pow(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsi
    inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 }
 
+static void
+ei_vector0(struct r300_vertex_program_code *vp,
+           struct rc_sub_instruction *vpi,
+           unsigned int hw_opcode,
+           unsigned int *inst)
+{
+   inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, 0, 0, PVS_DST_REG_TEMPORARY, 0);
+   inst[1] = __CONST(0, RC_SWIZZLE_ZERO);
+   inst[2] = inst[1];
+   inst[3] = inst[1];
+}
+
+static void
+pad_vertex_program_instructions(struct radeon_compiler *c)
+{
+   const unsigned min_inst_count = 4;
+   unsigned inst_count = 0;
+
+   for (struct rc_instruction *inst = c->Program.Instructions.Next;
+        inst != &c->Program.Instructions; inst = inst->Next)
+      inst_count++;
+
+   unsigned orig_inst_count = inst_count;
+
+   while (inst_count < min_inst_count) {
+      struct rc_instruction *inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+      inst->U.I.Opcode = RC_OPCODE_NOP;
+      inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+      inst_count++;
+   }
+
+   if (orig_inst_count < min_inst_count)
+      rc_debug(c, "r300: padded tiny VS from %u to %u instructions\n",
+               orig_inst_count, inst_count);
+}
+
 static void
 translate_vertex_program(struct radeon_compiler *c, void *user)
 {
@@ -369,6 +405,10 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
 
    compiler->SetHwInputOutput(compiler);
 
+   /* Vertex shaders shorter than 4 instructions can hard-lock r3xx GPUs. */
+   if (!c->is_r400 && !c->is_r500)
+      pad_vertex_program_instructions(c);
+
    for (rci = compiler->Base.Program.Instructions.Next;
         rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
       struct rc_sub_instruction *vpi = &rci->U.I;
@@ -396,6 +436,9 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
              (vpi->Opcode != RC_OPCODE_SEQ && vpi->Opcode != RC_OPCODE_SNE));
 
       switch (vpi->Opcode) {
+      case RC_OPCODE_NOP:
+         ei_vector0(compiler->code, vpi, VECTOR_NO_OP, inst);
+         break;
       case RC_OPCODE_ADD:
          ei_vector2(compiler->code, VE_ADD, vpi, inst);
          break;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 30a249cb8d0..17d6bc2ebf9 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -183,6 +183,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
    compiler.code = &vs->code;
    compiler.UserData = vs;
    compiler.Base.debug = &r300->context.debug;
+   compiler.Base.is_r400 = r300->screen->caps.is_r400;
    compiler.Base.is_r500 = r300->screen->caps.is_r500;
    compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
    /* Only R500 has few IEEE math opcodes. */