r300: pad short vertex shaders to avoid R3xx hangs

Vertex shaders shorter than four instructions can hard-lock R3xx GPUs. This seems to happen in combination with a small vertex count. This was seen before, most notably with dummy shaders, but the earlier fix only removed those dummy shaders, so some occurrences could still slip through the cracks. Pad all vertex shaders to four instructions on R3xx. Reviewed-by: Filip Gawin <filip@gawin.net> Fixes: c6aa639ba9 ("r300: skip draws instead of using a dummy vertex shader") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/337 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40331>
2026-05-14 16:38:06 +02:00 · 2026-03-10 20:33:42 +01:00 · 2026-03-10 20:33:42 +01:00 · 9b12664b72
commit 9b12664b72
parent 5a84a6b775
2 changed files with 44 additions and 0 deletions
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -350,6 +350,42 @@ ei_pow(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsi
   inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 }

+/* Encode a four-word vector instruction whose three source words are all the same. */
+static void
+ei_vector0(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi,
+           unsigned int hw_opcode, unsigned int *inst)
+{
+   /* NOTE(review): __CONST presumably uses vp/vpi implicitly - confirm at its definition. */
+   const unsigned int zero_src = __CONST(0, RC_SWIZZLE_ZERO);
+
+   inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, 0, 0, PVS_DST_REG_TEMPORARY, 0);
+   inst[1] = inst[2] = inst[3] = zero_src;
+}
+
+/* Pad the program with RC_OPCODE_NOP until it holds at least four instructions. */
+static void
+pad_vertex_program_instructions(struct radeon_compiler *c)
+{
+   const unsigned min_inst_count = 4;
+   struct rc_instruction *const head = &c->Program.Instructions;
+   unsigned orig_inst_count = 0;

+   for (struct rc_instruction *it = head->Next; it != head; it = it->Next)
+      orig_inst_count++;

+   if (orig_inst_count >= min_inst_count)
+      return;

+   unsigned inst_count = orig_inst_count;
+   for (; inst_count < min_inst_count; inst_count++) {
+      /* NOTE(review): presumably inserts after head->Prev, i.e. at the tail - confirm. */
+      struct rc_instruction *nop = rc_insert_new_instruction(c, head->Prev);
+      nop->U.I.Opcode = RC_OPCODE_NOP;
+      nop->U.I.SrcReg[0].File = RC_FILE_NONE;
+   }
+   rc_debug(c, "r300: padded tiny VS from %u to %u instructions\n", orig_inst_count, inst_count);
+}
+
 static void
 translate_vertex_program(struct radeon_compiler *c, void *user)
 {
@@ -369,6 +405,10 @@ translate_vertex_program(struct radeon_compiler *c, void *user)

   compiler->SetHwInputOutput(compiler);

+   /* Vertex shaders shorter than 4 instructions can hard-lock r3xx GPUs. */
+   if (!c->is_r400 && !c->is_r500)
+      pad_vertex_program_instructions(c);
+
   for (rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions;
        rci = rci->Next) {
      struct rc_sub_instruction *vpi = &rci->U.I;
@@ -396,6 +436,9 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
             (vpi->Opcode != RC_OPCODE_SEQ && vpi->Opcode != RC_OPCODE_SNE));

      switch (vpi->Opcode) {
+      case RC_OPCODE_NOP:
+         ei_vector0(compiler->code, vpi, VECTOR_NO_OP, inst);
+         break;
      case RC_OPCODE_ADD:
         ei_vector2(compiler->code, VE_ADD, vpi, inst);
         break;
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -183,6 +183,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
    compiler.code = &vs->code;
    compiler.UserData = vs;
    compiler.Base.debug = &r300->context.debug;
+    compiler.Base.is_r400 = r300->screen->caps.is_r400;
    compiler.Base.is_r500 = r300->screen->caps.is_r500;
    compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
    /* Only R500 has few IEEE math opcodes. */