diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
index 554e593cd17..19b0ce7e87b 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -350,6 +350,67 @@ ei_pow(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsi
    inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 }
 
+/*
+ * Emit a vector instruction that carries no meaningful operands (used for
+ * RC_OPCODE_NOP).
+ *
+ * inst[0] is built with PVS_OP_DST_OPERAND from hw_opcode and a temporary
+ * destination, every other field being zero.  inst[1] is built with
+ * __CONST(0, RC_SWIZZLE_ZERO) and copied into inst[2] and inst[3].
+ *
+ * NOTE(review): __CONST is a macro defined outside this hunk and presumably
+ * reads vp and vpi from the enclosing scope, so both parameters must stay.
+ *
+ * The parameter order (vp, hw_opcode, vpi, inst) follows the ei_vector2()
+ * call convention used in translate_vertex_program().
+ */
+static void
+ei_vector0(struct r300_vertex_program_code *vp,
+           unsigned int hw_opcode,
+           struct rc_sub_instruction *vpi,
+           unsigned int *inst)
+{
+   inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, 0, 0, PVS_DST_REG_TEMPORARY, 0);
+   inst[1] = __CONST(0, RC_SWIZZLE_ZERO);
+   inst[2] = inst[1];
+   inst[3] = inst[1];
+}
+
+/*
+ * Pad the program with RC_OPCODE_NOP instructions until it contains at least
+ * min_inst_count instructions.  Each NOP is inserted via
+ * rc_insert_new_instruction() relative to the current list tail
+ * (c->Program.Instructions.Prev).  Programs that are already long enough are
+ * left untouched; otherwise a debug message reports the old and new counts.
+ */
+static void
+pad_vertex_program_instructions(struct radeon_compiler *c)
+{
+   const unsigned min_inst_count = 4;
+   unsigned inst_count = 0;
+
+   for (struct rc_instruction *inst = c->Program.Instructions.Next;
+        inst != &c->Program.Instructions; inst = inst->Next) {
+      inst_count++;
+   }
+
+   /* Nothing to do (and nothing to log) when the program is long enough. */
+   if (inst_count >= min_inst_count)
+      return;
+
+   const unsigned orig_inst_count = inst_count;
+
+   for (; inst_count < min_inst_count; inst_count++) {
+      struct rc_instruction *nop = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+      nop->U.I.Opcode = RC_OPCODE_NOP;
+      /* No real source operand; presumably consumed by __CONST(0, ...) later. */
+      nop->U.I.SrcReg[0].File = RC_FILE_NONE;
+   }
+
+   rc_debug(c, "r300: padded tiny VS from %u to %u instructions\n",
+            orig_inst_count, inst_count);
+}
+
 static void
 translate_vertex_program(struct radeon_compiler *c, void *user)
 {
@@ -369,6 +430,10 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
 
    compiler->SetHwInputOutput(compiler);
 
+   /* Vertex shaders shorter than 4 instructions can hard-lock r3xx GPUs. */
+   if (!c->is_r400 && !c->is_r500)
+      pad_vertex_program_instructions(c);
+
    for (rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions;
         rci = rci->Next) {
       struct rc_sub_instruction *vpi = &rci->U.I;
@@ -396,6 +461,9 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
              (vpi->Opcode != RC_OPCODE_SEQ && vpi->Opcode != RC_OPCODE_SNE));
 
       switch (vpi->Opcode) {
+      case RC_OPCODE_NOP:
+         ei_vector0(compiler->code, VECTOR_NO_OP, vpi, inst);
+         break;
       case RC_OPCODE_ADD:
          ei_vector2(compiler->code, VE_ADD, vpi, inst);
          break;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 30a249cb8d0..17d6bc2ebf9 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -183,6 +183,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
     compiler.code = &vs->code;
     compiler.UserData = vs;
     compiler.Base.debug = &r300->context.debug;
+    compiler.Base.is_r400 = r300->screen->caps.is_r400;
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
     compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     /* Only R500 has few IEEE math opcodes. */