diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
index 5ad1f4eb857..4ee3b878b4c 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -237,6 +237,36 @@ static void ei_math1(struct r300_vertex_program_code *vp,
 	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
 }
 
+static void ei_cmp(struct r300_vertex_program_code *vp,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(VE_COND_MUX_GTE, /* word 0: opcode and destination */
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File),
+				     vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
+
+	/* A source whose File is RC_FILE_NONE (constant swizzle only) still
+	 * counts as a unique temporary, so the first such source borrows the
+	 * register index of the next source if that one is NONE or a temp. */
+	for (unsigned i = 0; i < 3; i++) {
+		unsigned j = (i + 1) % 3; /* next source, wrapping 2 -> 0 */
+		if (vpi->SrcReg[i].File == RC_FILE_NONE &&
+			(vpi->SrcReg[j].File == RC_FILE_NONE ||
+			 vpi->SrcReg[j].File == RC_FILE_TEMPORARY)) {
+			vpi->SrcReg[i].Index = vpi->SrcReg[j].Index;
+			break; /* only the first matching source is patched */
+		}
+	}
+
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[2]); /* SrcReg[2] and SrcReg[1] are emitted swapped; */
+	inst[3] = t_src(vp, &vpi->SrcReg[1]); /* presumably GTE vs. CMP's "< 0" test - confirm */
+}
+
 static void ei_lit(struct r300_vertex_program_code *vp,
 				      struct rc_sub_instruction *vpi,
 				      unsigned int * inst)
@@ -414,6 +444,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
 		case RC_OPCODE_ARR: ei_vector1(compiler->code, VE_FLT2FIX_DX_RND, vpi, inst); break;
 		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+		case RC_OPCODE_CMP: ei_cmp(compiler->code, vpi, inst); break;
 		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
 		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
 		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index 314fa7655ee..2e2d75143e1 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -395,7 +395,18 @@ int radeonTransformALU(
 static void transform_r300_vertex_CMP(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
-	/* There is no decent CMP available, so let's rig one up.
+	/* R5xx has a native CMP, but it can only be used when the sources read
+	 * fewer than three different temporaries; keep the CMP in that case. */
+	if (c->is_r500 &&
+	    (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY ||
+	     inst->U.I.SrcReg[1].File != RC_FILE_TEMPORARY ||
+	     inst->U.I.SrcReg[2].File != RC_FILE_TEMPORARY ||
+	     inst->U.I.SrcReg[0].Index == inst->U.I.SrcReg[1].Index ||
+	     inst->U.I.SrcReg[1].Index == inst->U.I.SrcReg[2].Index ||
+	     inst->U.I.SrcReg[0].Index == inst->U.I.SrcReg[2].Index))
+		return;
+
+	/* There is no decent CMP available on r300, so let's rig one up.
 	 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
 	 * The following sequence consumes zero to two temps and two extra slots
 	 * (the second temp and the second slot is consumed by transform_LRP),
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 1e920b0cc74..c7086853a8a 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -512,6 +512,7 @@ static int r300_get_video_param(struct pipe_screen *screen,
 
 static const nir_shader_compiler_options r500_vs_compiler_options = {
    COMMON_NIR_OPTIONS,
+   .has_fused_comp_and_csel = true,
 
    /* Have HW loops support and 1024 max instr count, but don't unroll *too*
     * hard.
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 269281cd235..bd1202be031 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1952,7 +1952,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
         .ubo_vec4_max = 0x00ff,
     };
     static const struct nir_to_tgsi_options hwtcl_r500_options = {
-        .lower_cmp = true,
         .ubo_vec4_max = 0x00ff,
     };
     const struct nir_to_tgsi_options *ntt_options;