From c3f51a5dcf2d7f2987ee34e5c485f9fabfdddf61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Sun, 28 Aug 2022 18:41:08 +0200 Subject: [PATCH] r300: allow presubtract when both ADD sources are negative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current code doesn't handle this, however it is easy to make it work by moving the negate to the presubtract source. Minor win in shader-db, mostly with Unigine shaders. Shader-db RV530: total instructions in shared programs: 136382 -> 136236 (-0.11%) instructions in affected programs: 9911 -> 9765 (-1.47%) total temps in shared programs: 18939 -> 18942 (0.02%) temps in affected programs: 37 -> 40 (8.11%) Reviewed-by: Filip Gawin Signed-off-by: Pavel Ondračka Part-of: --- .../drivers/r300/compiler/radeon_optimize.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 4d8f5cbf031..bc4af2c81ee 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -504,25 +504,29 @@ static void presub_replace_add( { rc_presubtract_op presub_opcode; - /* This function assumes that inst_add->U.I.SrcReg[0] and - * inst_add->U.I.SrcReg[1] aren't both negative. - */ - assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate)); + unsigned int negates = 0; + if (inst_add->U.I.SrcReg[0].Negate) + negates++; + if (inst_add->U.I.SrcReg[1].Negate) + negates++; + assert(negates != 2 || inst_add->U.I.SrcReg[1].Negate == inst_add->U.I.SrcReg[0].Negate); - if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) + if (negates == 1) presub_opcode = RC_PRESUB_SUB; else presub_opcode = RC_PRESUB_ADD; - if (inst_add->U.I.SrcReg[1].Negate) { + if (inst_add->U.I.SrcReg[1].Negate && negates == 1) { inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; } else { inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; } - inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; - inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + /* If both sources are negative we can move the negate to the presub. */ + unsigned negate_mask = negates == 1 ? 0 : inst_add->U.I.SrcReg[0].Negate; + inst_reader->U.I.PreSub.SrcReg[0].Negate = negate_mask; + inst_reader->U.I.PreSub.SrcReg[1].Negate = negate_mask; inst_reader->U.I.PreSub.Opcode = presub_opcode; inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], @@ -596,10 +600,6 @@ static int peephole_add_presub_add( if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) return 0; - /* presub_replace_add() assumes only one is negative */ - if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) - return 0; - /* if src0 is negative, at least all bits of dstmask have to be set */ if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) return 0;