From 0508db91556242c57029ad538613c2b1ee1969ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Wed, 9 Aug 2023 10:17:48 +0200 Subject: [PATCH] r300: implement bias presubtract RV530 shader-db: total instructions in shared programs: 129468 -> 128859 (-0.47%) instructions in affected programs: 34432 -> 33823 (-1.77%) helped: 362 HURT: 56 total presub in shared programs: 5411 -> 7635 (41.10%) presub in affected programs: 2069 -> 4293 (107.49%) helped: 8 HURT: 468 total temps in shared programs: 16918 -> 16944 (0.15%) temps in affected programs: 2022 -> 2048 (1.29%) helped: 73 HURT: 79 total lits in shared programs: 3555 -> 2913 (-18.06%) lits in affected programs: 2346 -> 1704 (-27.37%) helped: 479 HURT: 0 total cycles in shared programs: 194675 -> 194124 (-0.28%) cycles in affected programs: 62939 -> 62388 (-0.88%) helped: 343 HURT: 84 Also dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15 now fits into the instruction limit on RV370. Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/ci/r300-rv370-fails.txt | 1 - .../r300/ci/r300-rv370-swtcl-fails.txt | 1 - .../r300/compiler/r300_nir_algebraic.py | 6 ++ .../drivers/r300/compiler/radeon_optimize.c | 87 ++++++++++++++++++- 4 files changed, 91 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r300/ci/r300-rv370-fails.txt b/src/gallium/drivers/r300/ci/r300-rv370-fails.txt index 4097cfeb002..0c43c21baff 100644 --- a/src/gallium/drivers/r300/ci/r300-rv370-fails.txt +++ b/src/gallium/drivers/r300/ci/r300-rv370-fails.txt @@ -53,7 +53,6 @@ dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail -dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail 
dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_l8_npot,Fail dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_rgb888_npot,Fail diff --git a/src/gallium/drivers/r300/ci/r300-rv370-swtcl-fails.txt b/src/gallium/drivers/r300/ci/r300-rv370-swtcl-fails.txt index 3ce139c3732..b5060ffc0e0 100644 --- a/src/gallium/drivers/r300/ci/r300-rv370-swtcl-fails.txt +++ b/src/gallium/drivers/r300/ci/r300-rv370-swtcl-fails.txt @@ -47,7 +47,6 @@ dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail -dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_l8_npot,Fail dEQP-GLES2.functional.texture.filtering.cube.linear_linear_clamp_rgb888_npot,Fail diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py index 7967f14f4f2..c27dba90ec4 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py +++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py @@ -70,6 +70,12 @@ r300_nir_prepare_presubtract = [ (('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))), (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))), (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))), + # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form. 
+ (('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)), + (('ffma', a, -2.0, 1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))), + (('ffma', -2.0, a, 1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))), + (('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))), + (('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))), ] # Previous prepare_presubtract pass can sometimes produce double fneg patterns. diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 9ecb9eccab9..662346853e6 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -481,7 +481,7 @@ static int is_presub_candidate( unsigned int i; unsigned int is_constant[2] = {0, 0}; - assert(inst->U.I.Opcode == RC_OPCODE_ADD); + assert(inst->U.I.Opcode == RC_OPCODE_ADD || inst->U.I.Opcode == RC_OPCODE_MAD); if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode @@ -490,7 +490,7 @@ static int is_presub_candidate( return 0; } - /* If both sources use a constant swizzle, then we can't convert it to + /* If first two sources use a constant swizzle, then we can't convert it to * a presubtract operation. In fact for the ADD and SUB presubtract * operations neither source can contain a constant swizzle. 
This * specific case is checked in peephole_add_presub_add() when @@ -573,6 +573,23 @@ static void presub_replace_inv( inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; } +static void presub_replace_bias( + struct rc_instruction * inst_mad, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + /* We must be careful not to modify inst_mad, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_mad->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_BIAS; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_BIAS; +} + /** * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source @@ -622,6 +639,66 @@ static int peephole_add_presub_inv( return 0; } +/** + * PRESUB_BIAS: MAD -TEMP[0], 2.0, 1.0 + * Use the presubtract 1 - 2*src0 for all readers of TEMP[0]. The third source + * of the MAD instruction must have the constant 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the MAD instruction is still part of the program. + * 1 if the MAD instruction is no longer part of the program. + */ +static int peephole_mad_presub_bias( + struct radeon_compiler * c, + struct rc_instruction * inst_mad) +{ + unsigned int i, swz; + + if (!is_presub_candidate(c, inst_mad)) + return 0; + + /* Check if src2 is 1. */ + for(i = 0; i < 4; i++ ) { + if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) + continue; + + swz = GET_SWZ(inst_mad->U.I.SrcReg[2].Swizzle, i); + if (swz != RC_SWIZZLE_ONE || inst_mad->U.I.SrcReg[2].Negate & (1 << i)) + return 0; + } + + /* Check if src1 is 2. 
*/ + struct rc_src_register src1_reg = inst_mad->U.I.SrcReg[1]; + if ((src1_reg.Negate & inst_mad->U.I.DstReg.WriteMask) != 0 || src1_reg.Abs) + return 0; + struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index]; + if (constant->Type != RC_CONSTANT_IMMEDIATE) + return 0; + for (i = 0; i < 4; i++) { + if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) + continue; + swz = GET_SWZ(src1_reg.Swizzle, i); + if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0) + return 0; + } + + /* Check src0. */ + if ((inst_mad->U.I.SrcReg[0].Negate & inst_mad->U.I.DstReg.WriteMask) != + inst_mad->U.I.DstReg.WriteMask + || inst_mad->U.I.SrcReg[0].Abs + || src_has_const_swz(inst_mad->U.I.SrcReg[0])) { + + return 0; + } + + if (presub_helper(c, inst_mad, RC_PRESUB_BIAS, presub_replace_bias)) { + rc_remove_instruction(inst_mad); + return 1; + } + return 0; +} + struct peephole_mul_cb_data { struct rc_dst_register * Writer; unsigned int Clobbered; @@ -821,6 +898,12 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) return 1; break; } + case RC_OPCODE_MAD: + { + if (peephole_mad_presub_bias(c, inst)) + return 1; + break; + } default: break; }