From 58f19311043c97fc20c077d04158519d01652703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Wed, 23 Aug 2023 13:55:48 +0200 Subject: [PATCH] r300: use w channel for scalar opcodes if possible The opcodes write to w by default so using anything else means we can't schedule anything in the rgb slot anyway because we have to replicate the result from w. We already attempt to do this during the scheduling, but at that point it is more tricky, so doing it early leads to much better code. Performance++ RV530 benchmarks: Lightsmark, 1280x800, fullscreen before: N Min Max Median Avg Stddev x 5 27.32 27.36 27.34 27.34 0.015811388 after: N Min Max Median Avg Stddev x 5 27.53 27.61 27.59 27.576 0.034351128 Unigine Sanctuary, 1280x800, fullscreen, medium shaders before: N Min Max Median Avg Stddev x 5 10.1211 10.1238 10.1214 10.12192 0.0011211601 after: N Min Max Median Avg Stddev x 5 10.4607 10.4637 10.4619 10.46206 0.0012441865 RV530 shader-db: total instructions in shared programs: 129643 -> 128038 (-1.24%) instructions in affected programs: 45415 -> 43810 (-3.53%) helped: 514 HURT: 43 total presub in shared programs: 4912 -> 5201 (5.88%) presub in affected programs: 752 -> 1041 (38.43%) helped: 40 HURT: 30 total omod in shared programs: 381 -> 383 (0.52%) omod in affected programs: 6 -> 8 (33.33%) helped: 1 HURT: 3 total temps in shared programs: 16904 -> 16841 (-0.37%) temps in affected programs: 1377 -> 1314 (-4.58%) helped: 81 HURT: 52 total lits in shared programs: 3555 -> 3550 (-0.14%) lits in affected programs: 294 -> 289 (-1.70%) helped: 13 HURT: 11 total cycles in shared programs: 194771 -> 193734 (-0.53%) cycles in affected programs: 79079 -> 78042 (-1.31%) helped: 452 HURT: 84 GAINED: shaders/glamor/82.shader_test FS RV370 shader-db: total instructions in shared programs: 82116 -> 81600 (-0.63%) instructions in affected programs: 11888 -> 11372 (-4.34%) helped: 273 HURT: 40 total temps in shared programs: 12438 -> 12441 (0.02%) temps in 
affected programs: 692 -> 695 (0.43%) helped: 36 HURT: 39 total cycles in shared programs: 128140 -> 127630 (-0.40%) cycles in affected programs: 25838 -> 25328 (-1.97%) helped: 266 HURT: 41 GAINED: shaders/0ad/12.shader_test FS GAINED: shaders/CC3-tiberium-wars/314.shader_test FS GAINED: shaders/lightsmark/16.shader_test FS GAINED: shaders/sanctuary/159.shader_test FS GAINED: shaders/sanctuary/162.shader_test FS GAINED: shaders/sanctuary/51.shader_test FS GAINED: shaders/sanctuary/54.shader_test FS GAINED: shaders/trine/fp-422.shader_test FS Partial fix for: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6661 Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/r3xx_fragprog.c | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c index 556a16a2e0d..67680915226 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -30,6 +30,8 @@ #include "radeon_program_tex.h" #include "radeon_rename_regs.h" #include "radeon_remove_constants.h" +#include "radeon_variable.h" +#include "radeon_list.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" @@ -65,6 +67,48 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) } } +/** + * This function will try to convert rgb instructions into alpha instructions + * and vice versa. While this is already attempted during the pair scheduling, + * it is much simpler to do it before pair conversion, so do it here at least for + * the simple cases. + * + * Currently only math opcodes writing to rgb (and with no friends) are + * converted to alpha. + * + * This function assumes all the instructions are still of type + * RC_INSTRUCTION_NORMAL, the conversion is much simpler. 
+ * + * Beware that this needs to be also called before doing presubtract, because + * rc_get_variables can't get properly readers for normal instructions if presubtract + * is present (it works fine for pair instructions). + */ +static void rc_convert_rgb_alpha(struct radeon_compiler *c, void *user) +{ + struct rc_list * variables; + struct rc_list * var_ptr; + + variables = rc_get_variables(c); + + for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) { + struct rc_variable * var = var_ptr->Item; + + if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + continue; + } + + /* Only rewrite scalar opcodes that are used separately for now. */ + if (var->Friend) + continue; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(var->Inst->U.I.Opcode); + if (opcode->IsStandardScalar && var->Dst.WriteMask != RC_MASK_W) { + unsigned index = rc_find_free_temporary(c); + rc_variable_change_dst(var, index, RC_MASK_W); + } + } +} + void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; @@ -104,6 +148,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, {"deadcode", 1, opt, rc_dataflow_deadcode, NULL}, + {"convert rgb<->alpha", 1, opt, rc_convert_rgb_alpha, NULL}, {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, {"dataflow optimize", 1, opt, rc_optimize, NULL}, {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},