From f82574fb2c58483cebcd89194d05018ed23fe6af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Wed, 7 Jun 2023 12:44:28 +0200 Subject: [PATCH] r300: move the ARL merging pass up in the opt loop Specifically after the first copy propagate run but before the second one. Removal of ARLs will enable the copy propagate to be more aggressive, as it is very careful in such cases. shader-db RV530: total instructions in shared programs: 131861 -> 131503 (-0.27%) instructions in affected programs: 23949 -> 23591 (-1.49%) helped: 199 HURT: 15 total temps in shared programs: 16997 -> 16903 (-0.55%) temps in affected programs: 767 -> 673 (-12.26%) helped: 69 HURT: 9 RV370: total instructions in shared programs: 82360 -> 82027 (-0.40%) instructions in affected programs: 19516 -> 19183 (-1.71%) helped: 183 HURT: 15 total temps in shared programs: 12370 -> 12262 (-0.87%) temps in affected programs: 664 -> 556 (-16.27%) helped: 73 HURT: 0 The hurt programs are due to some constant load being copy propagated which leads to bad interaction with source conflict resolve pass later. v2: add missing shader type initialization to the tests. Previously we were checking for has_omod which also practically means we have a fragment shader, however it's less readable. Reviewed-by: Emma Anholt Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/radeon_optimize.c | 37 ++++++++++++------- .../r300/compiler/tests/rc_test_helpers.c | 1 + 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 475d8964963..116d431227d 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -1374,6 +1374,21 @@ static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst) } } +/** + * Apply various optimizations specific to the A0 adress register loads. 
+ */ +static void optimize_A0_loads(struct radeon_compiler * c) { + struct rc_instruction * inst = c->Program.Instructions.Next; + + while (inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_ARL) { + merge_ARL(c, cur); + } + } +} + void rc_optimize(struct radeon_compiler * c, void *user) { struct rc_instruction * inst = c->Program.Instructions.Next; @@ -1393,6 +1408,10 @@ void rc_optimize(struct radeon_compiler * c, void *user) } } + if (c->type == RC_VERTEX_PROGRAM) { + optimize_A0_loads(c); + } + /* Merge MOVs to same source in different channels using the constant * swizzle. */ @@ -1419,6 +1438,10 @@ void rc_optimize(struct radeon_compiler * c, void *user) } } + if (c->type != RC_FRAGMENT_PROGRAM) { + return; + } + /* Presubtract operations. */ inst = c->Program.Instructions.Next; while(inst != &c->Program.Instructions) { @@ -1427,19 +1450,7 @@ void rc_optimize(struct radeon_compiler * c, void *user) peephole(c, cur); } - - if (!c->has_omod) { - inst = c->Program.Instructions.Next; - while (inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - if (cur->U.I.Opcode == RC_OPCODE_ARL) { - merge_ARL(c, cur); - } - } - return; - } - + /* Output modifiers. */ inst = c->Program.Instructions.Next; struct rc_list * var_list = NULL; while(inst != &c->Program.Instructions) { diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c index 008bf5d3174..0c85579ca9c 100644 --- a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c @@ -513,6 +513,7 @@ void init_compiler( rc_init_regalloc_state(rs, program_type); rc_init(c, rs); + c->type = program_type; c->is_r500 = is_r500; c->max_temp_regs = is_r500 ? 128 : (is_r400 ? 64 : 32); c->max_constants = is_r500 ? 256 : 32;