r300: move the ARL merging pass up in the opt loop

Specifically after the first copy propagate run but before the
second one. Removal of ARLs will enable the copy propagate to be more
aggressive, as it is very careful in such cases.

shader-db

RV530:
total instructions in shared programs: 131861 -> 131503 (-0.27%)
instructions in affected programs: 23949 -> 23591 (-1.49%)
helped: 199
HURT: 15
total temps in shared programs: 16997 -> 16903 (-0.55%)
temps in affected programs: 767 -> 673 (-12.26%)
helped: 69
HURT: 9

RV370:
total instructions in shared programs: 82360 -> 82027 (-0.40%)
instructions in affected programs: 19516 -> 19183 (-1.71%)
helped: 183
HURT: 15
total temps in shared programs: 12370 -> 12262 (-0.87%)
temps in affected programs: 664 -> 556 (-16.27%)
helped: 73
HURT: 0

The hurt programs are due to some constant load being copy propagated
which leads to bad interaction with source conflict resolve pass later.

v2: add the missing shader type initialization to the tests. Previously we
were checking for has_omod, which also practically means we have a fragment
shader; however, it's less readable.

Reviewed-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23560>
This commit is contained in:
Pavel Ondračka 2023-06-07 12:44:28 +02:00 committed by Marge Bot
parent 453201fe74
commit f82574fb2c
2 changed files with 25 additions and 13 deletions

View file

@ -1374,6 +1374,21 @@ static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst)
}
}
/**
 * Run the A0 address register load optimizations over the whole program.
 *
 * Walks the instruction list of \p c once, from the first instruction until
 * the list head sentinel is reached again, and passes every ARL instruction
 * to merge_ARL().
 */
static void optimize_A0_loads(struct radeon_compiler * c) {
	struct rc_instruction * const list_head = &c->Program.Instructions;
	struct rc_instruction * candidate = list_head->Next;

	while (candidate != list_head) {
		/* Remember the successor before handing the instruction to
		 * merge_ARL(), so the walk never re-reads the link of the
		 * instruction that was just processed.
		 * NOTE(review): presumably merge_ARL() may unlink instructions
		 * from the list - confirm against its implementation. */
		struct rc_instruction * const following = candidate->Next;

		if (candidate->U.I.Opcode == RC_OPCODE_ARL) {
			merge_ARL(c, candidate);
		}

		candidate = following;
	}
}
void rc_optimize(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst = c->Program.Instructions.Next;
@ -1393,6 +1408,10 @@ void rc_optimize(struct radeon_compiler * c, void *user)
}
}
if (c->type == RC_VERTEX_PROGRAM) {
optimize_A0_loads(c);
}
/* Merge MOVs to same source in different channels using the constant
* swizzle.
*/
@ -1419,6 +1438,10 @@ void rc_optimize(struct radeon_compiler * c, void *user)
}
}
if (c->type != RC_FRAGMENT_PROGRAM) {
return;
}
/* Presubtract operations. */
inst = c->Program.Instructions.Next;
while(inst != &c->Program.Instructions) {
@ -1427,19 +1450,7 @@ void rc_optimize(struct radeon_compiler * c, void *user)
peephole(c, cur);
}
if (!c->has_omod) {
inst = c->Program.Instructions.Next;
while (inst != &c->Program.Instructions) {
struct rc_instruction * cur = inst;
inst = inst->Next;
if (cur->U.I.Opcode == RC_OPCODE_ARL) {
merge_ARL(c, cur);
}
}
return;
}
/* Output modifiers. */
inst = c->Program.Instructions.Next;
struct rc_list * var_list = NULL;
while(inst != &c->Program.Instructions) {

View file

@ -513,6 +513,7 @@ void init_compiler(
rc_init_regalloc_state(rs, program_type);
rc_init(c, rs);
c->type = program_type;
c->is_r500 = is_r500;
c->max_temp_regs = is_r500 ? 128 : (is_r400 ? 64 : 32);
c->max_constants = is_r500 ? 256 : 32;