mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-09 16:20:27 +01:00
r300: move the ARL merging pass up in the opt loop
Specifically after the first copy propagate run but before the second one. Removal of ARLs will enable the copy propagate to be more aggressive, as it is very careful in such cases. shader-db RV530: total instructions in shared programs: 131861 -> 131503 (-0.27%) instructions in affected programs: 23949 -> 23591 (-1.49%) helped: 199 HURT: 15 total temps in shared programs: 16997 -> 16903 (-0.55%) temps in affected programs: 767 -> 673 (-12.26%) helped: 69 HURT: 9 RV370: total instructions in shared programs: 82360 -> 82027 (-0.40%) instructions in affected programs: 19516 -> 19183 (-1.71%) helped: 183 HURT: 15 total temps in shared programs: 12370 -> 12262 (-0.87%) temps in affected programs: 664 -> 556 (-16.27%) helped: 73 HURT: 0 The hurt programs are due to some constant load being copy propagated which leads to bad interaction with source conflict resolve pass later. v2: add missing shader type initialization to the tests. Previously we were checking for has_omod which also practically means we have a fragment shader, however it's less readable. Reviewed-by: Emma Anholt <emma@anholt.net> Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23560>
This commit is contained in:
parent
453201fe74
commit
f82574fb2c
2 changed files with 25 additions and 13 deletions
|
|
@ -1374,6 +1374,21 @@ static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply various optimizations specific to the A0 address register loads.
|
||||
*
* Walks the program's instruction list from its first entry until the
* traversal returns to the list head (&c->Program.Instructions) and passes
* every instruction whose opcode is RC_OPCODE_ARL to merge_ARL().
*
* \param c compiler state whose Program.Instructions list is scanned.
*/
|
||||
static void optimize_A0_loads(struct radeon_compiler * c) {
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
|
||||
while (inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * cur = inst;
|
||||
/* Step to the next instruction before handling the current one.
 * NOTE(review): presumably merge_ARL() may remove instructions from
 * the list, which is why the successor is fetched first - confirm. */
inst = inst->Next;
|
||||
if (cur->U.I.Opcode == RC_OPCODE_ARL) {
|
||||
merge_ARL(c, cur);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
|
|
@ -1393,6 +1408,10 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
|||
}
|
||||
}
|
||||
|
||||
if (c->type == RC_VERTEX_PROGRAM) {
|
||||
optimize_A0_loads(c);
|
||||
}
|
||||
|
||||
/* Merge MOVs to same source in different channels using the constant
|
||||
* swizzle.
|
||||
*/
|
||||
|
|
@ -1419,6 +1438,10 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
|||
}
|
||||
}
|
||||
|
||||
if (c->type != RC_FRAGMENT_PROGRAM) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Presubtract operations. */
|
||||
inst = c->Program.Instructions.Next;
|
||||
while(inst != &c->Program.Instructions) {
|
||||
|
|
@ -1427,19 +1450,7 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
|||
peephole(c, cur);
|
||||
}
|
||||
|
||||
|
||||
if (!c->has_omod) {
|
||||
inst = c->Program.Instructions.Next;
|
||||
while (inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * cur = inst;
|
||||
inst = inst->Next;
|
||||
if (cur->U.I.Opcode == RC_OPCODE_ARL) {
|
||||
merge_ARL(c, cur);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Output modifiers. */
|
||||
inst = c->Program.Instructions.Next;
|
||||
struct rc_list * var_list = NULL;
|
||||
while(inst != &c->Program.Instructions) {
|
||||
|
|
|
|||
|
|
@ -513,6 +513,7 @@ void init_compiler(
|
|||
rc_init_regalloc_state(rs, program_type);
|
||||
rc_init(c, rs);
|
||||
|
||||
c->type = program_type;
|
||||
c->is_r500 = is_r500;
|
||||
c->max_temp_regs = is_r500 ? 128 : (is_r400 ? 64 : 32);
|
||||
c->max_constants = is_r500 ? 256 : 32;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue