From 8d3f2d2c237fe88464b0cfc9de15a3d92390812d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Thu, 24 Oct 2024 18:54:22 +0200 Subject: [PATCH] r300: group KIL for R300/R400 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A KIL emitted on its own counts toward the indirection limit, so try to schedule it within a TEX block on R300/R400, except for small shaders, where we already know we are fine. This gains 42 shaders on R400 (40 from Trine and 2 from This War of Mine) at the expense of some increase in register pressure. shader-db: total temps in shared programs: 18107 -> 18129 (0.12%) temps in affected programs: 262 -> 284 (8.40%) helped: 0 HURT: 22 Signed-off-by: Pavel Ondračka Reviewed-by: Emma Anholt Part-of: --- .../r300/compiler/radeon_pair_schedule.c | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c index 8b61a83c783..19b20e3b679 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -135,7 +135,10 @@ struct schedule_state { long max_tex_group; unsigned PrevBlockHasTex : 1; unsigned PrevBlockHasKil : 1; + /* Number of TEX in the current block */ unsigned TEXCount; + /* Total number of TEX in the whole program.*/ + unsigned totalTEXCount; unsigned Opt : 1; }; @@ -1079,7 +1082,12 @@ emit_instruction(struct schedule_state *s, struct rc_instruction *before) #endif for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { - if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { + /* In general we want to emit KIL ASAP, however KIL does count into + * the indirection limit, so for R300/R400 we only do this if we + * are sure we can fit in there. 
+ */ + if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL && + (s->C->is_r500 || s->totalTEXCount <= 3)) { emit_all_tex(s, before); s->PrevBlockHasKil = 1; return; @@ -1308,6 +1316,19 @@ rc_pair_schedule(struct radeon_compiler *cc, void *user) s.CalcScore = calc_score_r300; } s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); + + /* First go over and count all TEX. */ + while (inst != &c->Base.Program.Instructions) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + if (info->HasTexture) { + s.totalTEXCount++; + } + } + inst = inst->Next; + } + + inst = c->Base.Program.Instructions.Next; while (inst != &c->Base.Program.Instructions) { struct rc_instruction *first;