From 06e44ded39ba2a75b410327c8a9175af286b9563 Mon Sep 17 00:00:00 2001 From: Radu Costas Date: Wed, 27 May 2026 11:13:20 +0300 Subject: [PATCH] pco: Try allocating with optimal temp registers Enable getting and using the optimal number of temps instead of the maximum. Instead of going straight to the maximum amount and then spilling, register allocation will now first try to allocate with the optimal amount of temps, then try with the maximum, then spill. Signed-off-by: Radu Costas Reviewed-by: Simon Perretta Part-of: --- src/imagination/pco/pco_ra.c | 46 ++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/imagination/pco/pco_ra.c b/src/imagination/pco/pco_ra.c index ebfe1d328b7..6fcdf5a4805 100644 --- a/src/imagination/pco/pco_ra.c +++ b/src/imagination/pco/pco_ra.c @@ -815,8 +815,12 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx) bool allocated = ra_allocate(ra_graph); bool force_spill = PCO_DEBUG(RA_FORCE_SPILL); + if (ctx->state == PCO_RA_CTX_STATE_OPTIMAL && !allocated && !force_spill) { + ralloc_free(ra_regs); + return false; + } if (!allocated || force_spill) { - if (ctx->state < PCO_RA_CTX_STATE_SPILLING) { + if (ctx->state < PCO_RA_CTX_STATE_SPILLING && !ctx->temp_alloc_offset) { ctx->spill_inst_addr_comps[0] = pco_ref_hwreg(0, PCO_REG_CLASS_TEMP); ctx->spill_inst_addr_comps[1] = pco_ref_hwreg(1, PCO_REG_CLASS_TEMP); @@ -1110,8 +1114,9 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx) if (pco_should_print_shader(func->parent_shader) && PCO_DEBUG_PRINT(RA)) { printf( - "RA allocated %u temps, %u vtxins, %u interns from %u SSA vars, %u vregs.\n", + "RA allocated %u (%s) temps, %u vtxins, %u interns from %u SSA vars, %u vregs.\n", temps, + (ctx->state == PCO_RA_CTX_STATE_OPTIMAL ? "opt" : "max"), vtxins, interns, num_ssas, @@ -1135,10 +1140,9 @@ bool pco_ra(pco_shader *shader) /* Instruction indices need to be ordered for live ranges. */ pco_index(shader, false); - unsigned hw_temps = rogue_get_temps(shader->ctx->dev_info); - /* TODO: - * unsigned opt_temps = rogue_get_optimal_temps(shader->ctx->dev_info); - */ + unsigned max_temps = rogue_get_temps(shader->ctx->dev_info); + unsigned opt_temps = rogue_get_optimal_temps(shader->ctx->dev_info); + bool alloc_max = PCO_DEBUG(RA_SKIP_OPT); /* If any vertex input registers are already used, round up to the nearest * multiple of 4 as vertex input registers are allocated in blocks of 4. @@ -1159,7 +1163,6 @@ bool pco_ra(pco_shader *shader) /* TODO: different number of temps available if barriers are in use. */ /* TODO: support for internal registers. */ pco_ra_ctx ctx = { - .allocable_temps = hw_temps, .allocable_vtxins = hw_vtxins, .allocable_interns = 0, }; @@ -1168,21 +1171,34 @@ bool pco_ra(pco_shader *shader) unsigned wg_size = shader->data.cs.workgroup_size[0] * shader->data.cs.workgroup_size[1] * shader->data.cs.workgroup_size[2]; - ctx.allocable_temps = - rogue_max_wg_temps(shader->ctx->dev_info, - ctx.allocable_temps, - wg_size, - shader->data.common.uses.barriers); + max_temps = rogue_max_wg_temps(shader->ctx->dev_info, + max_temps, + wg_size, + shader->data.common.uses.barriers); + if (max_temps <= opt_temps) + alloc_max |= true; } /* Perform register allocation for each function. */ bool progress = false; pco_foreach_func_in_shader (func, shader) { - ctx.state = PCO_RA_CTX_STATE_MAXIMUM; + ctx.state = PCO_RA_CTX_STATE_OPTIMAL; + ctx.allocable_temps = opt_temps; + if (alloc_max) { + ctx.state = PCO_RA_CTX_STATE_MAXIMUM; + ctx.allocable_temps = max_temps; + } progress |= preproc_vecs(func); - while (ctx.state != PCO_RA_CTX_STATE_DONE) + while (ctx.state != PCO_RA_CTX_STATE_DONE) { progress |= pco_ra_func(func, &ctx); - + if (ctx.state == PCO_RA_CTX_STATE_OPTIMAL) { + /* Fallback to maximum temp allocation in case optimal allocation + * fails + */ + ctx.allocable_temps = max_temps; + ctx.state = PCO_RA_CTX_STATE_MAXIMUM; + } + } shader->data.common.temps = MAX2(shader->data.common.temps, func->temps); shader->data.common.vtxins = MAX2(shader->data.common.vtxins, func->vtxins);