mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 05:48:07 +02:00
pan: Add spill cost metric
Our SSA spilling logic should avoid inserting spill code inside loops. Add a metric that reflects this goal. Reviewed-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38181>
This commit is contained in:
parent
47f4b00cb2
commit
bb7b0b6b1b
4 changed files with 114 additions and 0 deletions
|
|
@ -1119,6 +1119,67 @@ bi_out_of_ssa(bi_context *ctx)
|
|||
return first_reg;
|
||||
}
|
||||
|
||||
static bool
|
||||
op_is_load_store(enum bi_opcode op)
|
||||
{
|
||||
switch (op) {
|
||||
case BI_OPCODE_STORE_I8:
|
||||
case BI_OPCODE_STORE_I16:
|
||||
case BI_OPCODE_STORE_I24:
|
||||
case BI_OPCODE_STORE_I32:
|
||||
case BI_OPCODE_STORE_I48:
|
||||
case BI_OPCODE_STORE_I64:
|
||||
case BI_OPCODE_STORE_I96:
|
||||
case BI_OPCODE_STORE_I128:
|
||||
return true;
|
||||
case BI_OPCODE_LOAD_I8:
|
||||
case BI_OPCODE_LOAD_I16:
|
||||
case BI_OPCODE_LOAD_I24:
|
||||
case BI_OPCODE_LOAD_I32:
|
||||
case BI_OPCODE_LOAD_I48:
|
||||
case BI_OPCODE_LOAD_I64:
|
||||
case BI_OPCODE_LOAD_I96:
|
||||
case BI_OPCODE_LOAD_I128:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
compute_spill_cost(bi_context *ctx)
|
||||
{
|
||||
/* The cost of a spill/fill is just 10*block_depth for now. */
|
||||
|
||||
uint32_t *block_depth = calloc(ctx->num_blocks, sizeof(uint32_t));
|
||||
|
||||
bi_foreach_block(ctx, block) {
|
||||
if (!block->loop_header)
|
||||
continue;
|
||||
|
||||
bool *loop_block = bi_find_loop_blocks(ctx, block);
|
||||
|
||||
for (uint32_t b = 0; b < ctx->num_blocks; ++b) {
|
||||
if (loop_block[b])
|
||||
block_depth[b] += 1;
|
||||
}
|
||||
|
||||
free(loop_block);
|
||||
}
|
||||
|
||||
uint64_t cost = 0;
|
||||
bi_foreach_block(ctx, block) {
|
||||
bi_foreach_instr_in_block(block, I) {
|
||||
if (op_is_load_store(I->op) && I->seg == BI_SEG_TL)
|
||||
cost += 10 * (block_depth[block->index] + 1);
|
||||
}
|
||||
}
|
||||
|
||||
free(block_depth);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
void
|
||||
bi_register_allocate(bi_context *ctx)
|
||||
{
|
||||
|
|
@ -1214,6 +1275,8 @@ bi_register_allocate(bi_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
ctx->spill_cost = compute_spill_cost(ctx);
|
||||
|
||||
assert(success);
|
||||
assert(l != NULL);
|
||||
|
||||
|
|
|
|||
|
|
@ -5095,6 +5095,7 @@ bi_gather_stats(bi_context *ctx, unsigned size, struct bifrost_stats *out)
|
|||
.loops = ctx->loop_count,
|
||||
.spills = ctx->spills,
|
||||
.fills = ctx->fills,
|
||||
.spill_cost = ctx->spill_cost,
|
||||
};
|
||||
|
||||
out->cycles = MAX2(out->arith, MAX3(out->t, out->v, out->ldst));
|
||||
|
|
@ -5134,6 +5135,7 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out)
|
|||
.loops = ctx->loop_count,
|
||||
.spills = ctx->spills,
|
||||
.fills = ctx->fills,
|
||||
.spill_cost = ctx->spill_cost,
|
||||
};
|
||||
struct valhall_stats stats = stats_abs;
|
||||
stats.fma /= model->rates.fma;
|
||||
|
|
@ -6615,3 +6617,45 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
|||
|
||||
info->ubo_mask &= (1 << nir->info.num_ubos) - 1;
|
||||
}
|
||||
|
||||
bool *
|
||||
bi_find_loop_blocks(const bi_context *ctx, bi_block *header)
|
||||
{
|
||||
/* A block is in the loop if it has the header both as the predecessor and
|
||||
* the successor. */
|
||||
|
||||
bool *h_as_suc = (bool *)calloc(ctx->num_blocks, sizeof(bool));
|
||||
bool *h_as_pred = (bool *)calloc(ctx->num_blocks, sizeof(bool));
|
||||
h_as_suc[header->index] = true;
|
||||
h_as_pred[header->index] = true;
|
||||
|
||||
/* If the CFG was one long chain, we would require |blocks|-1 iters to
|
||||
* propagate the in_loop info all the way through.
|
||||
*/
|
||||
for (uint32_t iter = 0; iter < ctx->num_blocks - 1; ++iter) {
|
||||
bi_foreach_block(ctx, block) {
|
||||
|
||||
bi_foreach_successor(block, succ) {
|
||||
if (h_as_suc[succ->index]) {
|
||||
h_as_suc[block->index] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bi_foreach_predecessor(block, pred) {
|
||||
if (h_as_pred[(*pred)->index]) {
|
||||
h_as_pred[block->index] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t bidx = 0; bidx < ctx->num_blocks - 1; ++bidx) {
|
||||
h_as_suc[bidx] &= h_as_pred[bidx];
|
||||
}
|
||||
|
||||
free(h_as_pred);
|
||||
|
||||
return h_as_suc;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1102,6 +1102,9 @@ typedef struct {
|
|||
|
||||
/* alignment needed for registers during register allocation */
|
||||
uint8_t *reg_alignment;
|
||||
|
||||
/* Computed after RA */
|
||||
uint64_t spill_cost;
|
||||
} bi_context;
|
||||
|
||||
static inline enum bi_round
|
||||
|
|
@ -1735,6 +1738,8 @@ bi_record_use(bi_instr **uses, BITSET_WORD *multiple, bi_instr *I, unsigned s)
|
|||
|
||||
bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
|
||||
|
||||
bool *bi_find_loop_blocks(const bi_context *ctx, bi_block *header);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@
|
|||
<stat name="Loops">Number of hardware loops</stat>
|
||||
<stat name="Spills">Number of spill instructions</stat>
|
||||
<stat name="Fills">Number of fill instructions</stat>
|
||||
<stat name="Spill cost">Cost of spill and fill instructions</stat>
|
||||
</isa>
|
||||
|
||||
<isa name="Valhall">
|
||||
|
|
@ -81,6 +82,7 @@
|
|||
<stat name="Loops">Number of hardware loops</stat>
|
||||
<stat name="Spills">Number of spill instructions</stat>
|
||||
<stat name="Fills">Number of fill instructions</stat>
|
||||
<stat name="Spill cost">Cost of spill and fill instructions</stat>
|
||||
</isa>
|
||||
</family>
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue