pan/bi: Fixup bi_repair_ssa.c for bi

Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39305>
This commit is contained in:
Christoph Pillmayer 2025-12-12 12:51:10 +00:00 committed by Marge Bot
parent f29fa1e9df
commit dd94d1833f
3 changed files with 99 additions and 81 deletions

View file

@ -14,15 +14,15 @@
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/u_dynarray.h"
#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_opcodes.h"
#include "bi_builder.h"
#include "compiler.h"
#include "bi_opcodes.h"
struct repair_block {
/* For a loop header, whether phi operands have been added */
bool sealed;
/* Sparse map: variable name -> agx_index.
/* Sparse map: variable name -> bi_index.
*
* Definition of a variable at the end of the block.
*/
@ -30,7 +30,7 @@ struct repair_block {
};
struct repair_ctx {
agx_context *shader;
bi_context *shader;
/* Number of variables */
unsigned n;
@ -40,14 +40,14 @@ struct repair_ctx {
};
static inline struct repair_block *
repair_block(struct repair_ctx *ctx, agx_block *block)
repair_block(struct repair_ctx *ctx, bi_block *block)
{
return &ctx->blocks[block->index];
}
static void
record_write(struct repair_ctx *ctx, agx_block *block, unsigned node,
agx_index val)
record_write(struct repair_ctx *ctx, bi_block *block, unsigned node,
bi_index val)
{
assert(node < ctx->n);
struct hash_table_u64 *defs = repair_block(ctx, block)->defs;
@ -55,26 +55,26 @@ record_write(struct repair_ctx *ctx, agx_block *block, unsigned node,
ralloc_memdup(defs, &val, sizeof(val)));
}
static void add_phi_operands(struct repair_ctx *ctx, agx_block *block,
agx_instr *phi, agx_index node);
static void add_phi_operands(struct repair_ctx *ctx, bi_block *block,
bi_instr *phi, bi_index node);
static agx_index
resolve_read(struct repair_ctx *ctx, agx_block *block, agx_index node)
static bi_index
resolve_read(struct repair_ctx *ctx, bi_block *block, bi_index node)
{
struct repair_block *rb = repair_block(ctx, block);
/* Local value numbering */
assert(node.type == AGX_INDEX_NORMAL);
agx_index *local = _mesa_hash_table_u64_search(rb->defs, node.value);
assert(node.type == BI_INDEX_NORMAL);
bi_index *local = _mesa_hash_table_u64_search(rb->defs, node.value);
if (local) {
assert(!agx_is_null(*local));
assert(!bi_is_null(*local));
return *local;
}
/* Global value numbering. readValueRecursive in the paper */
unsigned nr_preds = agx_num_predecessors(block);
agx_index val;
unsigned nr_preds = bi_num_predecessors(block);
bi_index val;
assert(nr_preds > 0);
@ -83,55 +83,54 @@ resolve_read(struct repair_ctx *ctx, agx_block *block, agx_index node)
* the loop is processed.
*/
if (block->loop_header && !rb->sealed) {
val = agx_temp_like(ctx->shader, node);
agx_builder b = agx_init_builder(ctx->shader, agx_before_block(block));
agx_instr *phi = agx_phi_to(&b, val, nr_preds);
val = bi_temp_like(ctx->shader, node);
bi_builder b = bi_init_builder(ctx->shader, bi_before_block(block));
bi_instr *phi = bi_phi_to(&b, val, nr_preds);
phi->shadow = true;
/* Stash the variable in for an intrusive incompletePhis map */
phi->imm = node.value + 1;
phi->index = node.value + 1;
} else if (nr_preds == 1) {
/* No phi needed */
agx_block *pred =
*util_dynarray_element(&block->predecessors, agx_block *, 0);
bi_block *pred =
*util_dynarray_element(&block->predecessors, bi_block *, 0);
val = resolve_read(ctx, pred, node);
} else {
/* Insert phi first to break cycles */
val = agx_temp_like(ctx->shader, node);
agx_builder b = agx_init_builder(ctx->shader, agx_before_block(block));
agx_instr *phi = agx_phi_to(&b, val, nr_preds);
val = bi_temp_like(ctx->shader, node);
bi_builder b = bi_init_builder(ctx->shader, bi_before_block(block));
bi_instr *phi = bi_phi_to(&b, val, nr_preds);
phi->shadow = true;
record_write(ctx, block, node.value, val);
add_phi_operands(ctx, block, phi, node);
}
assert(!agx_is_null(val));
assert(!bi_is_null(val));
record_write(ctx, block, node.value, val);
return val;
}
static void
add_phi_operands(struct repair_ctx *ctx, agx_block *block, agx_instr *phi,
agx_index node)
add_phi_operands(struct repair_ctx *ctx, bi_block *block, bi_instr *phi,
bi_index node)
{
/* Add phi operands */
agx_foreach_predecessor(block, pred) {
unsigned s = agx_predecessor_index(block, *pred);
bi_foreach_predecessor(block, pred) {
unsigned s = bi_predecessor_index(block, *pred);
phi->src[s] = resolve_read(ctx, *pred, node);
}
}
static void
seal_block(struct repair_ctx *ctx, agx_block *block)
seal_block(struct repair_ctx *ctx, bi_block *block)
{
agx_foreach_phi_in_block(block, phi) {
/* We use phi->imm as a sideband to pass the variable name. */
if (phi->imm) {
agx_index var = agx_get_vec_index(phi->imm - 1, phi->dest[0].size,
agx_channels(phi->dest[0]));
bi_foreach_phi_in_block(block, phi) {
/* We use phi->index as a sideband to pass the variable name. */
if (phi->index) {
bi_index var = bi_get_index(phi->index - 1);
var.memory = phi->dest[0].memory;
add_phi_operands(ctx, block, phi, var);
phi->imm = 0;
phi->index = 0;
}
}
@ -139,9 +138,9 @@ seal_block(struct repair_ctx *ctx, agx_block *block)
}
static void
seal_loop_headers(struct repair_ctx *ctx, struct agx_block *exit)
seal_loop_headers(struct repair_ctx *ctx, struct bi_block *exit)
{
agx_foreach_successor(exit, succ) {
bi_foreach_successor(exit, succ) {
/* Only loop headers need to be sealed late */
if (!succ->loop_header)
continue;
@ -149,7 +148,7 @@ seal_loop_headers(struct repair_ctx *ctx, struct agx_block *exit)
/* Check if all predecessors have been processed */
bool any_unprocessed = false;
agx_foreach_predecessor(succ, P) {
bi_foreach_predecessor(succ, P) {
if ((*P)->index > exit->index) {
any_unprocessed = true;
break;
@ -163,36 +162,37 @@ seal_loop_headers(struct repair_ctx *ctx, struct agx_block *exit)
}
static void
agx_opt_trivial_phi(agx_context *ctx)
bi_opt_trivial_phi(bi_context *ctx)
{
agx_index *remap = calloc(ctx->alloc, sizeof(*remap));
bi_index *remap = calloc(ctx->ssa_alloc, sizeof(*remap));
for (;;) {
bool progress = false;
memset(remap, 0, ctx->alloc * sizeof(*remap));
memset(remap, 0, ctx->ssa_alloc * sizeof(*remap));
agx_foreach_block(ctx, block) {
agx_foreach_phi_in_block_safe(block, phi) {
agx_index same = agx_null();
bi_foreach_block(ctx, block) {
bi_foreach_phi_in_block_safe(block, phi) {
bi_index same = bi_null();
bool all_same = true;
agx_foreach_src(phi, s) {
bi_foreach_src(phi, s) {
const bi_index src = phi->src[s];
/* TODO: Handle cycles faster */
if (!agx_is_null(remap[phi->src[s].value])) {
if (bi_is_ssa(src) && !bi_is_null(remap[src.value])) {
all_same = false;
break;
}
/* Same value or self-reference */
if (agx_is_equiv(phi->src[s], same) ||
agx_is_equiv(phi->src[s], phi->dest[0]))
if (bi_is_equiv(src, same) ||
bi_is_equiv(src, phi->dest[0]))
continue;
if (!agx_is_null(same)) {
if (!bi_is_null(same)) {
all_same = false;
break;
}
same = phi->src[s];
same = src;
}
/* Only optimize trivial phis with normal sources. It is possible
@ -207,9 +207,9 @@ agx_opt_trivial_phi(agx_context *ctx)
* So skip them for correctness in case NIR misses something (which
* can happen depending on pass order).
*/
if (all_same && same.type == AGX_INDEX_NORMAL) {
if (all_same && same.type == BI_INDEX_NORMAL) {
remap[phi->dest[0].value] = same;
agx_remove_instruction(phi);
bi_remove_instruction(phi);
progress = true;
}
}
@ -218,10 +218,10 @@ agx_opt_trivial_phi(agx_context *ctx)
if (!progress)
break;
agx_foreach_instr_global(ctx, I) {
agx_foreach_ssa_src(I, s) {
if (!agx_is_null(remap[I->src[s].value])) {
agx_replace_src(I, s, remap[I->src[s].value]);
bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
if (!bi_is_null(remap[I->src[s].value])) {
bi_replace_src(I, s, remap[I->src[s].value]);
}
}
}
@ -231,45 +231,45 @@ agx_opt_trivial_phi(agx_context *ctx)
}
void
agx_repair_ssa(agx_context *ctx)
bi_repair_ssa(bi_context *ctx)
{
struct repair_block *blocks =
rzalloc_array(NULL, struct repair_block, ctx->num_blocks);
agx_foreach_block(ctx, block) {
bi_foreach_block(ctx, block) {
struct repair_block *rb = &blocks[block->index];
rb->defs = _mesa_hash_table_u64_create(blocks);
}
unsigned n = ctx->alloc;
unsigned n = ctx->ssa_alloc;
agx_foreach_block(ctx, block) {
bi_foreach_block(ctx, block) {
struct repair_ctx rctx = {
.shader = ctx,
.n = n,
.blocks = blocks,
};
agx_foreach_instr_in_block(block, I) {
bi_foreach_instr_in_block(block, I) {
/* Repair SSA for the instruction */
if (I->op != AGX_OPCODE_PHI) {
agx_foreach_ssa_src(I, s) {
if (I->op != BI_OPCODE_PHI) {
bi_foreach_ssa_src(I, s) {
assert(I->src[s].value < n);
agx_replace_src(I, s, resolve_read(&rctx, block, I->src[s]));
bi_replace_src(I, s, resolve_read(&rctx, block, I->src[s]));
}
}
agx_foreach_ssa_dest(I, d) {
bi_foreach_ssa_dest(I, d) {
unsigned handle = I->dest[d].value;
/* Skip phis that we just created when processing loops */
if (handle >= n) {
assert(I->op == AGX_OPCODE_PHI);
assert(I->op == BI_OPCODE_PHI);
continue;
}
I->dest[d] =
agx_replace_index(I->dest[d], agx_temp_like(ctx, I->dest[d]));
bi_replace_index(I->dest[d], bi_temp_like(ctx, I->dest[d]));
record_write(&rctx, block, handle, I->dest[d]);
}
@ -278,8 +278,8 @@ agx_repair_ssa(agx_context *ctx)
seal_loop_headers(&rctx, block);
}
agx_foreach_block(ctx, block) {
agx_foreach_phi_in_block(block, phi) {
bi_foreach_block(ctx, block) {
bi_foreach_phi_in_block(block, phi) {
/* The kill bit is invalid (and meaningless) for phis. Liveness
* analysis does not produce it. However, we're ingesting broken SSA
* where we can have random kill bits set on phis. Strip them as part
@ -287,10 +287,10 @@ agx_repair_ssa(agx_context *ctx)
*
* The register allocator depends on this for correctness.
*/
phi->dest[0].kill = false;
phi->dest[0].kill_ssa = false;
agx_foreach_src(phi, s) {
phi->src[s].kill = false;
bi_foreach_src(phi, s) {
phi->src[s].kill_ssa = false;
}
/* Skip the phis that we just created */
@ -299,12 +299,12 @@ agx_repair_ssa(agx_context *ctx)
continue;
}
agx_foreach_ssa_src(phi, s) {
bi_foreach_ssa_src(phi, s) {
/* Phis (uniquely) read their sources in their corresponding
* predecessors, so chain through for that.
*/
agx_block *read_block =
*util_dynarray_element(&block->predecessors, agx_block *, s);
bi_block *read_block =
*util_dynarray_element(&block->predecessors, bi_block *, s);
assert(phi->src[s].value < n);
@ -314,7 +314,7 @@ agx_repair_ssa(agx_context *ctx)
.blocks = blocks,
};
agx_replace_src(phi, s,
bi_replace_src(phi, s,
resolve_read(&rctx, read_block, phi->src[s]));
}
}
@ -322,6 +322,6 @@ agx_repair_ssa(agx_context *ctx)
ralloc_free(blocks);
agx_opt_trivial_phi(ctx);
agx_reindex_ssa(ctx);
bi_opt_trivial_phi(ctx);
bi_reindex_ssa(ctx);
}

View file

@ -1195,6 +1195,21 @@ bi_temp(bi_context *ctx)
return bi_get_index(ctx->ssa_alloc++);
}
static inline bi_index
bi_temp_like(bi_context *ctx, bi_index idx)
{
idx.value = ctx->ssa_alloc++;
/* Reset the fields which don't make sense on a destination.
* When the temp replaces a source, bi_replace_index copies these from the
* old index.
*/
idx.abs = false;
idx.neg = false;
idx.swizzle = BI_SWIZZLE_H01;
idx.discard = false;
return idx;
}
static inline bi_index
bi_def_index(nir_def *def)
{
@ -1476,6 +1491,8 @@ uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
/* SSA spilling; returns number of spilled registers */
unsigned bi_spill_ssa(bi_context *ctx, unsigned num_registers, unsigned tls_size);
void bi_repair_ssa(bi_context *ctx);
/* Reindex SSA to reduce memory usage */
void bi_reindex_ssa(bi_context *ctx);

View file

@ -27,6 +27,7 @@ libpanfrost_bifrost_files = files(
'bi_ra_ssa.c',
'bi_spill_ssa.c',
'bi_reindex_ssa.c',
'bi_repair_ssa.c',
'bi_validate.c',
'bi_dominance.c',
'bir.c',