panfrost: spill registers in SSA form
Before doing register allocation, use information available from the SSA
representation to determine register pressure and to spill registers. This
spilling doesn't have to be perfect (the register allocator is still allowed
to spill), but SSA spilling is much faster than RA spilling. In general this
should vastly improve the performance of register allocation.

Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34446>
parent 67e452669e
commit 6c64ad934f

9 changed files with 1643 additions and 45 deletions
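The change is easiest to follow as a pipeline. Below is a condensed sketch in
terms of the helpers this commit adds (illustrative only, not a compilable
excerpt; it omits the debug paths, the blend-shader guard, and the Valhall TLS
alignment that the real bi_register_allocate hunk further down handles):

/* Pre-RA spilling, sketched. All helper names exist in this commit. */
static void
ssa_prespill_sketch(bi_context *ctx, unsigned regs_to_use)
{
   bi_compute_liveness_ssa(ctx);   /* per-use kill_ssa flags + live-in sets */

   /* Exact in SSA form, so one linear pass suffices */
   unsigned demand = bi_calc_register_demand(ctx);

   if (demand > regs_to_use) {
      /* Imperfect is fine: RA can still spill, just much more slowly */
      unsigned tls = bi_spill_ssa(ctx, regs_to_use, ctx->info.tls_size);
      ctx->info.tls_size = tls;
   }

   /* Only now leave SSA; RA proceeds as before */
   bi_out_of_ssa(ctx);
}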
@@ -32,8 +32,13 @@ bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *I)
    bi_foreach_dest(I, d)
       BITSET_CLEAR(live, I->dest[d].value);
 
-   bi_foreach_ssa_src(I, s)
+   bi_foreach_ssa_src(I, s) {
+      /* If the source is not live after this instruction, but becomes live
+       * at this instruction, this is the use that kills the source
+       */
+      I->src[s].kill_ssa = !BITSET_TEST(live, I->src[s].value);
       BITSET_SET(live, I->src[s].value);
+   }
 }
 
 void
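For readers new to the kill_ssa convention: scanning backwards, a source that
is not yet in the live set is at its last use. A standalone toy illustration
of the same rule (hypothetical types, not the Bifrost IR):

#include <stdbool.h>
#include <stdio.h>

#define NUM_VALUES 4

/* Toy instruction: one destination, up to two sources (-1 = unused) */
struct toy_instr {
   int dest;
   int src[2];
   bool kill[2]; /* computed: is this use the last one? */
};

/* Walk instructions backwards maintaining a live set, like
 * bi_liveness_ins_update_ssa: clear the destination, then for each source
 * record whether it was dead (a killing use) before making it live. */
static void
compute_kills(struct toy_instr *prog, int count, bool *live)
{
   for (int i = count - 1; i >= 0; i--) {
      if (prog[i].dest >= 0)
         live[prog[i].dest] = false;

      for (int s = 0; s < 2; s++) {
         int v = prog[i].src[s];
         if (v < 0)
            continue;
         prog[i].kill[s] = !live[v]; /* first sighting on a backward walk */
         live[v] = true;
      }
   }
}

int
main(void)
{
   /* v2 = v0 + v1;  v3 = v2 + v0  (v0's last use is the second instr) */
   struct toy_instr prog[] = {
      {.dest = 2, .src = {0, 1}},
      {.dest = 3, .src = {2, 0}},
   };
   bool live[NUM_VALUES] = {false}; /* nothing live-out of this block */

   compute_kills(prog, 2, live);
   printf("instr 0: kill v0=%d kill v1=%d\n", prog[0].kill[0], prog[0].kill[1]);
   printf("instr 1: kill v2=%d kill v0=%d\n", prog[1].kill[0], prog[1].kill[1]);
   return 0;
}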
@@ -89,21 +94,17 @@ bi_compute_liveness_ssa(bi_context *ctx)
       memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD));
 
       /* Kill write */
-      bi_foreach_instr_in_block(blk, I) {
-         if (I->op != BI_OPCODE_PHI)
-            break;
-
+      bi_foreach_phi_in_block(blk, I) {
          BITSET_CLEAR(live, I->dest[0].value);
       }
 
       /* Make live the corresponding source */
-      bi_foreach_instr_in_block(blk, I) {
-         if (I->op != BI_OPCODE_PHI)
-            break;
-
+      bi_foreach_phi_in_block(blk, I) {
          bi_index operand = I->src[bi_predecessor_index(blk, *pred)];
-         if (bi_is_ssa(operand))
+         if (bi_is_ssa(operand)) {
            BITSET_SET(live, operand.value);
+            I->src[bi_predecessor_index(blk, *pred)].kill_ssa = false;
+         }
       }
 
       BITSET_WORD progress = 0;
@@ -88,6 +88,8 @@ bi_print_index(FILE *fp, bi_index index)
 {
    if (index.discard)
       fputs("^", fp);
+   if (index.kill_ssa)
+      fputs("!", fp);
 
    if (bi_is_null(index))
       fprintf(fp, "_");
@@ -95,6 +97,8 @@ bi_print_index(FILE *fp, bi_index index)
       fprintf(fp, "#0x%x", index.value);
    else if (index.type == BI_INDEX_FAU && index.value >= BIR_FAU_UNIFORM)
       fprintf(fp, "u%u", index.value & ~BIR_FAU_UNIFORM);
+   else if (index.type == BI_INDEX_FAU && index.memory)
+      fprintf(fp, "m%u", index.value);
    else if (index.type == BI_INDEX_FAU)
       fprintf(fp, "%s", bir_fau_name(index.value));
    else if (index.type == BI_INDEX_PASS)
@@ -1,27 +1,10 @@
 /*
- * Copyright (C) 2020 Collabora Ltd.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (C) 2020,2025 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors (Collabora):
+ *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+ *    Eric R. Smith <eric.smith@collabora.com>
  */
 
 #include "util/u_memory.h"
@@ -454,7 +437,7 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
       : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48));
 
    /* To test spilling, mimic a small register file */
-   if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend)
+   if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend && (bifrost_debug & BIFROST_DBG_NOSSARA))
      default_affinity &= BITFIELD64_MASK(48) << 8;
 
    bi_foreach_instr_global(ctx, ins) {
@@ -550,7 +533,7 @@ bi_reg_from_index(bi_context *ctx, struct lcra_state *l, bi_index index)
    /* LCRA didn't bother solving this index (how lazy!) */
    signed solution = l->solutions[index.value];
    if (solution < 0) {
-      assert(!is_offset);
+      assert(0 && "no solution for index");
       return index;
    }
 
@@ -632,7 +615,7 @@ bi_choose_spill_node(bi_context *ctx, struct lcra_state *l)
       }
    }
 
-   unsigned best_benefit = 0.0;
+   unsigned best_benefit = 0;
    signed best_node = -1;
 
    if (nodearray_is_sparse(&l->linear[l->spill_node])) {
@@ -706,7 +689,7 @@ bi_tls_ptr(bool hi)
    return bi_fau(BIR_FAU_TLS_PTR, hi);
 }
 
-static bi_instr *
+bi_instr *
 bi_load_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset)
 {
    if (b->shader->arch >= 9) {
@@ -718,7 +701,7 @@ bi_load_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset)
    }
 }
 
-static void
+void
 bi_store_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset)
 {
    if (b->shader->arch >= 9) {
@@ -946,6 +929,7 @@ bi_out_of_ssa(bi_context *ctx)
 {
    bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
    unsigned first_reg = ctx->ssa_alloc;
+   bool allow_propagate;
 
    /* Trivially lower phis */
    bi_foreach_block(ctx, block) {
@@ -969,16 +953,28 @@ bi_out_of_ssa(bi_context *ctx)
             assert(!I->src[i].neg);
             assert(I->src[i].swizzle == BI_SWIZZLE_H01);
 
-            /* MOV of immediate needs lowering on Valhall */
-            if (ctx->arch >= 9 && I->src[i].type == BI_INDEX_CONSTANT)
+            if (I->src[i].memory)
+               /* spilled register, need to un-spill */
+               bi_load_tl(&b, 32, reg, I->src[i].value);
+            else if (ctx->arch >= 9 && I->src[i].type == BI_INDEX_CONSTANT)
+               /* MOV of immediate needs lowering on Valhall */
                bi_iadd_imm_i32_to(&b, reg, zero, I->src[i].value);
             else
                bi_mov_i32_to(&b, reg, I->src[i]);
          }
 
          /* Replace the phi with a move */
+         allow_propagate = true;
          bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
-         bi_mov_i32_to(&b, I->dest[0], reg);
+         if (I->dest[0].memory) {
+            /* dest was spilled to memory */
+            bi_store_tl(&b, 32, reg, I->dest[0].value);
+            allow_propagate = false;
+         } else if (ctx->arch >= 9 && reg.type == BI_INDEX_CONSTANT)
+            /* MOV of immediate needs lowering on Valhall */
+            bi_iadd_imm_i32_to(&b, I->dest[0], zero, reg.value);
+         else
+            bi_mov_i32_to(&b, I->dest[0], reg);
          bi_remove_instruction(I);
 
          /* Propagate that move within the block. The destination
@@ -987,7 +983,7 @@ bi_out_of_ssa(bi_context *ctx)
          * possible in the next pass.
          */
         bi_foreach_instr_in_block_rev(block, prop) {
-           if (prop->op == BI_OPCODE_PHI)
+           if (prop->op == BI_OPCODE_PHI || !allow_propagate)
               break;
 
           bi_foreach_src(prop, s) {
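Note why allow_propagate exists: when a phi destination was spilled, the
lowered copy is a TLS store rather than a register move, so there is no move
for the backwards propagation loop (previously unconditional) to fold into
later instructions.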
@@ -1083,14 +1079,49 @@ bi_register_allocate(bi_context *ctx)
    struct lcra_state *l = NULL;
    bool success = false;
 
-   unsigned iter_count = 2000; /* max iterations */
+   unsigned iter_count = 0;
+   unsigned max_iters = 2000;
    /* Number of bytes of memory we've spilled into */
    unsigned spill_count = ctx->info.tls_size;
 
    if (ctx->arch >= 9)
       va_lower_split_64bit(ctx);
 
+   /* get estimate of register demand (must be done in SSA form)
+    * and do a preliminary spill; this doesn't have to be perfect,
+    * since register allocation can spill too, but RA is really slow
+    * so the closer we get to having enough registers free, the better
+    */
+   if (!(bifrost_debug & BIFROST_DBG_NOSSARA)) {
+      unsigned regs_to_use =
+         ((bifrost_debug & BIFROST_DBG_SPILL) && !ctx->inputs->is_blend) ? 16 : BI_MAX_REGS;
+      bool verbose = bifrost_debug & BIFROST_DBG_VERBOSE;
+
+      bi_compute_liveness_ssa(ctx);
+      if (verbose) {
+         bi_print_shader(ctx, stdout);
+      }
+      unsigned register_demand = bi_calc_register_demand(ctx);
+      if (register_demand > regs_to_use) {
+         /* spill registers if we can */
+         if (ctx->inputs->is_blend)
+            unreachable("Blend shaders may not spill");
+
+         spill_count = bi_spill_ssa(ctx, regs_to_use, spill_count);
+         /* By default, we use packed TLS addressing on Valhall.
+          * We cannot cross 16 byte boundaries with packed TLS
+          * addressing. Align to ensure this doesn't happen. This
+          * could be optimized a bit.
+          */
+         if (ctx->arch >= 9)
+            spill_count = ALIGN_POT(spill_count, 16);
+         if (verbose) {
+            printf("\nspill_registers=%d\n", spill_count);
+            bi_print_shader(ctx, stdout);
+         }
+      }
+   }
+
    /* Lower tied operands. SSA is broken from here on. */
    unsigned first_reg = bi_out_of_ssa(ctx);
    bi_lower_vector(ctx, first_reg);
@@ -1110,11 +1141,11 @@ bi_register_allocate(bi_context *ctx)
    }
 
    /* Otherwise, use the register file and spill until we succeed */
-   while (!success && ((iter_count--) > 0)) {
+   while (!success && ((iter_count++) < max_iters)) {
      l = bi_allocate_registers(ctx, &success, true);
 
      if (success) {
-         ctx->info.work_reg_count = 64;
+         ctx->info.work_reg_count = BI_MAX_REGS;
      } else {
         signed spill_node = bi_choose_spill_node(ctx, l);
         lcra_free(l);
src/panfrost/compiler/bi_ra_ssa.c (new file, +147 lines)
@@ -0,0 +1,147 @@
/*
 * Copyright 2023-2024 Alyssa Rosenzweig
 * Copyright 2023-2024 Valve Corporation
 * Copyright 2022 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */
#include "util/list.h"
#include "util/set.h"
#include "util/u_memory.h"
#include "bifrost_compile.h"
#include "bifrost_nir.h"
#include "compiler.h"

/*
 * RA treats the nesting counter, the divergent shuffle temporary, and the
 * spiller temporaries as alive throughout if used anywhere. This could be
 * optimized. Using a single power-of-two reserved region at the start ensures
 * these registers are never shuffled.
 */
static unsigned
reserved_size(bi_context *ctx)
{
   if (ctx->has_spill_pcopy_reserved)
      return 8;
   else
      return 0;
}

/*
 * Calculate register demand in registers, while gathering widths and
 * classes. Because we allocate in SSA, this calculation is exact and runs in
 * linear time. Depends on SSA liveness information.
 */
unsigned
bi_calc_register_demand(bi_context *ctx)
{
   /* Print detailed demand calculation, helpful to debug spilling */
   bool debug = false;

   if (debug) {
      bi_print_shader(ctx, stdout);
   }

   uint8_t *widths = calloc(ctx->ssa_alloc, sizeof(uint8_t));
   enum ra_class *classes = calloc(ctx->ssa_alloc, sizeof(enum ra_class));

   bi_foreach_instr_global(ctx, I) {
      bi_foreach_ssa_dest(I, d) {
         unsigned v = I->dest[d].value;
         assert(widths[v] == 0 && "broken SSA");
         /* Round up vectors for easier live range splitting */
         widths[v] = 1;
         classes[v] = ra_class_for_index(I->dest[d]);
      }
   }

   /* Calculate demand at the start of each block based on live-in, then update
    * for each instruction processed. Calculate rolling maximum.
    */
   unsigned max_demand = 0;

   bi_foreach_block(ctx, block) {
      unsigned demand = reserved_size(ctx);

      /* Everything live-in */
      {
         int i;
         BITSET_FOREACH_SET(i, block->ssa_live_in, ctx->ssa_alloc) {
            if (classes[i] == RA_GPR)
               demand += widths[i];
         }
      }

      max_demand = MAX2(demand, max_demand);

      /* To handle non-power-of-two vectors, sometimes live range splitting
       * needs extra registers for 1 instruction. This counter tracks the number
       * of registers to be freed after 1 extra instruction.
       */
      unsigned late_kill_count = 0;

      if (debug) {
         printf("\n");
      }

      bi_foreach_instr_in_block(block, I) {
         /* Phis happen in parallel and are already accounted for in the live-in
          * set, just skip them so we don't double count.
          */
         if (I->op == BI_OPCODE_PHI)
            continue;

         if (debug) {
            printf("%u: ", demand);
            bi_print_instr(I, stdout);
         }

         /* Handle late-kill registers from last instruction */
         demand -= late_kill_count;
         late_kill_count = 0;

         /* Kill sources the first time we see them */
         bi_foreach_src(I, s) {
            if (!I->src[s].kill_ssa)
               continue;
            assert(I->src[s].type == BI_INDEX_NORMAL);
            if (ra_class_for_index(I->src[s]) != RA_GPR)
               continue;

            bool skip = false;

            for (unsigned backwards = 0; backwards < s; ++backwards) {
               if (bi_is_equiv(I->src[backwards], I->src[s])) {
                  skip = true;
                  break;
               }
            }

            if (!skip)
               demand -= widths[I->src[s].value];
         }

         /* Make destinations live */
         bi_foreach_ssa_dest(I, d) {
            if (ra_class_for_index(I->dest[d]) != RA_GPR)
               continue;

            /* Live range splits allocate at power-of-two granularity. Round up
             * destination sizes (temporarily) to powers-of-two.
             */
            unsigned real_width = widths[I->dest[d].value];
            unsigned pot_width = util_next_power_of_two(real_width);

            demand += pot_width;
            late_kill_count += (pot_width - real_width);
         }

         max_demand = MAX2(demand, max_demand);
      }

      demand -= late_kill_count;
   }

   free(widths);
   free(classes);
   return max_demand;
}
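The demand calculation above is exact because, in SSA, every value has one
definition and liveness marks each killing use. A self-contained toy version
of the same bookkeeping (plain arrays instead of the compiler's bitsets,
widths, and classes; hypothetical code, not part of the commit):

#include <stdbool.h>
#include <stdio.h>

/* Toy: each instruction defines at most one value and may kill some sources.
 * Demand falls at killing uses and rises at definitions; the running maximum
 * is the exact register demand of a single-block SSA program. */
struct toy_instr {
   bool defines;
   int kills; /* number of sources at their last use here */
};

static unsigned
toy_register_demand(const struct toy_instr *prog, int count, unsigned live_in)
{
   unsigned demand = live_in;
   unsigned max_demand = demand;

   for (int i = 0; i < count; i++) {
      demand -= prog[i].kills;    /* killed sources free registers */
      if (prog[i].defines)
         demand += 1;             /* destination becomes live */
      if (demand > max_demand)
         max_demand = demand;
   }
   return max_demand;
}

int
main(void)
{
   /* t0 = load; t1 = load; t2 = t0 + t1 (kills both); store t2 (kills t2) */
   struct toy_instr prog[] = {
      {.defines = true, .kills = 0},
      {.defines = true, .kills = 0},
      {.defines = true, .kills = 2},
      {.defines = false, .kills = 1},
   };
   printf("demand = %u\n", toy_register_demand(prog, 4, 0)); /* prints 2 */
   return 0;
}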
src/panfrost/compiler/bi_spill_ssa.c (new file, +1332 lines)
File diff suppressed because it is too large.
@@ -49,6 +49,7 @@ extern "C" {
 #define BIFROST_DBG_NOPRELOAD 0x0800
 #define BIFROST_DBG_SPILL 0x1000
 #define BIFROST_DBG_NOPSCHED 0x2000
+#define BIFROST_DBG_NOSSARA 0x4000
 
 extern int bifrost_debug;
@@ -57,6 +57,7 @@ static const struct debug_named_value bifrost_debug_options[] = {
    {"nosb", BIFROST_DBG_NOSB, "Disable scoreboarding"},
    {"nopreload", BIFROST_DBG_NOPRELOAD, "Disable message preloading"},
    {"spill", BIFROST_DBG_SPILL, "Test register spilling"},
+   {"nossara", BIFROST_DBG_NOSSARA, "Disable SSA in register allocation"},
    DEBUG_NAMED_VALUE_END
 };
 /* clang-format on */
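These two knobs make the new path easy to bisect at runtime: nossara falls
back to the old RA-driven spilling, while the existing spill option forces
the spiller to run by shrinking the visible register file. Note the
bi_allocate_registers hunk above: the spill affinity hack now applies only
together with nossara, since with SSA RA enabled the same test is performed
by capping regs_to_use at 16 in bi_register_allocate. Both options are read
from Bifrost's debug environment variable (presumably BIFROST_MESA_DEBUG,
following the usual Mesa naming).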
@@ -193,8 +193,16 @@ typedef struct {
    uint32_t offset : 3;
    enum bi_index_type type : 3;
 
+   /* Last use of an SSA value; similar to discard, but applies to the
+    * SSA analysis and does not have any HW restrictions (discard gets
+    * sent to the hardware eventually). */
+   bool kill_ssa : 1;
+
+   /* Register class */
+   bool memory : 1;
+
    /* Must be zeroed so we can hash the whole 64-bits at a time */
-   unsigned padding : (32 - 14);
+   unsigned padding : (32 - 16);
 } bi_index;
 
 static inline bi_index
@@ -207,6 +215,23 @@ bi_get_index(unsigned value)
    };
 }
 
+enum ra_class {
+   /* General purpose register */
+   RA_GPR,
+
+   /* Memory, used to assign stack slots */
+   RA_MEM,
+
+   /* Keep last */
+   RA_CLASSES,
+};
+
+static inline enum ra_class
+ra_class_for_index(bi_index idx)
+{
+   return idx.memory ? RA_MEM : RA_GPR;
+}
+
 static inline bi_index
 bi_register(unsigned reg)
 {
@@ -911,6 +936,8 @@ enum bi_idvs_mode {
    BI_IDVS_ALL = 3,
 };
 
+#define BI_MAX_REGS 64
+
 typedef struct {
    const struct pan_compile_inputs *inputs;
    nir_shader *nir;
@@ -950,7 +977,7 @@ typedef struct {
    /* During NIR->BIR, table of preloaded registers, or NULL if never
     * preloaded.
     */
-   bi_index preloaded[64];
+   bi_index preloaded[BI_MAX_REGS];
 
    /* For creating temporaries */
    unsigned ssa_alloc;
@@ -964,6 +991,15 @@ typedef struct {
    */
    struct hash_table_u64 *allocated_vec;
 
+   /* Beginning of our stack allocation used for spilling, below that is
+    * NIR-level scratch.
+    */
+   unsigned spill_base_B;
+
+   /* Beginning of stack allocation used for parallel copy lowering */
+   bool has_spill_pcopy_reserved;
+   unsigned spill_pcopy_base;
+
    /* Stats for shader-db */
    unsigned loop_count;
    unsigned spills;
@@ -1148,13 +1184,19 @@ bi_src_index(nir_src *src)
    util_dynarray_foreach(&(blk)->predecessors, bi_block *, v)
 
 #define bi_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)
+#define bi_foreach_src_rev(ins, v) for (signed v = ins->nr_srcs - 1; v >= 0; --v)
 
 #define bi_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)
+#define bi_foreach_dest_rev(ins, v) for (signed v = ins->nr_dests - 1; v >= 0; --v)
 
 #define bi_foreach_ssa_src(ins, v) \
    bi_foreach_src(ins, v) \
       if (ins->src[v].type == BI_INDEX_NORMAL)
 
+#define bi_foreach_ssa_src_rev(ins, v) \
+   bi_foreach_src_rev(ins, v) \
+      if (ins->src[v].type == BI_INDEX_NORMAL)
+
 #define bi_foreach_ssa_dest(ins, v) \
    bi_foreach_dest(ins, v) \
       if (ins->dest[v].type == BI_INDEX_NORMAL)
@@ -1163,6 +1205,25 @@ bi_src_index(nir_src *src)
    bi_foreach_instr_in_tuple(tuple, ins) \
       bi_foreach_src(ins, s)
 
+#define bi_foreach_ssa_dest_rev(ins, v) \
+   bi_foreach_dest_rev(ins, v) \
+      if (ins->dest[v].type == BI_INDEX_NORMAL)
+
+/* Phis only come at the start (after else instructions) so we stop as soon as
+ * we hit a non-phi
+ */
+#define bi_foreach_phi_in_block(block, v) \
+   bi_foreach_instr_in_block(block, v) \
+      if (v->op != BI_OPCODE_PHI) \
+         break; \
+      else
+
+#define bi_foreach_phi_in_block_safe(block, v) \
+   bi_foreach_instr_in_block_safe(block, v) \
+      if (v->op != BI_OPCODE_PHI) \
+         break; \
+      else
+
 /*
  * Find the index of a predecessor, used as the implicit order of phi sources.
  */
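The two phi iterators above rely on a dangling-else trick: the user's loop
body binds to the trailing else, while the if breaks out of the loop at the
first non-phi instruction. A standalone demonstration of the idiom with a
hypothetical macro of the same shape:

#include <stdio.h>

/* Iterate a plain array but stop at the first element failing the
 * predicate. The loop body supplied at the call site attaches to the
 * trailing `else`, so `break` fires only on the first odd element. */
#define foreach_leading_even(arr, n, p) \
   for (int *p = (arr); p < (arr) + (n); ++p) \
      if (*p % 2 != 0) \
         break; \
      else

int
main(void)
{
   int values[] = {2, 4, 6, 7, 8}; /* 8 is never visited: 7 breaks the loop */
   foreach_leading_even(values, 5, p)
      printf("%d\n", *p); /* prints 2, 4, 6 */
   return 0;
}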
@@ -1285,9 +1346,14 @@ bool bi_opt_constant_fold(bi_context *ctx);
 void bi_compute_liveness_ssa(bi_context *ctx);
 void bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *ins);
 
+unsigned bi_calc_register_demand(bi_context *ctx);
+
 void bi_postra_liveness(bi_context *ctx);
 uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
 
+/* SSA spilling; returns number of spilled registers */
+unsigned bi_spill_ssa(bi_context *ctx, unsigned num_registers, unsigned tls_size);
+
 /* Layout */
 
 signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
@@ -1477,6 +1543,15 @@ bi_after_clause(bi_clause *clause)
    return bi_after_instr(bi_last_instr_in_clause(clause));
 }
 
+/* Get a cursor at the start of a function, after any preloads */
+static inline bi_cursor
+bi_before_function(bi_context *ctx)
+{
+   bi_block *block = bi_start_block(&ctx->blocks);
+
+   return bi_before_block(block);
+}
+
 /* IR builder in terms of cursor infrastructure */
 
 typedef struct {
@@ -1490,6 +1565,10 @@ bi_init_builder(bi_context *ctx, bi_cursor cursor)
    return (bi_builder){.shader = ctx, .cursor = cursor};
 }
 
+/* insert load/store for spills */
+bi_instr *bi_load_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset);
+void bi_store_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset);
+
 /* Insert an instruction at the cursor and move the cursor */
 
 static inline void
@@ -22,6 +22,8 @@ libpanfrost_bifrost_files = files(
   'bi_opt_dual_tex.c',
   'bi_pressure_schedule.c',
   'bi_ra.c',
+  'bi_ra_ssa.c',
+  'bi_spill_ssa.c',
   'bi_validate.c',
   'bir.c',
   'bifrost_compile.c',