diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 7eb8ca36209..0a1d937ad7d 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -335,7 +335,14 @@ typedef enum ir3_instruction_flags { * before register assignment is done: */ IR3_INSTR_MARK = BIT(15), - IR3_INSTR_UNUSED = BIT(16), + + /* Used by shared register allocation when creating spill/reload instructions + * to inform validation that this is created by RA. This also may be set on + * an instruction where a spill has been folded into it. + */ + IR3_INSTR_SHARED_SPILL = IR3_INSTR_MARK, + + IR3_INSTR_UNUSED = BIT(17), } ir3_instruction_flags; struct ir3_instruction { diff --git a/src/freedreno/ir3/ir3_merge_regs.c b/src/freedreno/ir3/ir3_merge_regs.c index 1cdaad67b0e..7de6aaa4b18 100644 --- a/src/freedreno/ir3/ir3_merge_regs.c +++ b/src/freedreno/ir3/ir3_merge_regs.c @@ -377,6 +377,8 @@ static void aggressive_coalesce_split(struct ir3_liveness *live, struct ir3_instruction *split) { + if (!(split->dsts[0]->flags & IR3_REG_SSA)) + return; try_merge_defs(live, split->srcs[0]->def, split->dsts[0], split->split.off * reg_elem_size(split->dsts[0])); } @@ -409,6 +411,10 @@ create_parallel_copy(struct ir3_block *block) if (phi->opc != OPC_META_PHI) break; + /* Avoid phis we've already colored */ + if (!(phi->dsts[0]->flags & IR3_REG_SSA)) + continue; + /* Avoid undef */ if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) && !phi->srcs[pred_idx]->def) @@ -430,6 +436,8 @@ create_parallel_copy(struct ir3_block *block) foreach_instr (phi, &succ->instr_list) { if (phi->opc != OPC_META_PHI) break; + if (!(phi->dsts[0]->flags & IR3_REG_SSA)) + continue; if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) && !phi->srcs[pred_idx]->def) continue; @@ -456,6 +464,8 @@ create_parallel_copy(struct ir3_block *block) foreach_instr (phi, &succ->instr_list) { if (phi->opc != OPC_META_PHI) break; + if (!(phi->dsts[0]->flags & IR3_REG_SSA)) + continue; if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) && !phi->srcs[pred_idx]->def) continue; diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 070f67ba7ba..3b1b424ae22 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -193,6 +193,8 @@ void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval) { + assert(interval->inserted); + if (interval->parent) { rb_tree_remove(&interval->parent->children, &interval->node); } else { @@ -684,6 +686,8 @@ ra_pop_interval(struct ra_ctx *ctx, struct ra_file *file, struct ra_interval *interval) { assert(!interval->interval.parent); + /* shared live splitting is not allowed! 
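+ * (Shared live ranges are never split; when shared register pressure is too
+ * high, or shared vectors exist, ir3_ra_shared() runs first and spills shared
+ * values to normal registers instead, so no shared interval should ever need
+ * to be shuffled here.)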
*/ + assert(!(interval->interval.reg->flags & IR3_REG_SHARED)); /* Check if we've already moved this reg before */ unsigned pcopy_index; @@ -1665,6 +1669,9 @@ handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr) struct ir3_register *dst = instr->dsts[0]; struct ir3_register *src = instr->srcs[0]; + if (!(dst->flags & IR3_REG_SSA)) + return; + if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) { handle_normal_instr(ctx, instr); return; @@ -1683,6 +1690,9 @@ handle_split(struct ra_ctx *ctx, struct ir3_instruction *instr) static void handle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr) { + if (!(instr->dsts[0]->flags & IR3_REG_SSA)) + return; + struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set; unsigned dst_offset = instr->dsts[0]->merge_set_offset; @@ -1798,7 +1808,8 @@ handle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr) static void handle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr) { - if (instr->dsts[0]->num == INVALID_REG) + if (instr->dsts[0]->num == INVALID_REG || + !(instr->dsts[0]->flags & IR3_REG_SSA)) return; struct ra_file *file = ra_get_file(ctx, instr->dsts[0]); @@ -1829,6 +1840,9 @@ handle_input(struct ra_ctx *ctx, struct ir3_instruction *instr) static void assign_input(struct ra_ctx *ctx, struct ir3_instruction *instr) { + if (!(instr->dsts[0]->flags & IR3_REG_SSA)) + return; + struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name]; struct ra_file *file = ra_get_file(ctx, instr->dsts[0]); @@ -1973,6 +1987,9 @@ handle_live_out(struct ra_ctx *ctx, struct ir3_register *def) static void handle_phi(struct ra_ctx *ctx, struct ir3_register *def) { + if (!(def->flags & IR3_REG_SSA)) + return; + struct ra_file *file = ra_get_file(ctx, def); struct ra_interval *interval = &ctx->intervals[def->name]; @@ -1999,6 +2016,9 @@ handle_phi(struct ra_ctx *ctx, struct ir3_register *def) static void assign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi) { + if (!(phi->dsts[0]->flags & IR3_REG_SSA)) + return; + struct ra_file *file = ra_get_file(ctx, phi->dsts[0]); struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name]; assert(!interval->interval.parent); @@ -2085,15 +2105,8 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval) { physreg_t physreg = ra_interval_get_physreg(interval); - bool shared = interval->interval.reg->flags & IR3_REG_SHARED; - struct ir3_block **predecessors = - shared ? ctx->block->physical_predecessors : ctx->block->predecessors; - unsigned predecessors_count = shared - ? ctx->block->physical_predecessors_count - : ctx->block->predecessors_count; - - for (unsigned i = 0; i < predecessors_count; i++) { - struct ir3_block *pred = predecessors[i]; + for (unsigned i = 0; i < ctx->block->predecessors_count; i++) { + struct ir3_block *pred = ctx->block->predecessors[i]; struct ra_block_state *pred_state = &ctx->blocks[pred->index]; if (!pred_state->visited) @@ -2101,28 +2114,8 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval) physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg); if (pred_reg != physreg) { + assert(!(interval->interval.reg->flags & IR3_REG_SHARED)); insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg); - - /* This is a bit tricky, but when visiting the destination of a - * physical-only edge, we have two predecessors (the if and the - * header block) and both have multiple successors. 
We pick the - * register for all live-ins from the normal edge, which should - * guarantee that there's no need for shuffling things around in - * the normal predecessor as long as there are no phi nodes, but - * we still may need to insert fixup code in the physical - * predecessor (i.e. the last block of the if) and that has - * another successor (the block after the if) so we need to update - * the renames state for when we process the other successor. This - * crucially depends on the other successor getting processed - * after this. - * - * For normal (non-physical) edges we disallow critical edges so - * that hacks like this aren't necessary. - */ - if (!pred_state->renames) - pred_state->renames = _mesa_pointer_hash_table_create(ctx); - _mesa_hash_table_insert(pred_state->renames, interval->interval.reg, - (void *)(uintptr_t)physreg); } } } @@ -2561,6 +2554,18 @@ ir3_ra(struct ir3_shader_variant *v) ir3_merge_regs(live, v->ir); + bool has_shared_vectors = false; + foreach_block (block, &v->ir->block_list) { + foreach_instr (instr, &block->instr_list) { + ra_foreach_dst (dst, instr) { + if ((dst->flags & IR3_REG_SHARED) && reg_elems(dst) > 1) { + has_shared_vectors = true; + break; + } + } + } + } + struct ir3_pressure max_pressure; ir3_calc_pressure(v, live, &max_pressure); d("max pressure:"); @@ -2590,10 +2595,17 @@ ir3_ra(struct ir3_shader_variant *v) if (ir3_shader_debug & IR3_DBG_SPILLALL) calc_min_limit_pressure(v, live, &limit_pressure); - if (max_pressure.shared > limit_pressure.shared) { - /* TODO shared reg -> normal reg spilling */ - d("shared max pressure exceeded!"); - goto fail; + if (max_pressure.shared > limit_pressure.shared || has_shared_vectors) { + ir3_ra_shared(v, live); + + /* Recalculate liveness and register pressure now that additional values + * have been added. + */ + ralloc_free(live); + live = ir3_calc_liveness(ctx, v->ir); + ir3_calc_pressure(v, live, &max_pressure); + + ir3_debug_print(v->ir, "AFTER: shared register allocation"); } bool spilled = false; @@ -2629,7 +2641,7 @@ ir3_ra(struct ir3_shader_variant *v) foreach_block (block, &v->ir->block_list) handle_block(ctx, block); - ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count); + ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count, false); /* Strip array-ness and SSA-ness at the end, because various helpers still * need to work even on definitions that have already been assigned. For diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index c6837aaae21..1c561a57d76 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -168,8 +168,10 @@ bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v, bool ir3_lower_spill(struct ir3 *ir); +void ir3_ra_shared(struct ir3_shader_variant *v, struct ir3_liveness *live); + void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, - unsigned half_size, unsigned block_count); + unsigned half_size, unsigned block_count, bool shared_ra); void ir3_lower_copies(struct ir3_shader_variant *v); diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c index aab26760ab9..3d19e2b7431 100644 --- a/src/freedreno/ir3/ir3_ra_validate.c +++ b/src/freedreno/ir3/ir3_ra_validate.c @@ -92,13 +92,25 @@ struct reaching_state { struct ra_val_ctx { struct ir3_instruction *current_instr; + /* The current state of the dataflow analysis for the instruction we're + * processing. + */ struct reaching_state reaching; + + /* The state at the end of each basic block. 
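+ * Successor blocks start their propagation from this state when the analysis
+ * is carried across the CFG.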
*/ struct reaching_state *block_reaching; unsigned block_count; + /* When validating shared RA, we have to take spill/reload instructions into + * account. This saves an array of reg_state for the source of each spill + * instruction, to be restored at the corresponding reload(s). + */ + struct hash_table *spill_reaching; + unsigned full_size, half_size; bool merged_regs; + bool shared_ra; bool failed; }; @@ -130,6 +142,28 @@ get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg) return ctx->half_size; } +static struct reg_state * +get_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst) +{ + struct hash_entry *entry = _mesa_hash_table_search(ctx->spill_reaching, dst); + if (entry) + return entry->data; + else + return NULL; +} + +static struct reg_state * +get_or_create_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst) +{ + struct reg_state *state = get_spill_state(ctx, dst); + if (state) + return state; + + state = rzalloc_array(ctx, struct reg_state, reg_size(dst)); + _mesa_hash_table_insert(ctx->spill_reaching, dst, state); + return state; +} + /* Validate simple things, like the registers being in-bounds. This way we * don't have to worry about out-of-bounds accesses later. */ @@ -139,6 +173,8 @@ validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { ctx->current_instr = instr; ra_foreach_dst (dst, instr) { + if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) + continue; unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst); validate_assert(ctx, dst_max <= get_file_size(ctx, dst)); if (dst->tied) @@ -146,6 +182,8 @@ validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr) } ra_foreach_src (src, instr) { + if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED)) + continue; unsigned src_max = ra_reg_get_physreg(src) + reg_size(src); validate_assert(ctx, src_max <= get_file_size(ctx, src)); } @@ -219,6 +257,24 @@ static void propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { ra_foreach_dst (dst, instr) { + /* Process destinations from scalar ALU instructions that were demoted to + * normal ALU instructions. For these we must treat the instruction as a + * spill of itself and set the propagate state to itself. See + * try_demote_instructions(). 
+ */ + if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) { + if (instr->flags & IR3_INSTR_SHARED_SPILL) { + struct reg_state *state = get_or_create_spill_state(ctx, dst); + for (unsigned i = 0; i < reg_size(dst); i++) { + state[i] = (struct reg_state){ + .def = dst, + .offset = i, + }; + } + } + continue; + } + struct file_state *file = ra_val_get_file(ctx, dst); physreg_t physreg = ra_reg_get_physreg(dst); for (unsigned i = 0; i < reg_size(dst); i++) { @@ -239,6 +295,16 @@ propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split) physreg_t src_physreg = ra_reg_get_physreg(src); struct file_state *file = ra_val_get_file(ctx, dst); + if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) { + struct reg_state *src_state = get_spill_state(ctx, src->def); + if (src_state) { + struct reg_state *dst_state = get_or_create_spill_state(ctx, dst); + memcpy(dst_state, &src_state[split->split.off * reg_elem_size(src)], + reg_size(dst) * sizeof(struct reg_state)); + } + return; + } + unsigned offset = split->split.off * reg_elem_size(src); for (unsigned i = 0; i < reg_elem_size(src); i++) { file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i]; @@ -249,30 +315,50 @@ static void propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect) { struct ir3_register *dst = collect->dsts[0]; - physreg_t dst_physreg = ra_reg_get_physreg(dst); - struct file_state *file = ra_val_get_file(ctx, dst); - unsigned size = reg_size(dst); - struct reg_state srcs[size]; - for (unsigned i = 0; i < collect->srcs_count; i++) { - struct ir3_register *src = collect->srcs[i]; - unsigned dst_offset = i * reg_elem_size(dst); - for (unsigned j = 0; j < reg_elem_size(dst); j++) { - if (!ra_reg_is_src(src)) { - srcs[dst_offset + j] = (struct reg_state){ - .def = dst, - .offset = dst_offset + j, - }; - } else { - physreg_t src_physreg = ra_reg_get_physreg(src); - srcs[dst_offset + j] = file->regs[src_physreg + j]; + if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) { + struct reg_state *dst_state = NULL; + + for (unsigned i = 0; i < collect->srcs_count; i++) { + struct ir3_register *src = collect->srcs[i]; + unsigned dst_offset = i * reg_elem_size(dst); + + if (ra_reg_is_src(src)) { + struct reg_state *src_state = get_spill_state(ctx, src->def); + if (src_state) { + if (!dst_state) + dst_state = get_or_create_spill_state(ctx, dst); + memcpy(&dst_state[dst_offset], src_state, + reg_size(src) * sizeof(struct reg_state)); + } } } - } + } else { + struct file_state *file = ra_val_get_file(ctx, dst); + physreg_t dst_physreg = ra_reg_get_physreg(dst); + struct reg_state srcs[size]; - for (unsigned i = 0; i < size; i++) - file->regs[dst_physreg + i] = srcs[i]; + for (unsigned i = 0; i < collect->srcs_count; i++) { + struct ir3_register *src = collect->srcs[i]; + unsigned dst_offset = i * reg_elem_size(dst); + + for (unsigned j = 0; j < reg_elem_size(dst); j++) { + if (!ra_reg_is_src(src)) { + srcs[dst_offset + j] = (struct reg_state){ + .def = dst, + .offset = dst_offset + j, + }; + } else { + physreg_t src_physreg = ra_reg_get_physreg(src); + srcs[dst_offset + j] = file->regs[src_physreg + j]; + } + } + } + + for (unsigned i = 0; i < size; i++) + file->regs[dst_physreg + i] = srcs[i]; + } } static void @@ -291,15 +377,25 @@ propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) struct ir3_register *src = pcopy->srcs[i]; struct file_state *file = ra_val_get_file(ctx, dst); - for (unsigned j = 0; j < reg_size(dst); j++) { - if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) 
{ - srcs[offset + j] = (struct reg_state){ - .def = dst, - .offset = j, - }; - } else { - physreg_t src_physreg = ra_reg_get_physreg(src); - srcs[offset + j] = file->regs[src_physreg + j]; + if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) { + if (ra_reg_is_src(src)) { + struct reg_state *src_state = get_spill_state(ctx, src->def); + if (src_state) { + struct reg_state *dst_state = get_or_create_spill_state(ctx, dst); + memcpy(dst_state, src_state, reg_size(dst) * sizeof(struct reg_state)); + } + } + } else { + for (unsigned j = 0; j < reg_size(dst); j++) { + if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) { + srcs[offset + j] = (struct reg_state){ + .def = dst, + .offset = j, + }; + } else { + physreg_t src_physreg = ra_reg_get_physreg(src); + srcs[offset + j] = file->regs[src_physreg + j]; + } } } @@ -310,6 +406,12 @@ propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) offset = 0; for (unsigned i = 0; i < pcopy->dsts_count; i++) { struct ir3_register *dst = pcopy->dsts[i]; + + if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) { + offset += reg_size(dst); + continue; + } + physreg_t dst_physreg = ra_reg_get_physreg(dst); struct file_state *file = ra_val_get_file(ctx, dst); @@ -321,6 +423,23 @@ propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) assert(offset == size); } +static void +propagate_spill(struct ra_val_ctx *ctx, struct ir3_instruction *instr) +{ + if (instr->srcs[0]->flags & IR3_REG_SHARED) { /* spill */ + struct reg_state *state = get_or_create_spill_state(ctx, instr->dsts[0]); + physreg_t src_physreg = ra_reg_get_physreg(instr->srcs[0]); + memcpy(state, &ctx->reaching.shared.regs[src_physreg], + reg_size(instr->srcs[0]) * sizeof(struct reg_state)); + } else { /* reload */ + struct reg_state *state = get_spill_state(ctx, instr->srcs[0]->def); + assert(state); + physreg_t dst_physreg = ra_reg_get_physreg(instr->dsts[0]); + memcpy(&ctx->reaching.shared.regs[dst_physreg], state, + reg_size(instr->dsts[0]) * sizeof(struct reg_state)); + } +} + static void propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { @@ -330,6 +449,13 @@ propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) propagate_collect(ctx, instr); else if (instr->opc == OPC_META_PARALLEL_COPY) propagate_parallelcopy(ctx, instr); + else if (ctx->shared_ra && instr->opc == OPC_MOV && + /* Moves from immed/const with IR3_INSTR_SHARED_SPILL were demoted + * from scalar ALU, see try_demote_instruction(). 
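+ * Such movs only write a non-shared register from an immediate or constant,
+ * so there is no spill state to transfer and they are handled as normal
+ * instructions below.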
+ */ + !(instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)) && + (instr->flags & IR3_INSTR_SHARED_SPILL)) + propagate_spill(ctx, instr); else propagate_normal_instr(ctx, instr); } @@ -439,6 +565,8 @@ static void check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr, struct ir3_register *src) { + if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED)) + return; struct file_state *file = ra_val_get_file(ctx, src); physreg_t physreg = ra_reg_get_physreg(src); for (unsigned i = 0; i < reg_size(src); i++) { @@ -541,7 +669,7 @@ check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir) void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, - unsigned half_size, unsigned block_count) + unsigned half_size, unsigned block_count, bool shared_ra) { #ifdef NDEBUG #define VALIDATE 0 @@ -557,6 +685,9 @@ ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, ctx->full_size = full_size; ctx->half_size = half_size; ctx->block_count = block_count; + ctx->shared_ra = shared_ra; + if (ctx->shared_ra) + ctx->spill_reaching = _mesa_pointer_hash_table_create(ctx); foreach_block (block, &v->ir->block_list) { foreach_instr (instr, &block->instr_list) { diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c new file mode 100644 index 00000000000..d00198b2a3c --- /dev/null +++ b/src/freedreno/ir3/ir3_shared_ra.c @@ -0,0 +1,1415 @@ +/* + * Copyright (C) 2021 Valve Corporation + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ir3_ra.h" +#include "ir3_shader.h" + +#include "util/u_math.h" + +/* Allocating shared registers can pose a challenge, because their live + * intervals use the physical CFG which has extra edges inserted that are + * pretty much always critical edges. This causes problems with phi nodes, + * because copies for phi nodes have to happen "along the edge," and similarly + * causes problems when reunifying values that have had their live range split. + * Problematic phi nodes should be relatively rare, so we ban them for now. 
The solution we choose for live-range splitting is to integrate spilling and + * register allocation and spill to vector registers rather than split a live + * range, which negates some of the advantages of SSA-based RA, but it isn't as + * bad as it seems because the conditions needed (vector shared registers, which + * only movmsk currently produces, or fixed registers which we don't do) are + * relatively rare. Spilling is also much cheaper than spilling vector registers + * to private memory. + */ + +struct ra_interval { + struct ir3_reg_interval interval; + + struct rb_node physreg_node; + physreg_t physreg_start, physreg_end; + + /* Where the shared register is spilled to. If there were no uses when it's + * spilled it could be the original defining instruction. + */ + struct ir3_register *spill_def; + + /* Whether this contains a source of the current instruction that can't be + * spilled. + */ + bool src; + + bool needs_reload; +}; + +struct ra_block_state { + bool visited; + + /* For blocks whose successors are visited first (i.e. loop backedges), which + * values should be live at the end. + */ + BITSET_WORD *live_out; +}; + +struct ra_ctx { + struct ir3_reg_ctx reg_ctx; + + BITSET_DECLARE(available, RA_MAX_FILE_SIZE); + + struct rb_tree physreg_intervals; + + struct ra_interval *intervals; + + struct ir3_liveness *live; + + struct hash_table *pcopy_src_map; + + struct ra_block_state *blocks; + + unsigned start; +}; + +static struct ra_interval * +ir3_reg_interval_to_ra_interval(struct ir3_reg_interval *interval) +{ + return rb_node_data(struct ra_interval, interval, interval); +} + +static struct ra_interval * +rb_node_to_interval(struct rb_node *node) +{ + return rb_node_data(struct ra_interval, node, physreg_node); +} + +static const struct ra_interval * +rb_node_to_interval_const(const struct rb_node *node) +{ + return rb_node_data(struct ra_interval, node, physreg_node); +} + +static struct ra_interval * +ra_interval_next(struct ra_interval *interval) +{ + struct rb_node *next = rb_node_next(&interval->physreg_node); + return next ? rb_node_to_interval(next) : NULL; +} + +static struct ra_interval * +ra_interval_next_or_null(struct ra_interval *interval) +{ + return interval ? ra_interval_next(interval) : NULL; +} + +static int +ra_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b) +{ + const struct ra_interval *a = rb_node_to_interval_const(_a); + const struct ra_interval *b = rb_node_to_interval_const(_b); + return b->physreg_start - a->physreg_start; +} + +static int +ra_interval_cmp(const struct rb_node *node, const void *data) +{ + physreg_t reg = *(const physreg_t *)data; + const struct ra_interval *interval = rb_node_to_interval_const(node); + if (interval->physreg_start > reg) + return -1; + else if (interval->physreg_end <= reg) + return 1; + else + return 0; +} + +static struct ra_ctx * +ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx) +{ + return rb_node_data(struct ra_ctx, ctx, reg_ctx); +} + +static struct ra_interval * +ra_interval_search_sloppy(struct rb_tree *tree, physreg_t reg) +{ + struct rb_node *node = rb_tree_search_sloppy(tree, &reg, ra_interval_cmp); + return node ? rb_node_to_interval(node) : NULL; +} + +/* Get the interval covering the reg, or the closest to the right if it + * doesn't exist.
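+ * For example, with intervals [2,4) and [6,8) in the tree, a search for
+ * physreg 4 or 5 returns the [6,8) interval and a search for 8 returns NULL.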
+ */ +static struct ra_interval * +ra_interval_search_right(struct rb_tree *tree, physreg_t reg) +{ + struct ra_interval *interval = ra_interval_search_sloppy(tree, reg); + if (!interval) { + return NULL; + } else if (interval->physreg_end > reg) { + return interval; + } else { + /* There is no interval covering reg, and ra_file_search_sloppy() + * returned the closest range to the left, so the next interval to the + * right should be the closest to the right. + */ + return ra_interval_next_or_null(interval); + } +} + +static struct ra_interval * +ra_ctx_search_right(struct ra_ctx *ctx, physreg_t reg) +{ + return ra_interval_search_right(&ctx->physreg_intervals, reg); +} + +static void +interval_add(struct ir3_reg_ctx *reg_ctx, struct ir3_reg_interval *_interval) +{ + struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval); + struct ra_ctx *ctx = ir3_reg_ctx_to_ctx(reg_ctx); + + /* We can assume in this case that physreg_start/physreg_end is already + * initialized. + */ + for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) { + BITSET_CLEAR(ctx->available, i); + } + + rb_tree_insert(&ctx->physreg_intervals, &interval->physreg_node, + ra_interval_insert_cmp); +} + +static void +interval_delete(struct ir3_reg_ctx *reg_ctx, struct ir3_reg_interval *_interval) +{ + struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval); + struct ra_ctx *ctx = ir3_reg_ctx_to_ctx(reg_ctx); + + for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) { + BITSET_SET(ctx->available, i); + } + + rb_tree_remove(&ctx->physreg_intervals, &interval->physreg_node); +} + +static void +interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_parent, + struct ir3_reg_interval *_child) +{ + struct ra_interval *parent = ir3_reg_interval_to_ra_interval(_parent); + struct ra_interval *child = ir3_reg_interval_to_ra_interval(_child); + + child->physreg_start = + parent->physreg_start + (child->interval.reg->interval_start - + parent->interval.reg->interval_start); + child->physreg_end = + child->physreg_start + + (child->interval.reg->interval_end - child->interval.reg->interval_start); + + interval_add(ctx, _child); +} + +static void +ra_ctx_init(struct ra_ctx *ctx) +{ + ctx->reg_ctx.interval_add = interval_add; + ctx->reg_ctx.interval_delete = interval_delete; + ctx->reg_ctx.interval_readd = interval_readd; +} + +static void +ra_ctx_reset_block(struct ra_ctx *ctx) +{ + for (unsigned i = 0; i < RA_SHARED_SIZE; i++) { + BITSET_SET(ctx->available, i); + } + + rb_tree_init(&ctx->reg_ctx.intervals); + rb_tree_init(&ctx->physreg_intervals); +} + +static void +ra_interval_init(struct ra_interval *interval, struct ir3_register *reg) +{ + ir3_reg_interval_init(&interval->interval, reg); +} + +static physreg_t +ra_interval_get_physreg(const struct ra_interval *interval) +{ + unsigned child_start = interval->interval.reg->interval_start; + + while (interval->interval.parent) { + interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); + } + + return interval->physreg_start + + (child_start - interval->interval.reg->interval_start); +} + +static unsigned +ra_interval_get_num(const struct ra_interval *interval) +{ + return ra_physreg_to_num(ra_interval_get_physreg(interval), + interval->interval.reg->flags); +} + +static void +ra_interval_dump(struct log_stream *stream, struct ra_interval *interval) +{ + mesa_log_stream_printf(stream, "physreg %u ", interval->physreg_start); + + ir3_reg_interval_dump(stream, &interval->interval); +} + 
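+/* The intervals above are ordered by physreg_start, so walking every interval
+ * that overlaps [start, start + size) looks like:
+ *
+ *    for (struct ra_interval *interval = ra_ctx_search_right(ctx, start);
+ *         interval && interval->physreg_start < start + size;
+ *         interval = ra_interval_next_or_null(interval))
+ *       ...;
+ *
+ * which is the pattern free_space() and find_best_spill_reg() below rely on.
+ */
+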
+static void +ra_ctx_dump(struct ra_ctx *ctx) +{ + struct log_stream *stream = mesa_log_streami(); + + mesa_log_stream_printf(stream, "shared:\n"); + rb_tree_foreach (struct ra_interval, interval, &ctx->physreg_intervals, + physreg_node) { + ra_interval_dump(stream, interval); + } + + unsigned start, end; + mesa_log_stream_printf(stream, "available:\n"); + BITSET_FOREACH_RANGE (start, end, ctx->available, RA_SHARED_SIZE) { + mesa_log_stream_printf(stream, "%u-%u ", start, end); + } + mesa_log_stream_printf(stream, "\n"); + mesa_log_stream_printf(stream, "start: %u\n", ctx->start); +} + +static bool +get_reg_specified(struct ra_ctx *ctx, struct ir3_register *reg, physreg_t physreg) +{ + for (unsigned i = 0; i < reg_size(reg); i++) { + if (!BITSET_TEST(ctx->available, physreg + i)) + return false; + } + + return true; +} + +static unsigned +reg_file_size(struct ir3_register *reg) +{ + return RA_SHARED_SIZE; +} + +static physreg_t +find_best_gap(struct ra_ctx *ctx, struct ir3_register *dst, unsigned size, + unsigned align) +{ + unsigned file_size = reg_file_size(dst); + + /* This can happen if we create a very large merge set. Just bail out in that + * case. + */ + if (size > file_size) + return (physreg_t) ~0; + + unsigned start = ALIGN(ctx->start, align) % (file_size - size + align); + unsigned candidate = start; + do { + bool is_available = true; + for (unsigned i = 0; i < size; i++) { + if (!BITSET_TEST(ctx->available, candidate + i)) { + is_available = false; + break; + } + } + + if (is_available) { + ctx->start = (candidate + size) % file_size; + return candidate; + } + + candidate += align; + if (candidate + size > file_size) + candidate = 0; + } while (candidate != start); + + return (physreg_t)~0; +} + +static physreg_t +find_best_spill_reg(struct ra_ctx *ctx, struct ir3_register *reg, + unsigned size, unsigned align) +{ + unsigned file_size = reg_file_size(reg); + unsigned min_cost = UINT_MAX; + + unsigned start = ALIGN(ctx->start, align) % (file_size - size + align); + physreg_t candidate = start; + physreg_t best_reg = (physreg_t)~0; + do { + unsigned cost = 0; + + /* Iterate through intervals we'd need to spill to use this reg. */ + for (struct ra_interval *interval = ra_ctx_search_right(ctx, candidate); + interval && interval->physreg_start < candidate + size; + interval = ra_interval_next_or_null(interval)) { + /* We can't spill sources of the current instruction when reloading + * sources. + */ + if (interval->src) { + cost = UINT_MAX; + break; + } + + /* We prefer spilling intervals that already have been spilled, so we + * don't have to emit another mov. 
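+ * Only intervals without a spill_def add their size to the cost; evicting an
+ * already-spilled interval is free because its value already lives in a
+ * normal register.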
+ */ + if (!interval->spill_def) + cost += (interval->physreg_end - interval->physreg_start); + } + + if (cost < min_cost) { + min_cost = cost; + best_reg = candidate; + } + + candidate += align; + if (candidate + size > file_size) + candidate = 0; + } while (candidate != start); + + return best_reg; +} + +static struct ir3_register * +split(struct ir3_register *def, unsigned offset, struct ir3_instruction *before) +{ + if (reg_elems(def) == 1) { + assert(offset == 0); + return def; + } + + struct ir3_instruction *split = + ir3_instr_create(before->block, OPC_META_SPLIT, 1, 1); + split->split.off = offset; + struct ir3_register *dst = __ssa_dst(split); + struct ir3_register *src = + ir3_src_create(split, INVALID_REG, def->flags & (IR3_REG_HALF | IR3_REG_SSA)); + src->wrmask = def->wrmask; + src->def = def; + ir3_instr_move_after(split, before); + return dst; +} + +static struct ir3_register * +extract(struct ir3_register *parent_def, unsigned offset, unsigned elems, + struct ir3_instruction *before) +{ + if (offset == 0 && elems == reg_elems(parent_def)) + return parent_def; + + if (elems == 1) + return split(parent_def, offset, before); + + struct ir3_instruction *collect = + ir3_instr_create(before->block, OPC_META_COLLECT, 1, elems); + struct ir3_register *dst = __ssa_dst(collect); + dst->flags |= parent_def->flags & IR3_REG_HALF; + dst->wrmask = MASK(elems); + + ir3_instr_move_after(collect, before); + + for (unsigned i = 0; i < elems; i++) { + ir3_src_create(collect, INVALID_REG, + parent_def->flags & (IR3_REG_HALF | IR3_REG_SSA))->def = + split(parent_def, offset + i, before); + } + + return dst; +} + +static void +spill_interval_children(struct ra_interval *interval, + struct ir3_instruction *before) +{ + rb_tree_foreach (struct ra_interval, child, &interval->interval.children, + interval.node) { + if (!child->spill_def) { + child->spill_def = extract(interval->spill_def, + (child->interval.reg->interval_start - + interval->interval.reg->interval_start) / + reg_elem_size(interval->interval.reg), + reg_elems(child->interval.reg), before); + } + spill_interval_children(child, before); + } +} + +static void +spill_interval(struct ra_ctx *ctx, struct ra_interval *interval) +{ + struct ir3_instruction *before = interval->interval.reg->instr; + + d("spilling ssa_%u:%u", before->serialno, interval->interval.reg->name); + + if (!interval->spill_def) { + /* If this is a phi node or input, we need to insert the demotion to a + * regular register after the last phi or input in the block. + */ + if (before->opc == OPC_META_PHI || + before->opc == OPC_META_INPUT) { + struct ir3_block *block = before->block; + struct ir3_instruction *last_phi_input = NULL; + foreach_instr_from (instr, before, &block->instr_list) { + if (instr->opc != before->opc) + break; + last_phi_input = instr; + } + before = last_phi_input; + } + + struct ir3_instruction *mov = ir3_instr_create(before->block, OPC_MOV, 1, 1); + mov->flags |= IR3_INSTR_SHARED_SPILL; + struct ir3_register *dst = __ssa_dst(mov); + dst->flags |= (interval->interval.reg->flags & IR3_REG_HALF); + dst->wrmask = interval->interval.reg->wrmask; + mov->repeat = reg_elems(dst) - 1; + ir3_src_create(mov, interval->interval.reg->num, + IR3_REG_SHARED | (mov->repeat ? IR3_REG_R : 0) | + (interval->interval.reg->flags & IR3_REG_HALF))->wrmask = + interval->interval.reg->wrmask; + mov->cat1.src_type = mov->cat1.dst_type = + (interval->interval.reg->flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; + + ir3_instr_move_after(mov, before); + interval->spill_def = dst; + } + + spill_interval_children(interval, interval->spill_def->instr); + + ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); +} + +/* Try to demote a scalar ALU instruction to a normal ALU instruction, using the + * spilled sources. We have to take into account restrictions on the number of + * shared sources that only exist for normal ALU instructions. + */ +static bool +try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr) +{ + /* First, check restrictions. */ + switch (opc_cat(instr->opc)) { + case 1: + if (!(instr->srcs[0]->flags & (IR3_REG_CONST | IR3_REG_IMMED))) + return false; + break; + case 2: { + /* We need one source to either be demotable or an immediate. */ + if (instr->srcs_count > 1) { + struct ra_interval *src0_interval = + (instr->srcs[0]->flags & IR3_REG_SSA) ? &ctx->intervals[instr->srcs[0]->def->name] : NULL; + struct ra_interval *src1_interval = + (instr->srcs[1]->flags & IR3_REG_SSA) ? &ctx->intervals[instr->srcs[1]->def->name] : NULL; + if (!(src0_interval && src0_interval->spill_def) && + !(src1_interval && src1_interval->spill_def) && + !(instr->srcs[0]->flags & IR3_REG_IMMED) && + !(instr->srcs[1]->flags & IR3_REG_IMMED)) + return false; + } + break; + } + case 3: { + struct ra_interval *src0_interval = + (instr->srcs[0]->flags & IR3_REG_SSA) ? &ctx->intervals[instr->srcs[0]->def->name] : NULL; + struct ra_interval *src1_interval = + (instr->srcs[1]->flags & IR3_REG_SSA) ? &ctx->intervals[instr->srcs[1]->def->name] : NULL; + + /* src1 cannot be shared */ + if (src1_interval && !src1_interval->spill_def) { + /* Try to swap src0 and src1, similar to what copy prop does. */ + if (!is_mad(instr->opc)) + return false; + + if ((src0_interval && src0_interval->spill_def) || + (instr->srcs[0]->flags & IR3_REG_IMMED)) { + struct ir3_register *src0 = instr->srcs[0]; + instr->srcs[0] = instr->srcs[1]; + instr->srcs[1] = src0; + } else { + return false; + } + } + break; + } + case 4: { + assert(instr->srcs[0]->flags & IR3_REG_SSA); + struct ra_interval *src_interval = &ctx->intervals[instr->srcs[0]->def->name]; + if (!src_interval->spill_def) + return false; + break; + } + + default: + return false; + } + + d("demoting instruction"); + + /* If the instruction is already not a scalar ALU instruction, we should've + * skipped reloading and just demoted sources directly, so we should never + * get here. + */ + assert(instr->dsts[0]->flags & IR3_REG_SHARED); + + /* Now we actually demote the instruction */ + ra_foreach_src (src, instr) { + assert(src->flags & IR3_REG_SHARED); + struct ra_interval *interval = &ctx->intervals[src->def->name]; + if (interval->spill_def) { + src->def = interval->spill_def; + src->flags &= ~IR3_REG_SHARED; + interval->needs_reload = false; + if (interval->interval.inserted) + ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval); + while (interval->interval.parent) + interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); + interval->src = false; + } + } + + struct ra_interval *dst_interval = &ctx->intervals[instr->dsts[0]->name]; + instr->dsts[0]->flags &= ~IR3_REG_SHARED; + ra_interval_init(dst_interval, instr->dsts[0]); + dst_interval->spill_def = instr->dsts[0]; + + instr->flags |= IR3_INSTR_SHARED_SPILL; + + return true; +} + +/* Free up [start, start + size) by spilling live intervals.
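+ * Every interval still overlapping the range is spilled with spill_interval(),
+ * which also removes it from the file, leaving the registers free for the
+ * caller to use.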
+ */ +static void +free_space(struct ra_ctx *ctx, physreg_t start, unsigned size) +{ + struct ra_interval *interval = ra_ctx_search_right(ctx, start); + while (interval && interval->physreg_start < start + size) { + struct ra_interval *next = ra_interval_next_or_null(interval); + spill_interval(ctx, interval); + interval = next; + } +} + +static physreg_t +get_reg(struct ra_ctx *ctx, struct ir3_register *reg, bool src) +{ + if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) { + physreg_t preferred_reg = + reg->merge_set->preferred_reg + reg->merge_set_offset; + if (preferred_reg < reg_file_size(reg) && + preferred_reg % reg_elem_size(reg) == 0 && + get_reg_specified(ctx, reg, preferred_reg)) + return preferred_reg; + } + + /* If this register is a subset of a merge set which we have not picked a + * register for, first try to allocate enough space for the entire merge + * set. + */ + unsigned size = reg_size(reg); + if (reg->merge_set && reg->merge_set->preferred_reg == (physreg_t)~0 && + size < reg->merge_set->size) { + physreg_t best_reg = find_best_gap(ctx, reg, reg->merge_set->size, + reg->merge_set->alignment); + if (best_reg != (physreg_t)~0u) { + best_reg += reg->merge_set_offset; + return best_reg; + } + } + + /* For ALU and SFU instructions, if the src reg is avail to pick, use it. + * Because this doesn't introduce unnecessary dependencies, and it + * potentially avoids needing (ss) syncs for write after read hazards for + * SFU instructions: + */ + if (!src && (is_sfu(reg->instr) || is_alu(reg->instr))) { + for (unsigned i = 0; i < reg->instr->srcs_count; i++) { + struct ir3_register *src = reg->instr->srcs[i]; + if (!ra_reg_is_src(src)) + continue; + if ((src->flags & IR3_REG_SHARED) && reg_size(src) >= size) { + struct ra_interval *src_interval = &ctx->intervals[src->def->name]; + physreg_t src_physreg = ra_interval_get_physreg(src_interval); + if (src_physreg % reg_elem_size(reg) == 0 && + src_physreg + size <= reg_file_size(reg) && + get_reg_specified(ctx, reg, src_physreg)) + return src_physreg; + } + } + } + + return find_best_gap(ctx, reg, size, reg_elem_size(reg)); +} + +/* The reload process is split in two, first we allocate a register to reload to + * for all sources that need a reload and then we actually execute the reload. + * This is to allow us to demote shared ALU instructions to non-shared whenever + * we would otherwise need to spill to reload, without leaving dangling unused + * reload mov's from previously processed sources. So, for example, we could + * need to reload both sources of an add, but after reloading the first source + * we realize that we would need to spill to reload the second source and we + * should demote the add instead, which means cancelling the first reload. 
+ */ +static void +reload_src(struct ra_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *src) +{ + struct ir3_register *reg = src->def; + struct ra_interval *interval = &ctx->intervals[reg->name]; + unsigned size = reg_size(reg); + + physreg_t best_reg = get_reg(ctx, reg, true); + + if (best_reg == (physreg_t)~0u) { + if (try_demote_instruction(ctx, instr)) + return; + + best_reg = find_best_spill_reg(ctx, reg, size, reg_elem_size(reg)); + assert(best_reg != (physreg_t)~0u); + + free_space(ctx, best_reg, size); + } + + d("reload src %u physreg %u", reg->name, best_reg); + interval->physreg_start = best_reg; + interval->physreg_end = best_reg + size; + interval->needs_reload = true; + ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); + interval->src = true; +} + +static void +reload_interval(struct ra_ctx *ctx, struct ir3_instruction *instr, + struct ir3_block *block, struct ra_interval *interval) +{ + struct ir3_register *def = interval->interval.reg; + struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1); + mov->flags |= IR3_INSTR_SHARED_SPILL; + unsigned flags = IR3_REG_SHARED | (def->flags & IR3_REG_HALF); + ir3_dst_create(mov, ra_physreg_to_num(interval->physreg_start, flags), + flags)->wrmask = def->wrmask; + mov->repeat = reg_elems(def) - 1; + struct ir3_register *mov_src = + ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | (def->flags & IR3_REG_HALF) | + (mov->repeat ? IR3_REG_R : 0)); + assert(interval->spill_def); + mov_src->def = interval->spill_def; + mov_src->wrmask = def->wrmask; + mov->cat1.src_type = mov->cat1.dst_type = + (def->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; + + if (instr) + ir3_instr_move_before(mov, instr); +} + +static void +reload_src_finalize(struct ra_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *src) +{ + struct ir3_register *reg = src->def; + struct ra_interval *interval = &ctx->intervals[reg->name]; + + if (!interval->needs_reload) + return; + + reload_interval(ctx, instr, instr->block, interval); + + interval->needs_reload = false; +} + +static bool +can_demote_src(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_SCAN_MACRO: + case OPC_META_COLLECT: + return false; + case OPC_MOV: + /* non-shared -> shared floating-point conversions don't work */ + return (!(instr->dsts[0]->flags & IR3_REG_SHARED) || + (full_type(instr->cat1.src_type) != TYPE_F32 && + full_type(instr->cat1.dst_type) != TYPE_F32)); + default: + return (!is_alu(instr) && !is_sfu(instr)) || + !(instr->dsts[0]->flags & IR3_REG_SHARED); + } +} + +/* Ensure that this source is never spilled while reloading other sources. + */ +static void +mark_src(struct ra_ctx *ctx, struct ir3_register *src) +{ + if (!(src->flags & IR3_REG_SHARED)) + return; + + struct ra_interval *interval = &ctx->intervals[src->def->name]; + + if (interval->interval.inserted) { + while (interval->interval.parent) + interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); + + interval->src = true; + } +} + +static void +ensure_src_live(struct ra_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *src) +{ + if (!(src->flags & IR3_REG_SHARED)) + return; + + struct ra_interval *interval = &ctx->intervals[src->def->name]; + + if (!interval->interval.inserted) { + /* In some cases we cannot demote shared reg sources to non-shared regs, + * then we have to reload it. 
+ */ + assert(interval->spill_def); + if (!can_demote_src(instr)) { + reload_src(ctx, instr, src); + } else { + if (instr->opc == OPC_META_PARALLEL_COPY) { + /* Stash away the original def to use later in case we actually have + * to insert a reload. + */ + _mesa_hash_table_insert(ctx->pcopy_src_map, src, src->def); + } + src->def = interval->spill_def; + src->flags &= ~IR3_REG_SHARED; + } + } +} + +static void +assign_src(struct ra_ctx *ctx, struct ir3_register *src) +{ + if (!(src->flags & IR3_REG_SHARED)) + return; + + struct ra_interval *interval = &ctx->intervals[src->def->name]; + assert(interval->interval.inserted); + src->num = ra_physreg_to_num(ra_interval_get_physreg(interval), src->flags); + + if ((src->flags & IR3_REG_FIRST_KILL) && + !interval->interval.parent && + rb_tree_is_empty(&interval->interval.children)) + ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval); + + while (interval->interval.parent) + interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); + + interval->src = false; +} + +static void +handle_dst(struct ra_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *dst) +{ + if (!(dst->flags & IR3_REG_SHARED)) + return; + + struct ra_interval *interval = &ctx->intervals[dst->name]; + ra_interval_init(interval, dst); + interval->spill_def = NULL; + + if (dst->tied) { + struct ir3_register *tied_def = dst->tied->def; + struct ra_interval *tied_interval = &ctx->intervals[tied_def->name]; + if ((dst->tied->flags & IR3_REG_KILL) && + !tied_interval->interval.parent && + rb_tree_is_empty(&tied_interval->interval.children)) { + dst->num = dst->tied->num; + interval->physreg_start = tied_interval->physreg_start; + interval->physreg_end = tied_interval->physreg_end; + ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); + return; + } + } + + physreg_t physreg = get_reg(ctx, dst, false); + if (physreg == (physreg_t) ~0u) { + if (try_demote_instruction(ctx, instr)) + return; + + unsigned size = reg_size(dst); + physreg = find_best_spill_reg(ctx, dst, size, reg_elem_size(dst)); + assert(physreg != (physreg_t)~0u); + free_space(ctx, physreg, size); + } + + interval->physreg_start = physreg; + interval->physreg_end = physreg + reg_size(dst); + dst->num = ra_physreg_to_num(physreg, dst->flags); + ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); + d("insert dst %u physreg %u", dst->name, physreg); + + if (dst->tied) { + struct ir3_instruction *mov = ir3_instr_create(instr->block, OPC_META_PARALLEL_COPY, 1, 1); + unsigned flags = IR3_REG_SHARED | (dst->flags & IR3_REG_HALF); + ir3_dst_create(mov, dst->num, flags)->wrmask = dst->wrmask; + ir3_src_create(mov, dst->tied->num, flags)->wrmask = dst->wrmask; + mov->cat1.src_type = mov->cat1.dst_type = + (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;; + ir3_instr_move_before(mov, instr); + dst->tied->num = dst->num; + } +} + +static void +handle_src_late(struct ra_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *src) +{ + if (!(src->flags & IR3_REG_SHARED)) + return; + + struct ra_interval *interval = &ctx->intervals[src->def->name]; + reload_src_finalize(ctx, instr, src); + + /* Remove killed sources that have to be killed late due to being merged with + * other defs. 
+ */ + if (!(src->flags & IR3_REG_KILL)) + return; + + if (interval->interval.inserted) + ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval); +} + +static void +handle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) +{ + ra_foreach_src (src, instr) + mark_src(ctx, src); + + ra_foreach_src (src, instr) + ensure_src_live(ctx, instr, src); + + ra_foreach_src_rev (src, instr) + assign_src(ctx, src); + + ra_foreach_dst (dst, instr) + handle_dst(ctx, instr, dst); + + ra_foreach_src (src, instr) + handle_src_late(ctx, instr, src); +} + +static void +handle_split(struct ra_ctx *ctx, struct ir3_instruction *split) +{ + struct ir3_register *src = split->srcs[0]; + struct ir3_register *dst = split->dsts[0]; + + if (!(dst->flags & IR3_REG_SHARED)) + return; + + if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) { + handle_normal_instr(ctx, split); + return; + } + + struct ra_interval *src_interval = &ctx->intervals[src->def->name]; + struct ra_interval *dst_interval = &ctx->intervals[dst->name]; + + ra_interval_init(dst_interval, dst); + dst_interval->spill_def = NULL; + + if (src_interval->spill_def) { + struct ir3_instruction *spill_split = + ir3_instr_create(split->block, OPC_META_SPLIT, 1, 1); + struct ir3_register *dst = __ssa_dst(spill_split); + ir3_src_create(spill_split, INVALID_REG, IR3_REG_SSA)->def = + src_interval->spill_def; + spill_split->split.off = split->split.off; + ir3_instr_move_after(spill_split, split); + dst_interval->spill_def = dst; + return; + } + + dst_interval->physreg_start = + src_interval->physreg_start + dst->merge_set_offset - + src->def->merge_set_offset; + dst_interval->physreg_end = dst_interval->physreg_start + reg_size(dst); + ir3_reg_interval_insert(&ctx->reg_ctx, &dst_interval->interval); + src->num = ra_interval_get_num(src_interval); + dst->num = ra_interval_get_num(dst_interval); + d("insert dst %u physreg %u", dst->name, dst_interval->physreg_start); + + if (src->flags & IR3_REG_KILL) + ir3_reg_interval_remove(&ctx->reg_ctx, &src_interval->interval); +} + +static void +handle_phi(struct ra_ctx *ctx, struct ir3_instruction *phi) +{ + struct ir3_register *dst = phi->dsts[0]; + + if (!(dst->flags & IR3_REG_SHARED)) + return; + + struct ra_interval *dst_interval = &ctx->intervals[dst->name]; + ra_interval_init(dst_interval, dst); + + /* In some rare cases, it's possible to have a phi node with a physical-only + * source. Here's a contrived example: + * + * loop { + * if non-uniform { + * if uniform { + * x_1 = ...; + * continue; + * } + * x_2 = ...; + * } else { + * break; + * } + * // continue block + * x_3 = phi(x_1, x_2) + * } + * + * Assuming x_1 and x_2 are uniform, x_3 will also be uniform, because all + * threads that stay in the loop take the same branch to the continue block, + * however execution may fall through from the assignment to x_2 to the + * break statement because the outer if is non-uniform, and then it will fall + * through again to the continue block, so if x_3 is to be in a shared reg + * then the phi needs an extra source pointing to the break statement, which + * itself needs a phi node: + * + * loop { + * if non-uniform { + * if uniform { + * x_1 = ...; + * continue; + * } + * x_2 = ...; + * } else { + * x_4 = phi(undef, x_2) + * break; + * } + * // continue block + * x_3 = phi(x_1, x_2, x_4) + * } + */ + + /* phi nodes are special because we cannot spill them normally, instead we + * have to spill the parallel copies that their sources point to and make the + * entire phi not shared anymore. 
+ */ + + physreg_t physreg = get_reg(ctx, dst, false); + if (physreg == (physreg_t) ~0u) { + d("spilling phi destination"); + dst->flags &= ~IR3_REG_SHARED; + dst_interval->spill_def = dst; + phi->flags |= IR3_INSTR_SHARED_SPILL; + + foreach_src (src, phi) { + src->flags &= ~IR3_REG_SHARED; + if (src->def) + src->def->flags &= ~IR3_REG_SHARED; + } + + return; + } + + dst->num = ra_physreg_to_num(physreg, dst->flags); + dst_interval->spill_def = NULL; + dst_interval->physreg_start = physreg; + dst_interval->physreg_end = physreg + reg_size(dst); + ir3_reg_interval_insert(&ctx->reg_ctx, &dst_interval->interval); + + ra_foreach_src_n (src, i, phi) { + /* We assume that any phis with non-logical sources aren't promoted. */ + assert(i < phi->block->predecessors_count); + src->num = dst->num; + src->def->num = dst->num; + } +} + +static void +handle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *pcopy) +{ + /* For parallel copies, we only handle the source. The destination is handled + * later when processing phi nodes. + */ + + ra_foreach_src (src, pcopy) + mark_src(ctx, src); + + ra_foreach_src (src, pcopy) + ensure_src_live(ctx, pcopy, src); + + ra_foreach_src_rev (src, pcopy) + assign_src(ctx, src); + + ra_foreach_src (src, pcopy) + handle_src_late(ctx, pcopy, src); +} + +static void +handle_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) +{ + instr->flags &= ~IR3_INSTR_SHARED_SPILL; + + switch (instr->opc) { + case OPC_META_SPLIT: + handle_split(ctx, instr); + break; + case OPC_META_PHI: + handle_phi(ctx, instr); + break; + case OPC_META_PARALLEL_COPY: + handle_pcopy(ctx, instr); + break; + default: + handle_normal_instr(ctx, instr); + } +} + +/* In case we define a value outside a loop, use it inside the loop, then spill + * it afterwards inside the same loop, we could lose the value so we have to + * reload it. We have to reload it after any parallel copy instruction, when the + * live shared registers equal the live-in of the backedge. lower_pcopy() will + * then move any non-shared parallel copies down past the reload. 
+ */ +static void +reload_live_outs(struct ra_ctx *ctx, struct ir3_block *block) +{ + struct ra_block_state *state = &ctx->blocks[block->index]; + unsigned name; + BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) { + struct ir3_register *reg = ctx->live->definitions[name]; + + struct ra_interval *interval = &ctx->intervals[name]; + if (!interval->interval.inserted) { + d("reloading %d at end of backedge", reg->name); + reload_interval(ctx, NULL, block, interval); + } + } +} + +static void +record_pred_live_out(struct ra_ctx *ctx, + struct ra_interval *interval, + struct ir3_block *pred) +{ + struct ra_block_state *state = &ctx->blocks[pred->index]; + + struct ir3_register *def = interval->interval.reg; + BITSET_SET(state->live_out, def->name); + + rb_tree_foreach (struct ra_interval, child, + &interval->interval.children, interval.node) { + record_pred_live_out(ctx, child, pred); + } +} + +static void +record_pred_live_outs(struct ra_ctx *ctx, struct ir3_block *block) +{ + for (unsigned i = 0; i < block->predecessors_count; i++) { + struct ir3_block *pred = block->predecessors[i]; + struct ra_block_state *state = &ctx->blocks[pred->index]; + if (state->visited) + continue; + + state->live_out = rzalloc_array(NULL, BITSET_WORD, + BITSET_WORDS(ctx->live->definitions_count)); + + + rb_tree_foreach (struct ra_interval, interval, + &ctx->reg_ctx.intervals, interval.node) { + record_pred_live_out(ctx, interval, pred); + } + } +} + +static void +handle_block(struct ra_ctx *ctx, struct ir3_block *block) +{ + ra_ctx_reset_block(ctx); + + unsigned name; + BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], + ctx->live->definitions_count) { + struct ir3_register *def = ctx->live->definitions[name]; + struct ra_interval *interval = &ctx->intervals[name]; + + /* Non-shared definitions may still be definitions we spilled by demoting + * them, so we still need to initialize the interval. But we shouldn't + * make these intervals live. + */ + ra_interval_init(interval, def); + + if ((def->flags & IR3_REG_SHARED) && !interval->spill_def) { + ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); + } + } + + if (RA_DEBUG) { + d("after live-in block %u:\n", block->index); + ra_ctx_dump(ctx); + } + + if (block->predecessors_count > 1) + record_pred_live_outs(ctx, block); + + foreach_instr (instr, &block->instr_list) { + di(instr, "processing"); + + handle_instr(ctx, instr); + + if (RA_DEBUG) + ra_ctx_dump(ctx); + } + + if (block->successors[0]) { + struct ra_block_state *state = &ctx->blocks[block->successors[0]->index]; + + if (state->visited) { + assert(!block->successors[1]); + + reload_live_outs(ctx, block); + } + } + + ctx->blocks[block->index].visited = true; +} + +static void +lower_pcopy(struct ir3 *ir, struct ra_ctx *ctx) +{ + foreach_block (block, &ir->block_list) { + foreach_instr_safe (instr, &block->instr_list) { + /* At this point due to spilling there may be parallel copies from + * shared to non-shared registers and vice versa. Lowering these after + * RA may produce cycles involving shared and non-shared registers, + * which would need to be resolved by swapping a shared and non-shared + * register which is something we can't handle. However by lowering + * these to moves now, we can make sure that cycles only involve + * non-shared registers. 
To avoid illegally moving a shared register + * read or write across the parallel copy, which may have other + * conflicting reads/writes if there's a cycle, we need to move copies + * from non-shared to shared below the shared copies, and we need to + * move copies from shared to non-shared above them. So, we have the + * following order: + * + * 1. shared->non-shared copies (spills) + * 2. shared->shared copies (one parallel copy as there may be cycles) + * 3. non-shared->shared copies (reloads) + * 4. non-shared->non-shared copies + * + * We split out the non-shared->non-shared copies as a separate step. + */ + if (instr->opc == OPC_META_PARALLEL_COPY) { + for (unsigned i = 0; i < instr->srcs_count; i++) { + if ((instr->srcs[i]->flags & IR3_REG_SHARED) && + !(instr->dsts[i]->flags & IR3_REG_SHARED)) { + /* shared->non-shared. Create a spill move and rewrite the + * source to be the destination of the move (so that the + * original shared->non-shared copy becomes a + * non-shared->non-shared copy). + */ + struct ir3_instruction *mov = + ir3_instr_create(block, OPC_MOV, 1, 1); + mov->flags |= IR3_INSTR_SHARED_SPILL; + struct ir3_register *dst = + ir3_dst_create(mov, INVALID_REG, instr->dsts[i]->flags); + dst->wrmask = instr->dsts[i]->wrmask; + dst->instr = mov; + mov->repeat = reg_elems(mov->dsts[0]) - 1; + struct ir3_register *src = + ir3_src_create(mov, instr->srcs[i]->num, + instr->srcs[i]->flags | + (mov->repeat ? IR3_REG_R : 0)); + src->wrmask = instr->srcs[i]->wrmask; + mov->cat1.dst_type = mov->cat1.src_type = + (mov->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; + instr->srcs[i]->flags = mov->dsts[0]->flags; + instr->srcs[i]->def = mov->dsts[0]; + ir3_instr_move_before(mov, instr); + } + } + + for (unsigned i = 0; i < instr->dsts_count;) { + if ((instr->dsts[i]->flags & IR3_REG_SHARED) && + (instr->srcs[i]->flags & IR3_REG_SSA) && + !(instr->srcs[i]->flags & IR3_REG_SHARED)) { + /* non-shared->shared. Create a reload move. + */ + struct ir3_instruction *mov = + ir3_instr_create(block, OPC_MOV, 1, 1); + mov->flags |= IR3_INSTR_SHARED_SPILL; + struct ir3_register *dst = + ir3_dst_create(mov, instr->dsts[i]->num, + instr->dsts[i]->flags); + dst->instr = mov; + dst->wrmask = instr->dsts[i]->wrmask; + mov->repeat = reg_elems(mov->dsts[0]) - 1; + struct ir3_register *src = + ir3_src_create(mov, INVALID_REG, instr->srcs[i]->flags | + (mov->repeat ? IR3_REG_R : 0)); + src->def = instr->srcs[i]->def; + src->wrmask = instr->srcs[i]->wrmask; + mov->cat1.dst_type = mov->cat1.src_type = + (mov->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; + + /* When we spill a parallel copy source, we lose the + * information of where it originally points to since we make + * it point to the spill def. If we later decide not to also + * spill the phi associated with it, we have to restore it + * here using the stashed original source so that RA + * validation can check that we did the correct thing. + * + * Because SSA-ness goes away after validation, this is really + * just about validation. 
+                   */
+                  struct ir3_block *succ = block->successors[0];
+                  unsigned pred_idx = ir3_block_get_pred_index(succ, block);
+                  foreach_instr (phi, &succ->instr_list) {
+                     if (phi->opc != OPC_META_PHI)
+                        break;
+
+                     if (phi->srcs[pred_idx]->def == instr->dsts[i]) {
+                        struct ir3_register *def =
+                           _mesa_hash_table_search(ctx->pcopy_src_map,
+                                                   instr->srcs[i])->data;
+                        phi->srcs[pred_idx]->def = def;
+                        break;
+                     }
+                  }
+
+                  instr->srcs[i] = instr->srcs[instr->srcs_count - 1];
+                  instr->dsts[i] = instr->dsts[instr->dsts_count - 1];
+                  instr->srcs_count--;
+                  instr->dsts_count--;
+                  ir3_instr_move_after(mov, instr);
+                  continue;
+               }
+
+               i++;
+            }
+
+            /* Move any non-shared copies to a separate parallel copy
+             * instruction right at the end of the block, after any reloads.
+             * At this point all copies should be {shared,immediate}->shared
+             * or {non-shared,immediate}->non-shared.
+             */
+            unsigned non_shared_copies = 0;
+            for (unsigned i = 0; i < instr->dsts_count; i++) {
+               if (!(instr->dsts[i]->flags & IR3_REG_SHARED))
+                  non_shared_copies++;
+            }
+
+            if (non_shared_copies != 0) {
+               struct ir3_instruction *pcopy =
+                  ir3_instr_create(block, OPC_META_PARALLEL_COPY,
+                                   non_shared_copies, non_shared_copies);
+
+               unsigned j = 0;
+               for (unsigned i = 0; i < instr->dsts_count;) {
+                  if (!(instr->dsts[i]->flags & IR3_REG_SHARED)) {
+                     pcopy->dsts[j] = instr->dsts[i];
+                     pcopy->srcs[j] = instr->srcs[i];
+                     pcopy->dsts[j]->instr = pcopy;
+                     instr->srcs[i] = instr->srcs[instr->srcs_count - 1];
+                     instr->dsts[i] = instr->dsts[instr->dsts_count - 1];
+                     instr->srcs_count--;
+                     instr->dsts_count--;
+                     j++;
+                     continue;
+                  }
+                  i++;
+               }
+
+               pcopy->srcs_count = pcopy->dsts_count = j;
+               if (instr->dsts_count == 0)
+                  list_del(&instr->node);
+            }
+         }
+      }
+   }
+}
+
+static void
+finalize(struct ir3 *ir)
+{
+   foreach_block (block, &ir->block_list) {
+      foreach_instr (instr, &block->instr_list) {
+         for (unsigned i = 0; i < instr->dsts_count; i++) {
+            if (instr->dsts[i]->flags & IR3_REG_SHARED) {
+               instr->dsts[i]->flags &= ~IR3_REG_SSA;
+            }
+         }
+
+         for (unsigned i = 0; i < instr->srcs_count; i++) {
+            if (instr->srcs[i]->flags & IR3_REG_SHARED) {
+               instr->srcs[i]->flags &= ~IR3_REG_SSA;
+               instr->srcs[i]->def = NULL;
+            }
+         }
+      }
+   }
+}
+
+void
+ir3_ra_shared(struct ir3_shader_variant *v, struct ir3_liveness *live)
+{
+   struct ra_ctx ctx;
+
+   ra_ctx_init(&ctx);
+   ctx.intervals = rzalloc_array(NULL, struct ra_interval,
+                                 live->definitions_count);
+   ctx.blocks = rzalloc_array(NULL, struct ra_block_state,
+                              live->block_count);
+   ctx.start = 0;
+   ctx.live = live;
+   ctx.pcopy_src_map = _mesa_pointer_hash_table_create(NULL);
+
+   foreach_block (block, &v->ir->block_list) {
+      handle_block(&ctx, block);
+   }
+
+   lower_pcopy(v->ir, &ctx);
+
+   for (unsigned i = 0; i < live->block_count; i++) {
+      if (ctx.blocks[i].live_out)
+         ralloc_free(ctx.blocks[i].live_out);
+   }
+
+   ralloc_free(ctx.intervals);
+   ralloc_free(ctx.pcopy_src_map);
+   ralloc_free(ctx.blocks);
+
+   ir3_ra_validate(v, RA_FULL_SIZE, RA_HALF_SIZE, live->block_count, true);
+   finalize(v->ir);
+}
+
diff --git a/src/freedreno/ir3/ir3_spill.c b/src/freedreno/ir3/ir3_spill.c
index 475c132f6fa..0ea80bca337 100644
--- a/src/freedreno/ir3/ir3_spill.c
+++ b/src/freedreno/ir3/ir3_spill.c
@@ -1193,20 +1193,23 @@ is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
 static void
 handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
 {
-   foreach_dst (dst, pcopy) {
+   ra_foreach_dst (dst, pcopy) {
       struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
       ra_spill_interval_init(dst_interval, dst);
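+      /* Only SSA dsts reach this point: ra_foreach_dst skips non-SSA dsts,
+       * i.e. shared copies that already had registers assigned by the
+       * shared-register pass.
+       */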
   }
 
   foreach_src_n (src, i, pcopy) {
-      d("processing src %u", i);
       struct ir3_register *dst = pcopy->dsts[i];
+      if (!(dst->flags & IR3_REG_SSA))
+         continue;
+
+      d("processing src %u", i);
 
       /* Skip the intermediate copy for cases where the source is merged with
        * the destination. Crucially this means that we also don't reload/spill
        * it if it's been spilled, because it shares the same spill slot.
        */
-      if (src->def && src->def->merge_set &&
+      if ((src->flags & IR3_REG_SSA) && src->def->merge_set &&
           src->def->merge_set == dst->merge_set &&
           src->def->merge_set_offset == dst->merge_set_offset) {
          struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
@@ -1221,7 +1224,7 @@ handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
          dst_interval->cant_spill = false;
          dst_interval->dst = src_interval->dst;
       }
-   } else if (src->def) {
+   } else if (src->flags & IR3_REG_SSA) {
       struct ra_spill_interval *temp_interval = create_temp_interval(ctx, dst);
       struct ir3_register *temp = temp_interval->interval.reg;
@@ -1251,15 +1254,17 @@ handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
 
   foreach_src_n (src, i, pcopy) {
       struct ir3_register *dst = pcopy->dsts[i];
+      if (!(dst->flags & IR3_REG_SSA))
+         continue;
 
-      if (src->def && src->def->merge_set &&
+      if ((src->flags & IR3_REG_SSA) && src->def->merge_set &&
           src->def->merge_set == dst->merge_set &&
           src->def->merge_set_offset == dst->merge_set_offset)
          continue;
 
       struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
 
-      if (!src->def) {
+      if (!(src->flags & IR3_REG_SSA)) {
          dst_interval->cant_spill = true;
          ra_spill_ctx_insert(ctx, dst_interval);
          limit(ctx, pcopy);
@@ -1292,6 +1297,9 @@ handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
 static void
 handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
 {
+   if (!(instr->dsts[0]->flags & IR3_REG_SSA))
+      return;
+
    init_dst(ctx, instr->dsts[0]);
    insert_dst(ctx, instr->dsts[0]);
    finish_dst(ctx, instr->dsts[0]);
@@ -1300,6 +1308,9 @@ handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
 static void
 remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
 {
+   if (!(instr->dsts[0]->flags & IR3_REG_SSA))
+      return;
+
    if (instr->opc == OPC_META_TEX_PREFETCH) {
       ra_foreach_src (src, instr)
          remove_src(ctx, instr, src);
@@ -1623,6 +1634,9 @@ static void
 rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
             struct ir3_block *block)
 {
+   if (!(phi->dsts[0]->flags & IR3_REG_SSA))
+      return;
+
    if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
       phi->flags |= IR3_INSTR_UNUSED;
       return;
@@ -1977,8 +1991,25 @@ cleanup_dead(struct ir3 *ir)
 {
    foreach_block (block, &ir->block_list) {
       foreach_instr_safe (instr, &block->instr_list) {
-         if (instr->flags & IR3_INSTR_UNUSED)
-            list_delinit(&instr->node);
+         if (instr->flags & IR3_INSTR_UNUSED) {
+            if (instr->opc == OPC_META_PARALLEL_COPY) {
+               /* There may be non-SSA shared copies; we need to preserve
+                * these.
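+                *
+                * (These have already been assigned physical registers by the
+                * shared-register pass, so the parallel copy is not dead even
+                * when all of its SSA dsts are unused.)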
+                */
+               for (unsigned i = 0; i < instr->dsts_count;) {
+                  if (instr->dsts[i]->flags & IR3_REG_SSA) {
+                     instr->dsts[i] = instr->dsts[--instr->dsts_count];
+                     instr->srcs[i] = instr->srcs[--instr->srcs_count];
+                  } else {
+                     i++;
+                  }
+               }
+
+               if (instr->dsts_count == 0)
+                  list_delinit(&instr->node);
+            } else {
+               list_delinit(&instr->node);
+            }
+         }
       }
    }
 }
diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c
index 1c74d711e0c..2b45559cab8 100644
--- a/src/freedreno/ir3/ir3_validate.c
+++ b/src/freedreno/ir3/ir3_validate.c
@@ -84,6 +84,9 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
    validate_assert(ctx, src->wrmask == reg->wrmask);
    validate_assert(ctx, reg_class_flags(src) == reg_class_flags(reg));
 
+   if (src->flags & IR3_REG_CONST)
+      validate_assert(ctx, !(src->flags & IR3_REG_SHARED));
+
    if (reg->tied) {
       validate_assert(ctx, reg->tied->tied == reg);
       bool found = false;
diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build
index 954b0b88fdb..e7ba1d8fe5f 100644
--- a/src/freedreno/ir3/meson.build
+++ b/src/freedreno/ir3/meson.build
@@ -112,6 +112,7 @@ libfreedreno_ir3_files = files(
   'ir3_sched.c',
   'ir3_shader.c',
   'ir3_shader.h',
+  'ir3_shared_ra.c',
   'ir3_spill.c',
   'ir3_validate.c',
 )
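Reviewer note: a minimal sketch of how the new entry point is expected to be driven. Only ir3_ra_shared() itself is defined by this patch; the surrounding helper names (ir3_calc_liveness and its mem_ctx argument) are assumptions for illustration.

/* Illustrative only -- not part of this patch. */
static void
run_shared_ra_sketch(struct ir3_shader_variant *v)
{
   /* Liveness feeds the shared pass; the mem_ctx argument here is an
    * assumption about the helper's signature.
    */
   struct ir3_liveness *live = ir3_calc_liveness(NULL, v->ir);

   /* Color shared registers first. ir3_ra_shared() internally lowers any
    * shared<->non-shared parallel-copy entries to plain moves (lower_pcopy),
    * so the later non-shared RA never sees cross-file copy cycles.
    */
   ir3_ra_shared(v, live);

   ralloc_free(live);
}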