From dcfbb60392fce468eb2bc2513c3b1ba5dfc1e62e Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 16 Aug 2022 14:38:59 +0200 Subject: [PATCH] ir3/spill: Fix extracting from a vector at the end of a block If this happens, then "after" is NULL, so we can't use it to get the block, and the instruction is never moved at the end, so we have to create the split instructions before creating the collect to make sure they are in the right order. This happens when reloading a complex vector value that has been coalesced at the end of a basic block, which apparently hadn't happened until a gfxbench5 shader on zink hit this case. This fixes it. Closes: #7054 Fixes: 613eaac7b53 ("ir3: Initial support for spilling non-shared registers") Part-of: --- src/freedreno/ir3/ir3_spill.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/freedreno/ir3/ir3_spill.c b/src/freedreno/ir3/ir3_spill.c index 84eef9446c9..b9b1565fe92 100644 --- a/src/freedreno/ir3/ir3_spill.c +++ b/src/freedreno/ir3/ir3_spill.c @@ -854,7 +854,7 @@ split(struct ir3_register *def, unsigned offset, assert(!(def->flags & IR3_REG_ARRAY)); assert(def->merge_set); struct ir3_instruction *split = - ir3_instr_create(after->block, OPC_META_SPLIT, 1, 1); + ir3_instr_create(block, OPC_META_SPLIT, 1, 1); struct ir3_register *dst = __ssa_dst(split); dst->flags |= def->flags & IR3_REG_HALF; struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags); @@ -874,16 +874,20 @@ extract(struct ir3_register *parent_def, unsigned offset, unsigned elems, if (offset == 0 && elems == reg_elems(parent_def)) return parent_def; + struct ir3_register *srcs[elems]; + for (unsigned i = 0; i < elems; i++) { + srcs[i] = split(parent_def, offset + i, after, block); + } + struct ir3_instruction *collect = - ir3_instr_create(after->block, OPC_META_COLLECT, 1, elems); + ir3_instr_create(block, OPC_META_COLLECT, 1, elems); struct ir3_register *dst = __ssa_dst(collect); dst->flags |= parent_def->flags & 
IR3_REG_HALF; dst->wrmask = MASK(elems); add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset); for (unsigned i = 0; i < elems; i++) { - ir3_src_create(collect, INVALID_REG, parent_def->flags)->def = - split(parent_def, offset + i, after, block); + ir3_src_create(collect, INVALID_REG, parent_def->flags)->def = srcs[i]; } if (after)