diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 0a1d937ad7d..c52f60f9a93 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -660,6 +660,8 @@ struct ir3_block { uint16_t start_ip, end_ip; + bool reconvergence_point; + /* Track instructions which do not write a register but other- * wise must not be discarded (such as kill, stg, etc) */ @@ -1927,10 +1929,12 @@ soft_sy_delay(struct ir3_instruction *instr, struct ir3 *shader) } } - /* unreachable block elimination: */ bool ir3_remove_unreachable(struct ir3 *ir); +/* calculate reconvergence information: */ +void ir3_calc_reconvergence(struct ir3_shader_variant *so); + /* dead code elimination: */ struct ir3_shader_variant; bool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index f22506989b1..dc8e5bb2a9f 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2537,14 +2537,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } case nir_intrinsic_elect: dst[0] = ir3_ELECT_MACRO(ctx->block); - /* This may expand to a divergent if/then, so allocate stack space for - * it. 
- */ - ctx->max_stack = MAX2(ctx->max_stack, ctx->stack + 1); break; case nir_intrinsic_preamble_start_ir3: dst[0] = ir3_SHPS_MACRO(ctx->block); - ctx->max_stack = MAX2(ctx->max_stack, ctx->stack + 1); break; case nir_intrinsic_read_invocation_cond_ir3: { @@ -2555,7 +2550,6 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst[0]->dsts[0]->flags |= IR3_REG_SHARED; dst[0]->srcs[0]->num = regid(REG_P0, 0); array_insert(ctx->ir, ctx->ir->predicates, dst[0]); - ctx->max_stack = MAX2(ctx->max_stack, ctx->stack + 1); break; } @@ -2563,7 +2557,6 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0]; dst[0] = ir3_READ_FIRST_MACRO(ctx->block, src, 0); dst[0]->dsts[0]->flags |= IR3_REG_SHARED; - ctx->max_stack = MAX2(ctx->max_stack, ctx->stack + 1); break; } @@ -2579,7 +2572,6 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) ballot = ir3_BALLOT_MACRO(ctx->block, pred, components); ballot->srcs[0]->num = regid(REG_P0, 0); array_insert(ctx->ir, ctx->ir->predicates, ballot); - ctx->max_stack = MAX2(ctx->max_stack, ctx->stack + 1); } ballot->barrier_class = IR3_BARRIER_ACTIVE_FIBERS_R; @@ -3747,20 +3739,6 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop) ctx->loop_id = old_loop_id; } -static void -stack_push(struct ir3_context *ctx) -{ - ctx->stack++; - ctx->max_stack = MAX2(ctx->max_stack, ctx->stack); -} - -static void -stack_pop(struct ir3_context *ctx) -{ - compile_assert(ctx, ctx->stack > 0); - ctx->stack--; -} - static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list) { @@ -3770,14 +3748,10 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list) emit_block(ctx, nir_cf_node_as_block(node)); break; case nir_cf_node_if: - stack_push(ctx); emit_if(ctx, nir_cf_node_as_if(node)); - stack_pop(ctx); break; case nir_cf_node_loop: - stack_push(ctx); emit_loop(ctx, nir_cf_node_as_loop(node)); - stack_pop(ctx); break; case 
nir_cf_node_function: ir3_context_error(ctx, "TODO\n"); @@ -3924,13 +3898,9 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl) { nir_metadata_require(impl, nir_metadata_block_index); - compile_assert(ctx, ctx->stack == 0); - emit_cf_list(ctx, &impl->body); emit_block(ctx, impl->end_block); - compile_assert(ctx, ctx->stack == 0); - /* at this point, we should have a single empty block, * into which we emit the 'end' instruction. */ @@ -4687,8 +4657,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, goto out; } - so->branchstack = ctx->max_stack; - ir = so->ir = ctx->ir; if (gl_shader_stage_is_compute(so->type)) { @@ -4879,6 +4847,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, IR3_PASS(ir, ir3_array_to_ssa); + ir3_calc_reconvergence(so); + do { progress = false; diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 97a845d8c63..322b5de9de3 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -107,11 +107,6 @@ struct ir3_context { unsigned num_arrays; - /* Tracking for max level of flowcontrol (branchstack) needed - * by a5xx+: - */ - unsigned stack, max_stack; - unsigned loop_id; unsigned loop_depth; diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 958a08d23a9..81c1536a6ea 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -523,44 +523,6 @@ remove_unused_block(struct ir3_block *old_target) { list_delinit(&old_target->node); - /* If there are any physical predecessors due to fallthroughs, then they may - * fall through to any of the physical successors of this block. But we can - * only fit two, so just pick the "earliest" one, i.e. the fallthrough if - * possible. - * - * TODO: we really ought to have unlimited numbers of physical successors, - * both because of this and because we currently don't model some scenarios - * with nested break/continue correctly. 
- */ - struct ir3_block *new_target; - if (old_target->physical_successors[1] && - old_target->physical_successors[1]->start_ip < - old_target->physical_successors[0]->start_ip) { - new_target = old_target->physical_successors[1]; - } else { - new_target = old_target->physical_successors[0]; - } - - for (unsigned i = 0; i < old_target->physical_predecessors_count; i++) { - struct ir3_block *pred = old_target->physical_predecessors[i]; - if (pred->physical_successors[0] == old_target) { - if (!new_target) { - /* If we remove a physical successor, make sure the only physical - * successor is the first one. - */ - pred->physical_successors[0] = pred->physical_successors[1]; - pred->physical_successors[1] = NULL; - } else { - pred->physical_successors[0] = new_target; - } - } else { - assert(pred->physical_successors[1] == old_target); - pred->physical_successors[1] = new_target; - } - if (new_target) - ir3_block_add_physical_predecessor(new_target, pred); - } - /* cleanup dangling predecessors: */ for (unsigned i = 0; i < ARRAY_SIZE(old_target->successors); i++) { if (old_target->successors[i]) { @@ -568,13 +530,6 @@ remove_unused_block(struct ir3_block *old_target) ir3_block_remove_predecessor(succ, old_target); } } - - for (unsigned i = 0; i < ARRAY_SIZE(old_target->physical_successors); i++) { - if (old_target->physical_successors[i]) { - struct ir3_block *succ = old_target->physical_successors[i]; - ir3_block_remove_physical_predecessor(succ, old_target); - } - } } static bool @@ -591,21 +546,16 @@ retarget_jump(struct ir3_instruction *instr, struct ir3_block *new_target) cur_block->successors[1] = new_target; } - /* also update physical_successors: */ - if (cur_block->physical_successors[0] == old_target) { - cur_block->physical_successors[0] = new_target; - } else { - assert(cur_block->physical_successors[1] == old_target); - cur_block->physical_successors[1] = new_target; - } - /* update new target's predecessors: */ ir3_block_add_predecessor(new_target, 
cur_block); - ir3_block_add_physical_predecessor(new_target, cur_block); /* and remove old_target's predecessor: */ ir3_block_remove_predecessor(old_target, cur_block); - ir3_block_remove_physical_predecessor(old_target, cur_block); + + /* If we reconverged at the old target, we'll reconverge at the new target + * too: + */ + new_target->reconvergence_point |= old_target->reconvergence_point; instr->cat0.target = new_target; @@ -627,6 +577,12 @@ opt_jump(struct ir3 *ir) block->index = index++; foreach_block (block, &ir->block_list) { + /* This pass destroys the physical CFG so don't keep it around to avoid + * validation errors. + */ + block->physical_successors[0] = block->physical_successors[1] = NULL; + block->physical_predecessors_count = 0; + foreach_instr (instr, &block->instr_list) { if (!is_flow(instr) || !instr->cat0.target) continue; @@ -707,51 +663,18 @@ mark_jp(struct ir3_block *block) target->flags |= IR3_INSTR_JP; } -/* Mark points where control flow converges or diverges. +/* Mark points where control flow reconverges. * - * Divergence points could actually be re-convergence points where - * "parked" threads are recoverged with threads that took the opposite - * path last time around. Possibly it is easier to think of (jp) as - * "the execution mask might have changed". + * Re-convergence points are where "parked" threads are reconverged with threads + * that took the opposite path last time around. We already calculated them, we + * just need to mark them with (jp). */ static void mark_xvergence_points(struct ir3 *ir) { foreach_block (block, &ir->block_list) { - /* We need to insert (jp) if an entry in the "branch stack" is created for - * our block. 
This happens if there is a predecessor to our block that may - * fallthrough to an earlier block in the physical CFG, either because it - * ends in a non-uniform conditional branch or because there's a - * fallthrough for an block in-between that also starts with (jp) and was - * pushed on the branch stack already. - */ - for (unsigned i = 0; i < block->predecessors_count; i++) { - struct ir3_block *pred = block->predecessors[i]; - - for (unsigned j = 0; j < ARRAY_SIZE(pred->physical_successors); j++) { - if (pred->physical_successors[j] != NULL && - pred->physical_successors[j]->start_ip < block->start_ip) - mark_jp(block); - - /* If the predecessor just falls through to this block, we still - * need to check if it "falls through" by jumping to the block. This - * can happen if opt_jump fails and the block ends in two branches, - * or if there's an empty if-statement (which currently can happen - * with binning shaders after dead-code elimination) and the block - * before ends with a conditional branch directly to this block. 
- */ - if (pred->physical_successors[j] == block) { - foreach_instr_rev (instr, &pred->instr_list) { - if (!is_flow(instr)) - break; - if (instr->cat0.target == block) { - mark_jp(block); - break; - } - } - } - } - } + if (block->reconvergence_point) + mark_jp(block); } } diff --git a/src/freedreno/ir3/ir3_lower_subgroups.c b/src/freedreno/ir3/ir3_lower_subgroups.c index afc88a1e9ad..6292b673fbd 100644 --- a/src/freedreno/ir3/ir3_lower_subgroups.c +++ b/src/freedreno/ir3/ir3_lower_subgroups.c @@ -264,6 +264,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in * exclusive = reduce; * inclusive = src OP exclusive; * reduce = inclusive; + * break; * } * footer: * } @@ -280,6 +281,9 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in struct ir3_block *footer = ir3_block_create(ir); list_add(&footer->node, &exit->node); + footer->reconvergence_point = true; + + after_block->reconvergence_point = true; link_blocks(before_block, header, 0); @@ -312,6 +316,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in before_block->brtype = IR3_BRANCH_GETONE; before_block->condition = NULL; mov_immed(instr->dsts[0], then_block, 0); + after_block->reconvergence_point = true; before_block = after_block; after_block = split_block(ir, before_block, instr); then_block = create_if(ir, before_block, after_block); @@ -333,6 +338,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in case OPC_BALLOT_MACRO: case OPC_READ_COND_MACRO: before_block->brtype = IR3_BRANCH_COND; + after_block->reconvergence_point = true; break; case OPC_ANY_MACRO: before_block->brtype = IR3_BRANCH_ANY; @@ -344,6 +350,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in case OPC_READ_FIRST_MACRO: case OPC_SWZ_SHARED_MACRO: before_block->brtype = IR3_BRANCH_GETONE; + after_block->reconvergence_point = true; break; default: unreachable("bad opcode"); diff 
--git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index b1d87545118..8023439b7e5 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -489,7 +489,9 @@ print_block(struct ir3_block *block, int lvl) struct log_stream *stream = mesa_log_streami(); tab(stream, lvl); - mesa_log_stream_printf(stream, "block%u {\n", block_id(block)); + mesa_log_stream_printf(stream, "%sblock%u {\n", + block->reconvergence_point ? "(jp)" : "", + block_id(block)); if (block->predecessors_count > 0) { tab(stream, lvl + 1); diff --git a/src/freedreno/ir3/ir3_reconvergence.c b/src/freedreno/ir3/ir3_reconvergence.c new file mode 100644 index 00000000000..995dea8137c --- /dev/null +++ b/src/freedreno/ir3/ir3_reconvergence.c @@ -0,0 +1,300 @@ +/* + * Copyright (C) 2023 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* The pass uses information on which branches are divergent in order to + * determine which blocks are "reconvergence points" where parked threads may + * become reactivated as well as to add "physical" edges where the machine may + * fall through to the next reconvergence point. Reconvergence points need a + * (jp) added in the assembly, and physical edges are needed to model shared + * register liveness correctly. Reconvergence happens in the following two + * scenarios: + * + * 1. When there is a divergent branch, the later of the two block destinations + * becomes a reconvergence point. + * 2. When a forward edge crosses over a reconvergence point that may be + * outstanding at the start of the edge, we need to park the threads that + * take the edge and resume execution at the reconvergence point. This means + * that there is a physical edge from the start of the edge to the + * reconvergence point, and the destination of the edge becomes a new + * reconvergence point. + * + * For example, consider this simple if-else: + * + * bb0: + * ... + * br p0.x, #bb1, #bb2 + * bb1: + * ... + * jump bb3 + * bb2: + * ... + * jump bb3 + * bb3: + * ... + * + * The divergent branch at the end of bb0 makes bb2 a reconvergence point + * following (1), which starts being outstanding after the branch at the end of + * bb1. The jump to bb3 at the end of bb1 goes over bb2 while it is outstanding, + * so there is a physical edge from bb1 to bb2 and bb3 is a reconvergence point + * following (2). + * + * Note that (2) can apply recursively. To handle this efficiently we build an + * interval tree of forward edges that cross other blocks and whenever a block + * becomes a RP we iterate through the edges jumping across it using the tree. + * We also need to keep track of the range where each RP may be + * "outstanding." A RP becomes outstanding after a branch to it parks its + * threads there. 
This range may increase in size as we discover more and more
+ * branches to it that may park their threads there.
+ *
+ * Finally, we need to compute the branchstack value, which is the maximum
+ * number of outstanding reconvergence points. For the if-else, the branchstack
+ * is 2, because after the jump at the end of bb2 both reconvergence points are
+ * outstanding (although the first is removed immediately afterwards). Because
+ * we already computed the range where each RP is outstanding, this part is
+ * relatively straightforward.
+ */
+
+#include <limits.h>
+
+#include "ir3_shader.h"
+
+#include "util/rb_tree.h"
+#include "util/u_worklist.h"
+#include "util/ralloc.h"
+
+struct logical_edge {
+   struct uinterval_node node;
+   struct ir3_block *start_block;
+   struct ir3_block *end_block;
+};
+
+struct block_data {
+   /* For a reconvergence point, the index of the first block where, upon
+    * exiting, the RP may be outstanding. Normally this is a predecessor but may
+    * be a loop header for loops.
+    */
+   unsigned first_divergent_pred;
+
+   /* The last processed first_divergent_pred. */
+   unsigned first_processed_divergent_pred;
+
+   /* The number of blocks that have this block as a first_divergent_pred. */
+   unsigned divergence_count;
+};
+
+void
+ir3_calc_reconvergence(struct ir3_shader_variant *so)
+{
+   void *mem_ctx = ralloc_context(NULL);
+
+   /* It's important that the index we use corresponds to the final order blocks
+    * are emitted in!
+ */ + unsigned index = 0; + foreach_block (block, &so->ir->block_list) { + block->index = index++; + } + + /* Setup the tree of edges */ + unsigned edge_count = 0; + foreach_block (block, &so->ir->block_list) { + if (block->successors[0]) + edge_count++; + if (block->successors[1]) + edge_count++; + } + + struct rb_tree forward_edges, backward_edges; + rb_tree_init(&forward_edges); + rb_tree_init(&backward_edges); + + unsigned edge = 0; + struct logical_edge *edges = + ralloc_array(mem_ctx, struct logical_edge, edge_count); + struct block_data *blocks = + ralloc_array(mem_ctx, struct block_data, index); + foreach_block (block, &so->ir->block_list) { + blocks[block->index].divergence_count = 0; + blocks[block->index].first_divergent_pred = UINT_MAX; + blocks[block->index].first_processed_divergent_pred = UINT_MAX; + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (block->successors[i]) { + if (block->successors[i]->index > block->index + 1) { + edges[edge] = (struct logical_edge) { + .node = { + .interval = { + block->index + 1, + block->successors[i]->index - 1 + }, + }, + .start_block = block, + .end_block = block->successors[i], + }; + + uinterval_tree_insert(&forward_edges, &edges[edge++].node); + } else if (block->successors[i]->index < block->index - 1) { + edges[edge] = (struct logical_edge) { + .node = { + .interval = { + block->successors[i]->index - 1, + block->index + 1 + }, + }, + .start_block = block->successors[i], + .end_block = block, + }; + + uinterval_tree_insert(&backward_edges, &edges[edge++].node); + } + } + } + } + + assert(edge <= edge_count); + + u_worklist worklist; + u_worklist_init(&worklist, index, mem_ctx); + + /* First, find and mark divergent branches. The later destination will be the + * reconvergence point. + */ + foreach_block (block, &so->ir->block_list) { + if (block->successors[0] && block->successors[1]) { + unsigned idx = block->successors[0]->index > + block->successors[1]->index ? 
0 : 1; + block->successors[idx]->reconvergence_point = true; + blocks[block->successors[idx]->index].first_divergent_pred = + block->index; + u_worklist_push_tail(&worklist, block->successors[idx], index); + } + } + + while (!u_worklist_is_empty(&worklist)) { + struct ir3_block *block = + u_worklist_pop_head(&worklist, struct ir3_block, index); + assert(block->reconvergence_point); + + /* Iterate over all edges stepping over the block. */ + struct uinterval interval = { block->index, block->index }; + uinterval_tree_foreach (struct logical_edge, edge, interval, &forward_edges, + node) { + /* If "block" definitely isn't outstanding when the branch + * corresponding to "edge" is taken, then we don't need to park + * "edge->end_block" and we can ignore this. + * + * TODO: add uinterval_tree_foreach_from() and use that instead. + */ + if (edge->start_block->index <= blocks[block->index].first_divergent_pred) + continue; + + /* If we've already processed this edge + RP pair, don't process it + * again. Because edges are ordered by start point, we must have + * processed every edge after this too. + */ + if (edge->start_block->index > + blocks[block->index].first_processed_divergent_pred) + break; + + edge->end_block->reconvergence_point = true; + if (blocks[edge->end_block->index].first_divergent_pred > + edge->start_block->index) { + blocks[edge->end_block->index].first_divergent_pred = + edge->start_block->index; + u_worklist_push_tail(&worklist, edge->end_block, index); + } + + /* Backwards branches extend the range of divergence. For example, a + * divergent break creates a reconvergence point after the loop that + * stays outstanding throughout subsequent iterations, even at points + * before the break. This takes that into account. 
+ * + * More precisely, a backwards edge that originates between the start + * and end of "edge" extends the divergence range to the beginning of + * its destination if it is taken, or alternatively to the end of the + * block before its destination. + * + * TODO: in case we ever start accepting weird non-structured control + * flow, we may also need to handle this above if a divergent branch + * crosses over a backwards edge. + */ + struct uinterval interval2 = { edge->start_block->index, edge->start_block->index }; + uinterval_tree_foreach (struct logical_edge, back_edge, interval2, &backward_edges, + node) { + if (back_edge->end_block->index < edge->end_block->index) { + if (blocks[edge->end_block->index].first_divergent_pred > + back_edge->start_block->index - 1) { + blocks[edge->end_block->index].first_divergent_pred = + back_edge->start_block->index - 1; + u_worklist_push_tail(&worklist, edge->end_block, index); + } + } + } + } + + blocks[block->index].first_processed_divergent_pred = + blocks[block->index].first_divergent_pred; + } + + /* For each reconvergent point p we have an open range + * (p->first_divergent_pred, p) where p may be outstanding. We need to keep + * track of the number of outstanding RPs and calculate the maximum. + */ + foreach_block (block, &so->ir->block_list) { + if (block->reconvergence_point) { + blocks[blocks[block->index].first_divergent_pred].divergence_count++; + } + } + + unsigned rc_level = 0; + so->branchstack = 0; + foreach_block (block, &so->ir->block_list) { + if (block->reconvergence_point) + rc_level--; + + /* Account for lowerings that produce divergent control flow. 
*/ + foreach_instr (instr, &block->instr_list) { + switch (instr->opc) { + case OPC_SCAN_MACRO: + so->branchstack = MAX2(so->branchstack, rc_level + 2); + break; + case OPC_BALLOT_MACRO: + case OPC_READ_COND_MACRO: + case OPC_ELECT_MACRO: + case OPC_READ_FIRST_MACRO: + case OPC_SWZ_SHARED_MACRO: + so->branchstack = MAX2(so->branchstack, rc_level + 1); + break; + default: + break; + } + } + + rc_level += blocks[block->index].divergence_count; + + so->branchstack = MAX2(so->branchstack, rc_level); + } + assert(rc_level == 0); + + ralloc_free(mem_ctx); +} + diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index d2686fcb104..2f658cf521d 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -1243,12 +1243,7 @@ ir3_shader_branchstack_hw(const struct ir3_shader_variant *v) if (v->compiler->gen < 5) return v->branchstack; - if (v->branchstack > 0) { - uint32_t branchstack = v->branchstack / 2 + 1; - return MIN2(branchstack, v->compiler->branchstack_size / 2); - } else { - return 0; - } + return DIV_ROUND_UP(MIN2(v->branchstack, v->compiler->branchstack_size), 2); } ENDC; diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 2b45559cab8..3c58e8b1fc0 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -426,7 +426,8 @@ ir3_validate(struct ir3 *ir) ctx->current_instr = NULL; /* Each logical successor should also be a physical successor: */ - validate_assert(ctx, is_physical_successor(block, block->successors[i])); + if (block->physical_successors[0]) + validate_assert(ctx, is_physical_successor(block, block->successors[i])); } } diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index e7ba1d8fe5f..6edae70e6dd 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -108,6 +108,7 @@ libfreedreno_ir3_files = files( 'ir3_ra.c', 'ir3_ra.h', 'ir3_ra_validate.c', + 'ir3_reconvergence.c', 
'ir3_remove_unreachable.c', 'ir3_sched.c', 'ir3_shader.c',