diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index a5b770d4737..e31553f7ed0 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -706,6 +706,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_mbcnt_amd: case nir_intrinsic_lane_permute_16_amd: case nir_intrinsic_elect: + case nir_intrinsic_elect_any_ir3: case nir_intrinsic_load_tlb_color_brcm: case nir_intrinsic_load_tess_rel_patch_id_amd: case nir_intrinsic_load_gs_vertex_offset_amd: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 0db57c52f5d..d34bb159527 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1372,6 +1372,10 @@ intrinsic("preamble_start_ir3", [], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORD barrier("preamble_end_ir3") +# IR3-specific intrinsic to choose any invocation. This is implemented the same +# as elect, except that it doesn't require helper invocations. Used by preambles. +intrinsic("elect_any_ir3", dest_comp=1, flags=[CAN_ELIMINATE]) + # IR3-specific intrinsic for stc. Should be used in the shader preamble. store("uniform_ir3", [], indices=[BASE]) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 33eab610218..5b85cdec668 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1207,15 +1207,18 @@ uses_helpers(struct ir3_instruction *instr) case OPC_BALLOT_MACRO: case OPC_ANY_MACRO: case OPC_ALL_MACRO: - case OPC_ELECT_MACRO: case OPC_READ_FIRST_MACRO: case OPC_READ_COND_MACRO: case OPC_MOVMSK: case OPC_BRCST_ACTIVE: return true; - /* Catch lowered READ_FIRST/READ_COND. */ + /* Catch lowered READ_FIRST/READ_COND. For elect, don't include the getone + * in the preamble because it doesn't actually matter which fiber is + * selected. + */ case OPC_MOV: + case OPC_ELECT_MACRO: return instr->flags & IR3_INSTR_NEEDS_HELPERS; default: diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index a432507be57..fc2eb40253f 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2713,6 +2713,10 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) break; } case nir_intrinsic_elect: + dst[0] = ir3_ELECT_MACRO(ctx->block); + dst[0]->flags |= IR3_INSTR_NEEDS_HELPERS; + break; + case nir_intrinsic_elect_any_ir3: dst[0] = ir3_ELECT_MACRO(ctx->block); break; case nir_intrinsic_preamble_start_ir3: @@ -3987,6 +3991,7 @@ instr_can_be_predicated(nir_instr *instr) case nir_intrinsic_brcst_active_ir3: case nir_intrinsic_ballot: case nir_intrinsic_elect: + case nir_intrinsic_elect_any_ir3: case nir_intrinsic_read_invocation_cond_ir3: case nir_intrinsic_demote: case nir_intrinsic_demote_if: @@ -4126,7 +4131,8 @@ emit_if(struct ir3_context *ctx, nir_if *nif) ir3_BALL(ctx->block, pred, IR3_REG_PREDICATE); } else if (condition->opc == OPC_ELECT_MACRO && condition->block == ctx->block) { - ir3_GETONE(ctx->block); + struct ir3_instruction *branch = ir3_GETONE(ctx->block); + branch->flags |= condition->flags & IR3_INSTR_NEEDS_HELPERS; } else if (condition->opc == OPC_SHPS_MACRO && condition->block == ctx->block) { /* TODO: technically this only works if the block is the only user of the diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 4754cd7794e..3bb4cfa1232 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -1399,7 +1399,8 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, struct ir3_instruction *terminator = ir3_block_get_terminator(block); if (terminator) { if (terminator->opc == OPC_BALL || terminator->opc == OPC_BANY || - terminator->opc == OPC_GETONE) { + (terminator->opc == OPC_GETONE && + (terminator->flags & IR3_INSTR_NEEDS_HELPERS))) { bd->uses_helpers_beginning = true; bd->uses_helpers_end = true; non_prefetch_helpers = true; diff --git a/src/freedreno/ir3/ir3_lower_subgroups.c b/src/freedreno/ir3/ir3_lower_subgroups.c index 224c459cd11..62d43769016 100644 --- a/src/freedreno/ir3/ir3_lower_subgroups.c +++ b/src/freedreno/ir3/ir3_lower_subgroups.c @@ -223,11 +223,12 @@ link_blocks_jump(struct ir3_block *pred, struct ir3_block *succ) static void link_blocks_branch(struct ir3_block *pred, struct ir3_block *target, - struct ir3_block *fallthrough, unsigned opc, + struct ir3_block *fallthrough, unsigned opc, unsigned flags, struct ir3_instruction *condition) { unsigned nsrc = condition ? 1 : 0; struct ir3_instruction *branch = ir3_instr_create(pred, opc, 0, nsrc); + branch->flags |= flags; if (condition) { struct ir3_register *cond_dst = condition->dsts[0]; @@ -242,13 +243,14 @@ link_blocks_branch(struct ir3_block *pred, struct ir3_block *target, static struct ir3_block * create_if(struct ir3 *ir, struct ir3_block *before_block, - struct ir3_block *after_block, unsigned opc, + struct ir3_block *after_block, unsigned opc, unsigned flags, struct ir3_instruction *condition) { struct ir3_block *then_block = ir3_block_create(ir); list_add(&then_block->node, &before_block->node); - link_blocks_branch(before_block, then_block, after_block, opc, condition); + link_blocks_branch(before_block, then_block, after_block, opc, flags, + condition); link_blocks_jump(then_block, after_block); return then_block; @@ -320,7 +322,8 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in link_blocks_jump(before_block, header); - link_blocks_branch(header, exit, footer, OPC_GETONE, NULL); + link_blocks_branch(header, exit, footer, OPC_GETONE, + IR3_INSTR_NEEDS_HELPERS, NULL); link_blocks_jump(exit, after_block); ir3_block_link_physical(exit, footer); @@ -369,9 +372,10 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in link_blocks_jump(before_block, body); - link_blocks_branch(body, store, after_block, OPC_GETLAST, NULL); + link_blocks_branch(body, store, after_block, OPC_GETLAST, 0, NULL); - link_blocks_branch(store, after_block, body, OPC_GETONE, NULL); + link_blocks_branch(store, after_block, body, OPC_GETONE, + IR3_INSTR_NEEDS_HELPERS, NULL); struct ir3_register *reduce = instr->dsts[0]; struct ir3_register *inclusive = instr->dsts[1]; @@ -418,6 +422,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in struct ir3_instruction *condition = NULL; unsigned branch_opc = 0; + unsigned branch_flags = 0; switch (instr->opc) { case OPC_BALLOT_MACRO: @@ -445,13 +450,15 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in case OPC_ELECT_MACRO: after_block->reconvergence_point = true; branch_opc = OPC_GETONE; + branch_flags = instr->flags & IR3_INSTR_NEEDS_HELPERS; break; default: unreachable("bad opcode"); } struct ir3_block *then_block = - create_if(ir, before_block, after_block, branch_opc, condition); + create_if(ir, before_block, after_block, branch_opc, branch_flags, + condition); switch (instr->opc) { case OPC_ALL_MACRO: