diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index f5c0865393d..e3178d3124a 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -770,11 +770,6 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, uint64_t blend_desc = inputs->blend.bifrost_blend_desc; enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); - if (b->shader->arch >= 9 && !inputs->is_blend) { - bi_instr *I = bi_nop(b); - I->flow = 0x9; /* .wait */ - } - if (inputs->is_blend && inputs->blend.nr_samples > 1) { /* Conversion descriptor comes from the compile inputs, pixel * indices derived at run time based on sample ID */ @@ -827,11 +822,6 @@ bi_skip_atest(bi_context *ctx, bool emit_zs) static void bi_emit_atest(bi_builder *b, bi_index alpha) { - if (b->shader->arch >= 9) { - bi_instr *I = bi_nop(b); - I->flow = 0x8; /* .wait0126 */ - } - bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha); b->shader->emitted_atest = true; b->shader->coverage = atest->dest[0]; @@ -1549,11 +1539,6 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr) bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) : bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0); - if (!b->shader->inputs->is_blend && b->shader->arch >= 9) { - bi_instr *I = bi_nop(b); - I->flow = 0x9; /* .wait */ - } - bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc, regfmt, nr - 1); bi_emit_cached_split(b, dest, size * nr); @@ -4663,19 +4648,6 @@ bifrost_nir_lower_store_component(struct nir_builder *b, * That trick doesn't work on Valhall, which needs a NOP inserted in the * terminal block instead. */ - -static void -bi_lower_terminal_block(bi_context *ctx, bi_block *block) -{ - bi_builder b = bi_init_builder(ctx, bi_after_block(block)); - - /* Ensure the instruction is not dead code eliminated. XXX: This is a - * bit of a hack. - */ - bi_instr *I = bi_nop(&b); - I->flow = 0xF; -} - static void bi_lower_branch(bi_context *ctx, bi_block *block) { @@ -4701,7 +4673,7 @@ bi_lower_branch(bi_context *ctx, bi_block *block) if (cull_terminal) ins->branch_target = NULL; else if (ins->branch_target) - bi_lower_terminal_block(ctx, ins->branch_target); + ins->branch_target->needs_nop = true; } } @@ -5075,7 +5047,11 @@ bi_compile_variant_nir(nir_shader *nir, if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) bi_print_shader(ctx, stdout); - if (ctx->arch <= 8) { + if (ctx->arch >= 9) { + va_assign_slots(ctx); + va_insert_flow_control_nops(ctx); + va_merge_flow(ctx); + } else { bi_schedule(ctx); bi_assign_scoreboard(ctx); diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index ea2e0faf1f4..f8323f93cfb 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -185,12 +185,6 @@ bi_side_effects(const bi_instr *I) if (bi_opcode_props[I->op].last) return true; - /* On Valhall, nontrivial flow control acts as a side effect and should - * not be dead code eliminated away. - */ - if (I->flow) - return true; - switch (I->op) { case BI_OPCODE_DISCARD_F32: case BI_OPCODE_DISCARD_B32: diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index e934fc81941..dce06ac80d1 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -793,30 +793,6 @@ va_pack_instr(const bi_instr *I) return hex; } -static bool -va_last_in_block(bi_block *block, bi_instr *I) -{ - return (I->link.next == &block->instructions); -} - -static bool -va_should_return(bi_block *block, bi_instr *I) -{ - /* Don't return within a block */ - if (!va_last_in_block(block, I)) - return false; - - /* Don't return if we're succeeded by instructions */ - for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { - bi_block *succ = block->successors[i]; - - if (succ && !bi_is_terminal_block(succ)) - return false; - } - - return true; -} - static unsigned va_instructions_in_block(bi_block *block) { @@ -903,15 +879,13 @@ va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I) static void va_lower_blend(bi_context *ctx) { - bool last_blend = true; - /* Link register (ABI between fragment and blend shaders) */ bi_index lr = bi_register(48); /* Program counter for *next* instruction */ bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false); - bi_foreach_instr_global_rev(ctx, I) { + bi_foreach_instr_global(ctx, I) { if (I->op != BI_OPCODE_BLEND) continue; @@ -919,7 +893,7 @@ va_lower_blend(bi_context *ctx) unsigned prolog_length = 2 * 8; - if (last_blend) + if (I->flow == VA_FLOW_END) bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0); else bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8); @@ -927,66 +901,8 @@ va_lower_blend(bi_context *ctx) bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ); /* For fixed function: skip the prologue, or return */ - if (last_blend) - I->flow = 0x7 | 0x8; /* .return */ - else + if (I->flow != VA_FLOW_END) I->branch_offset = prolog_length; - - /* Iterate backwards makes the last BLEND easy to identify */ - last_blend = false; - } -} - -/* - * Add a flow control modifier to an instruction. There may be an existing flow - * control modifier; if so, we need to add a NOP with the extra flow control - * _after_ this instruction - */ -static void -va_add_flow(bi_context *ctx, bi_instr *I, enum va_flow flow) -{ - if (I->flow != VA_FLOW_NONE) { - bi_builder b = bi_init_builder(ctx, bi_after_instr(I)); - I = bi_nop(&b); - } - - I->flow = flow; -} - -/* - * Add flow control modifiers to the program. This is a stop gap until we have a - * proper scheduler. For now, this should be conformant while doing little - * optimization of message waits. - */ -static void -va_lower_flow_control(bi_context *ctx) -{ - bi_foreach_block(ctx, block) { - bool block_reconverges = bi_reconverge_branches(block); - - bi_foreach_instr_in_block_safe(block, I) { - /* If this instruction returns, there is nothing left to do. */ - if (va_should_return(block, I)) { - I->flow = VA_FLOW_END; - continue; - } - - /* We may need to wait */ - if (I->op == BI_OPCODE_BARRIER) - va_add_flow(ctx, I, VA_FLOW_WAIT); - else if (bi_opcode_props[I->op].message) - va_add_flow(ctx, I, VA_FLOW_WAIT0); - - /* Lastly, we may need to reconverge. If we need reconvergence, it - * has to be on the last instruction of the block. If we have to - * generate a NOP for that reconverge, we need that to be last. So - * this ordering is careful. - */ - if (va_last_in_block(block, I) && block_reconverges) - va_add_flow(ctx, I, VA_FLOW_RECONVERGE); - - - } } } @@ -1001,8 +917,6 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) va_lower_blend(ctx); - va_lower_flow_control(ctx); - bi_foreach_block(ctx, block) { bi_foreach_instr_in_block(block, I) { if (I->op == BI_OPCODE_BRANCHZ_I16)