From 5067a26f4432ae5e9690e70ef2498ea24798593e Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 19 May 2022 16:10:44 -0400 Subject: [PATCH] pan/bi: Use flow control lowering on Valhall Logically at the same part of the compile pipeline as clause scheduling on Bifrost. Lots of similarities, too. Now that we generate flow control only as a late pass, various hacks in the compiler are no longer necessary and are dropped. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 36 ++-------- src/panfrost/bifrost/bir.c | 6 -- src/panfrost/bifrost/valhall/va_pack.c | 92 +------------------------- 3 files changed, 9 insertions(+), 125 deletions(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index f5c0865393d..e3178d3124a 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -770,11 +770,6 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, uint64_t blend_desc = inputs->blend.bifrost_blend_desc; enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); - if (b->shader->arch >= 9 && !inputs->is_blend) { - bi_instr *I = bi_nop(b); - I->flow = 0x9; /* .wait */ - } - if (inputs->is_blend && inputs->blend.nr_samples > 1) { /* Conversion descriptor comes from the compile inputs, pixel * indices derived at run time based on sample ID */ @@ -827,11 +822,6 @@ bi_skip_atest(bi_context *ctx, bool emit_zs) static void bi_emit_atest(bi_builder *b, bi_index alpha) { - if (b->shader->arch >= 9) { - bi_instr *I = bi_nop(b); - I->flow = 0x8; /* .wait0126 */ - } - bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha); b->shader->emitted_atest = true; b->shader->coverage = atest->dest[0]; @@ -1549,11 +1539,6 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr) bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) : bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0); - if (!b->shader->inputs->is_blend && b->shader->arch >= 9) { - bi_instr *I = bi_nop(b); - I->flow = 0x9; /* .wait */ - } - bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc, regfmt, nr - 1); bi_emit_cached_split(b, dest, size * nr); @@ -4663,19 +4648,6 @@ bifrost_nir_lower_store_component(struct nir_builder *b, * That trick doesn't work on Valhall, which needs a NOP inserted in the * terminal block instead. */ - -static void -bi_lower_terminal_block(bi_context *ctx, bi_block *block) -{ - bi_builder b = bi_init_builder(ctx, bi_after_block(block)); - - /* Ensure the instruction is not dead code eliminated. XXX: This is a - * bit of a hack. - */ - bi_instr *I = bi_nop(&b); - I->flow = 0xF; -} - static void bi_lower_branch(bi_context *ctx, bi_block *block) { @@ -4701,7 +4673,7 @@ bi_lower_branch(bi_context *ctx, bi_block *block) if (cull_terminal) ins->branch_target = NULL; else if (ins->branch_target) - bi_lower_terminal_block(ctx, ins->branch_target); + ins->branch_target->needs_nop = true; } } @@ -5075,7 +5047,11 @@ bi_compile_variant_nir(nir_shader *nir, if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) bi_print_shader(ctx, stdout); - if (ctx->arch <= 8) { + if (ctx->arch >= 9) { + va_assign_slots(ctx); + va_insert_flow_control_nops(ctx); + va_merge_flow(ctx); + } else { bi_schedule(ctx); bi_assign_scoreboard(ctx); diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index ea2e0faf1f4..f8323f93cfb 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -185,12 +185,6 @@ bi_side_effects(const bi_instr *I) if (bi_opcode_props[I->op].last) return true; - /* On Valhall, nontrivial flow control acts as a side effect and should - * not be dead code eliminated away. - */ - if (I->flow) - return true; - switch (I->op) { case BI_OPCODE_DISCARD_F32: case BI_OPCODE_DISCARD_B32: diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index e934fc81941..dce06ac80d1 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -793,30 +793,6 @@ va_pack_instr(const bi_instr *I) return hex; } -static bool -va_last_in_block(bi_block *block, bi_instr *I) -{ - return (I->link.next == &block->instructions); -} - -static bool -va_should_return(bi_block *block, bi_instr *I) -{ - /* Don't return within a block */ - if (!va_last_in_block(block, I)) - return false; - - /* Don't return if we're succeeded by instructions */ - for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { - bi_block *succ = block->successors[i]; - - if (succ && !bi_is_terminal_block(succ)) - return false; - } - - return true; -} - static unsigned va_instructions_in_block(bi_block *block) { @@ -903,15 +879,13 @@ va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I) static void va_lower_blend(bi_context *ctx) { - bool last_blend = true; - /* Link register (ABI between fragment and blend shaders) */ bi_index lr = bi_register(48); /* Program counter for *next* instruction */ bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false); - bi_foreach_instr_global_rev(ctx, I) { + bi_foreach_instr_global(ctx, I) { if (I->op != BI_OPCODE_BLEND) continue; @@ -919,7 +893,7 @@ va_lower_blend(bi_context *ctx) unsigned prolog_length = 2 * 8; - if (last_blend) + if (I->flow == VA_FLOW_END) bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0); else bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8); @@ -927,66 +901,8 @@ va_lower_blend(bi_context *ctx) bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ); /* For fixed function: skip the prologue, or return */ - if (last_blend) - I->flow = 0x7 | 0x8; /* .return */ - else + if (I->flow != VA_FLOW_END) I->branch_offset = prolog_length; - - /* Iterate backwards makes the last BLEND easy to identify */ - last_blend = false; - } -} - -/* - * Add a flow control modifier to an instruction. There may be an existing flow - * control modifier; if so, we need to add a NOP with the extra flow control - * _after_ this instruction - */ -static void -va_add_flow(bi_context *ctx, bi_instr *I, enum va_flow flow) -{ - if (I->flow != VA_FLOW_NONE) { - bi_builder b = bi_init_builder(ctx, bi_after_instr(I)); - I = bi_nop(&b); - } - - I->flow = flow; -} - -/* - * Add flow control modifiers to the program. This is a stop gap until we have a - * proper scheduler. For now, this should be conformant while doing little - * optimization of message waits. - */ -static void -va_lower_flow_control(bi_context *ctx) -{ - bi_foreach_block(ctx, block) { - bool block_reconverges = bi_reconverge_branches(block); - - bi_foreach_instr_in_block_safe(block, I) { - /* If this instruction returns, there is nothing left to do. */ - if (va_should_return(block, I)) { - I->flow = VA_FLOW_END; - continue; - } - - /* We may need to wait */ - if (I->op == BI_OPCODE_BARRIER) - va_add_flow(ctx, I, VA_FLOW_WAIT); - else if (bi_opcode_props[I->op].message) - va_add_flow(ctx, I, VA_FLOW_WAIT0); - - /* Lastly, we may need to reconverge. If we need reconvergence, it - * has to be on the last instruction of the block. If we have to - * generate a NOP for that reconverge, we need that to be last. So - * this ordering is careful. - */ - if (va_last_in_block(block, I) && block_reconverges) - va_add_flow(ctx, I, VA_FLOW_RECONVERGE); - - - } } } @@ -1001,8 +917,6 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) va_lower_blend(ctx); - va_lower_flow_control(ctx); - bi_foreach_block(ctx, block) { bi_foreach_instr_in_block(block, I) { if (I->op == BI_OPCODE_BRANCHZ_I16)