pan/bi: Use flow control lowering on Valhall

Logically at the same part of the compile pipeline as clause scheduling on
Bifrost. Lots of similarities, too. Now that we generate flow control only as a
late pass, various hacks in the compiler are no longer necessary and are
dropped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16804>
This commit is contained in:
Alyssa Rosenzweig 2022-05-19 16:10:44 -04:00 committed by Marge Bot
parent a394c32cd2
commit 5067a26f44
3 changed files with 9 additions and 125 deletions

View file

@@ -770,11 +770,6 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T,
uint64_t blend_desc = inputs->blend.bifrost_blend_desc; uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
if (b->shader->arch >= 9 && !inputs->is_blend) {
bi_instr *I = bi_nop(b);
I->flow = 0x9; /* .wait */
}
if (inputs->is_blend && inputs->blend.nr_samples > 1) { if (inputs->is_blend && inputs->blend.nr_samples > 1) {
/* Conversion descriptor comes from the compile inputs, pixel /* Conversion descriptor comes from the compile inputs, pixel
* indices derived at run time based on sample ID */ * indices derived at run time based on sample ID */
@@ -827,11 +822,6 @@ bi_skip_atest(bi_context *ctx, bool emit_zs)
static void static void
bi_emit_atest(bi_builder *b, bi_index alpha) bi_emit_atest(bi_builder *b, bi_index alpha)
{ {
if (b->shader->arch >= 9) {
bi_instr *I = bi_nop(b);
I->flow = 0x8; /* .wait0126 */
}
bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha); bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha);
b->shader->emitted_atest = true; b->shader->emitted_atest = true;
b->shader->coverage = atest->dest[0]; b->shader->coverage = atest->dest[0];
@@ -1549,11 +1539,6 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) : bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) :
bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0); bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0);
if (!b->shader->inputs->is_blend && b->shader->arch >= 9) {
bi_instr *I = bi_nop(b);
I->flow = 0x9; /* .wait */
}
bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc, bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc,
regfmt, nr - 1); regfmt, nr - 1);
bi_emit_cached_split(b, dest, size * nr); bi_emit_cached_split(b, dest, size * nr);
@@ -4663,19 +4648,6 @@ bifrost_nir_lower_store_component(struct nir_builder *b,
* That trick doesn't work on Valhall, which needs a NOP inserted in the * That trick doesn't work on Valhall, which needs a NOP inserted in the
* terminal block instead. * terminal block instead.
*/ */
static void
bi_lower_terminal_block(bi_context *ctx, bi_block *block)
{
bi_builder b = bi_init_builder(ctx, bi_after_block(block));
/* Ensure the instruction is not dead code eliminated. XXX: This is a
* bit of a hack.
*/
bi_instr *I = bi_nop(&b);
I->flow = 0xF;
}
static void static void
bi_lower_branch(bi_context *ctx, bi_block *block) bi_lower_branch(bi_context *ctx, bi_block *block)
{ {
@@ -4701,7 +4673,7 @@ bi_lower_branch(bi_context *ctx, bi_block *block)
if (cull_terminal) if (cull_terminal)
ins->branch_target = NULL; ins->branch_target = NULL;
else if (ins->branch_target) else if (ins->branch_target)
bi_lower_terminal_block(ctx, ins->branch_target); ins->branch_target->needs_nop = true;
} }
} }
@@ -5075,7 +5047,11 @@ bi_compile_variant_nir(nir_shader *nir,
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
bi_print_shader(ctx, stdout); bi_print_shader(ctx, stdout);
if (ctx->arch <= 8) { if (ctx->arch >= 9) {
va_assign_slots(ctx);
va_insert_flow_control_nops(ctx);
va_merge_flow(ctx);
} else {
bi_schedule(ctx); bi_schedule(ctx);
bi_assign_scoreboard(ctx); bi_assign_scoreboard(ctx);

View file

@@ -185,12 +185,6 @@ bi_side_effects(const bi_instr *I)
if (bi_opcode_props[I->op].last) if (bi_opcode_props[I->op].last)
return true; return true;
/* On Valhall, nontrivial flow control acts as a side effect and should
* not be dead code eliminated away.
*/
if (I->flow)
return true;
switch (I->op) { switch (I->op) {
case BI_OPCODE_DISCARD_F32: case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32: case BI_OPCODE_DISCARD_B32:

View file

@@ -793,30 +793,6 @@ va_pack_instr(const bi_instr *I)
return hex; return hex;
} }
static bool
va_last_in_block(bi_block *block, bi_instr *I)
{
return (I->link.next == &block->instructions);
}
static bool
va_should_return(bi_block *block, bi_instr *I)
{
/* Don't return within a block */
if (!va_last_in_block(block, I))
return false;
/* Don't return if we're succeeded by instructions */
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
bi_block *succ = block->successors[i];
if (succ && !bi_is_terminal_block(succ))
return false;
}
return true;
}
static unsigned static unsigned
va_instructions_in_block(bi_block *block) va_instructions_in_block(bi_block *block)
{ {
@@ -903,15 +879,13 @@ va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I)
static void static void
va_lower_blend(bi_context *ctx) va_lower_blend(bi_context *ctx)
{ {
bool last_blend = true;
/* Link register (ABI between fragment and blend shaders) */ /* Link register (ABI between fragment and blend shaders) */
bi_index lr = bi_register(48); bi_index lr = bi_register(48);
/* Program counter for *next* instruction */ /* Program counter for *next* instruction */
bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false); bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false);
bi_foreach_instr_global_rev(ctx, I) { bi_foreach_instr_global(ctx, I) {
if (I->op != BI_OPCODE_BLEND) if (I->op != BI_OPCODE_BLEND)
continue; continue;
@@ -919,7 +893,7 @@ va_lower_blend(bi_context *ctx)
unsigned prolog_length = 2 * 8; unsigned prolog_length = 2 * 8;
if (last_blend) if (I->flow == VA_FLOW_END)
bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0); bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0);
else else
bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8); bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8);
@@ -927,66 +901,8 @@ va_lower_blend(bi_context *ctx)
bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ); bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ);
/* For fixed function: skip the prologue, or return */ /* For fixed function: skip the prologue, or return */
if (last_blend) if (I->flow != VA_FLOW_END)
I->flow = 0x7 | 0x8; /* .return */
else
I->branch_offset = prolog_length; I->branch_offset = prolog_length;
/* Iterate backwards makes the last BLEND easy to identify */
last_blend = false;
}
}
/*
* Add a flow control modifier to an instruction. There may be an existing flow
* control modifier; if so, we need to add a NOP with the extra flow control
* _after_ this instruction
*/
static void
va_add_flow(bi_context *ctx, bi_instr *I, enum va_flow flow)
{
if (I->flow != VA_FLOW_NONE) {
bi_builder b = bi_init_builder(ctx, bi_after_instr(I));
I = bi_nop(&b);
}
I->flow = flow;
}
/*
* Add flow control modifiers to the program. This is a stop gap until we have a
* proper scheduler. For now, this should be conformant while doing little
* optimization of message waits.
*/
static void
va_lower_flow_control(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bool block_reconverges = bi_reconverge_branches(block);
bi_foreach_instr_in_block_safe(block, I) {
/* If this instruction returns, there is nothing left to do. */
if (va_should_return(block, I)) {
I->flow = VA_FLOW_END;
continue;
}
/* We may need to wait */
if (I->op == BI_OPCODE_BARRIER)
va_add_flow(ctx, I, VA_FLOW_WAIT);
else if (bi_opcode_props[I->op].message)
va_add_flow(ctx, I, VA_FLOW_WAIT0);
/* Lastly, we may need to reconverge. If we need reconvergence, it
* has to be on the last instruction of the block. If we have to
* generate a NOP for that reconverge, we need that to be last. So
* this ordering is careful.
*/
if (va_last_in_block(block, I) && block_reconverges)
va_add_flow(ctx, I, VA_FLOW_RECONVERGE);
}
} }
} }
@@ -1001,8 +917,6 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission)
if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend)
va_lower_blend(ctx); va_lower_blend(ctx);
va_lower_flow_control(ctx);
bi_foreach_block(ctx, block) { bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) { bi_foreach_instr_in_block(block, I) {
if (I->op == BI_OPCODE_BRANCHZ_I16) if (I->op == BI_OPCODE_BRANCHZ_I16)