mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 11:20:11 +01:00
pan/bi: Use flow control lowering on Valhall
Flow control lowering sits logically at the same point of the compile pipeline as clause scheduling on Bifrost, and the two have a lot of similarities. Now that we generate flow control only as a late pass, various hacks in the compiler are no longer necessary and are dropped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16804>
This commit is contained in:
parent
a394c32cd2
commit
5067a26f44
3 changed files with 9 additions and 125 deletions
|
|
@ -770,11 +770,6 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T,
|
||||||
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
|
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
|
||||||
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
|
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
|
||||||
|
|
||||||
if (b->shader->arch >= 9 && !inputs->is_blend) {
|
|
||||||
bi_instr *I = bi_nop(b);
|
|
||||||
I->flow = 0x9; /* .wait */
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inputs->is_blend && inputs->blend.nr_samples > 1) {
|
if (inputs->is_blend && inputs->blend.nr_samples > 1) {
|
||||||
/* Conversion descriptor comes from the compile inputs, pixel
|
/* Conversion descriptor comes from the compile inputs, pixel
|
||||||
* indices derived at run time based on sample ID */
|
* indices derived at run time based on sample ID */
|
||||||
|
|
@ -827,11 +822,6 @@ bi_skip_atest(bi_context *ctx, bool emit_zs)
|
||||||
static void
|
static void
|
||||||
bi_emit_atest(bi_builder *b, bi_index alpha)
|
bi_emit_atest(bi_builder *b, bi_index alpha)
|
||||||
{
|
{
|
||||||
if (b->shader->arch >= 9) {
|
|
||||||
bi_instr *I = bi_nop(b);
|
|
||||||
I->flow = 0x8; /* .wait0126 */
|
|
||||||
}
|
|
||||||
|
|
||||||
bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha);
|
bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha);
|
||||||
b->shader->emitted_atest = true;
|
b->shader->emitted_atest = true;
|
||||||
b->shader->coverage = atest->dest[0];
|
b->shader->coverage = atest->dest[0];
|
||||||
|
|
@ -1549,11 +1539,6 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
|
||||||
bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) :
|
bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) :
|
||||||
bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0);
|
bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0);
|
||||||
|
|
||||||
if (!b->shader->inputs->is_blend && b->shader->arch >= 9) {
|
|
||||||
bi_instr *I = bi_nop(b);
|
|
||||||
I->flow = 0x9; /* .wait */
|
|
||||||
}
|
|
||||||
|
|
||||||
bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc,
|
bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc,
|
||||||
regfmt, nr - 1);
|
regfmt, nr - 1);
|
||||||
bi_emit_cached_split(b, dest, size * nr);
|
bi_emit_cached_split(b, dest, size * nr);
|
||||||
|
|
@ -4663,19 +4648,6 @@ bifrost_nir_lower_store_component(struct nir_builder *b,
|
||||||
* That trick doesn't work on Valhall, which needs a NOP inserted in the
|
* That trick doesn't work on Valhall, which needs a NOP inserted in the
|
||||||
* terminal block instead.
|
* terminal block instead.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
|
||||||
bi_lower_terminal_block(bi_context *ctx, bi_block *block)
|
|
||||||
{
|
|
||||||
bi_builder b = bi_init_builder(ctx, bi_after_block(block));
|
|
||||||
|
|
||||||
/* Ensure the instruction is not dead code eliminated. XXX: This is a
|
|
||||||
* bit of a hack.
|
|
||||||
*/
|
|
||||||
bi_instr *I = bi_nop(&b);
|
|
||||||
I->flow = 0xF;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
bi_lower_branch(bi_context *ctx, bi_block *block)
|
bi_lower_branch(bi_context *ctx, bi_block *block)
|
||||||
{
|
{
|
||||||
|
|
@ -4701,7 +4673,7 @@ bi_lower_branch(bi_context *ctx, bi_block *block)
|
||||||
if (cull_terminal)
|
if (cull_terminal)
|
||||||
ins->branch_target = NULL;
|
ins->branch_target = NULL;
|
||||||
else if (ins->branch_target)
|
else if (ins->branch_target)
|
||||||
bi_lower_terminal_block(ctx, ins->branch_target);
|
ins->branch_target->needs_nop = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -5075,7 +5047,11 @@ bi_compile_variant_nir(nir_shader *nir,
|
||||||
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
|
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
|
||||||
bi_print_shader(ctx, stdout);
|
bi_print_shader(ctx, stdout);
|
||||||
|
|
||||||
if (ctx->arch <= 8) {
|
if (ctx->arch >= 9) {
|
||||||
|
va_assign_slots(ctx);
|
||||||
|
va_insert_flow_control_nops(ctx);
|
||||||
|
va_merge_flow(ctx);
|
||||||
|
} else {
|
||||||
bi_schedule(ctx);
|
bi_schedule(ctx);
|
||||||
bi_assign_scoreboard(ctx);
|
bi_assign_scoreboard(ctx);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -185,12 +185,6 @@ bi_side_effects(const bi_instr *I)
|
||||||
if (bi_opcode_props[I->op].last)
|
if (bi_opcode_props[I->op].last)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* On Valhall, nontrivial flow control acts as a side effect and should
|
|
||||||
* not be dead code eliminated away.
|
|
||||||
*/
|
|
||||||
if (I->flow)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
switch (I->op) {
|
switch (I->op) {
|
||||||
case BI_OPCODE_DISCARD_F32:
|
case BI_OPCODE_DISCARD_F32:
|
||||||
case BI_OPCODE_DISCARD_B32:
|
case BI_OPCODE_DISCARD_B32:
|
||||||
|
|
|
||||||
|
|
@ -793,30 +793,6 @@ va_pack_instr(const bi_instr *I)
|
||||||
return hex;
|
return hex;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
|
||||||
va_last_in_block(bi_block *block, bi_instr *I)
|
|
||||||
{
|
|
||||||
return (I->link.next == &block->instructions);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
va_should_return(bi_block *block, bi_instr *I)
|
|
||||||
{
|
|
||||||
/* Don't return within a block */
|
|
||||||
if (!va_last_in_block(block, I))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Don't return if we're succeeded by instructions */
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
|
|
||||||
bi_block *succ = block->successors[i];
|
|
||||||
|
|
||||||
if (succ && !bi_is_terminal_block(succ))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned
|
static unsigned
|
||||||
va_instructions_in_block(bi_block *block)
|
va_instructions_in_block(bi_block *block)
|
||||||
{
|
{
|
||||||
|
|
@ -903,15 +879,13 @@ va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I)
|
||||||
static void
|
static void
|
||||||
va_lower_blend(bi_context *ctx)
|
va_lower_blend(bi_context *ctx)
|
||||||
{
|
{
|
||||||
bool last_blend = true;
|
|
||||||
|
|
||||||
/* Link register (ABI between fragment and blend shaders) */
|
/* Link register (ABI between fragment and blend shaders) */
|
||||||
bi_index lr = bi_register(48);
|
bi_index lr = bi_register(48);
|
||||||
|
|
||||||
/* Program counter for *next* instruction */
|
/* Program counter for *next* instruction */
|
||||||
bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false);
|
bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false);
|
||||||
|
|
||||||
bi_foreach_instr_global_rev(ctx, I) {
|
bi_foreach_instr_global(ctx, I) {
|
||||||
if (I->op != BI_OPCODE_BLEND)
|
if (I->op != BI_OPCODE_BLEND)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
@ -919,7 +893,7 @@ va_lower_blend(bi_context *ctx)
|
||||||
|
|
||||||
unsigned prolog_length = 2 * 8;
|
unsigned prolog_length = 2 * 8;
|
||||||
|
|
||||||
if (last_blend)
|
if (I->flow == VA_FLOW_END)
|
||||||
bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0);
|
bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0);
|
||||||
else
|
else
|
||||||
bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8);
|
bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8);
|
||||||
|
|
@ -927,66 +901,8 @@ va_lower_blend(bi_context *ctx)
|
||||||
bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ);
|
bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ);
|
||||||
|
|
||||||
/* For fixed function: skip the prologue, or return */
|
/* For fixed function: skip the prologue, or return */
|
||||||
if (last_blend)
|
if (I->flow != VA_FLOW_END)
|
||||||
I->flow = 0x7 | 0x8; /* .return */
|
|
||||||
else
|
|
||||||
I->branch_offset = prolog_length;
|
I->branch_offset = prolog_length;
|
||||||
|
|
||||||
/* Iterate backwards makes the last BLEND easy to identify */
|
|
||||||
last_blend = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Add a flow control modifier to an instruction. There may be an existing flow
|
|
||||||
* control modifier; if so, we need to add a NOP with the extra flow control
|
|
||||||
* _after_ this instruction
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
va_add_flow(bi_context *ctx, bi_instr *I, enum va_flow flow)
|
|
||||||
{
|
|
||||||
if (I->flow != VA_FLOW_NONE) {
|
|
||||||
bi_builder b = bi_init_builder(ctx, bi_after_instr(I));
|
|
||||||
I = bi_nop(&b);
|
|
||||||
}
|
|
||||||
|
|
||||||
I->flow = flow;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Add flow control modifiers to the program. This is a stop gap until we have a
|
|
||||||
* proper scheduler. For now, this should be conformant while doing little
|
|
||||||
* optimization of message waits.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
va_lower_flow_control(bi_context *ctx)
|
|
||||||
{
|
|
||||||
bi_foreach_block(ctx, block) {
|
|
||||||
bool block_reconverges = bi_reconverge_branches(block);
|
|
||||||
|
|
||||||
bi_foreach_instr_in_block_safe(block, I) {
|
|
||||||
/* If this instruction returns, there is nothing left to do. */
|
|
||||||
if (va_should_return(block, I)) {
|
|
||||||
I->flow = VA_FLOW_END;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We may need to wait */
|
|
||||||
if (I->op == BI_OPCODE_BARRIER)
|
|
||||||
va_add_flow(ctx, I, VA_FLOW_WAIT);
|
|
||||||
else if (bi_opcode_props[I->op].message)
|
|
||||||
va_add_flow(ctx, I, VA_FLOW_WAIT0);
|
|
||||||
|
|
||||||
/* Lastly, we may need to reconverge. If we need reconvergence, it
|
|
||||||
* has to be on the last instruction of the block. If we have to
|
|
||||||
* generate a NOP for that reconverge, we need that to be last. So
|
|
||||||
* this ordering is careful.
|
|
||||||
*/
|
|
||||||
if (va_last_in_block(block, I) && block_reconverges)
|
|
||||||
va_add_flow(ctx, I, VA_FLOW_RECONVERGE);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1001,8 +917,6 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission)
|
||||||
if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend)
|
if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend)
|
||||||
va_lower_blend(ctx);
|
va_lower_blend(ctx);
|
||||||
|
|
||||||
va_lower_flow_control(ctx);
|
|
||||||
|
|
||||||
bi_foreach_block(ctx, block) {
|
bi_foreach_block(ctx, block) {
|
||||||
bi_foreach_instr_in_block(block, I) {
|
bi_foreach_instr_in_block(block, I) {
|
||||||
if (I->op == BI_OPCODE_BRANCHZ_I16)
|
if (I->op == BI_OPCODE_BRANCHZ_I16)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue