diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index 03a0ca7a520..67c2928c8bb 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -26,15 +26,6 @@ #include "compiler.h" -/* Finds the clause type required or return none */ - -static bool -bi_is_fragz(bi_instruction *ins) -{ - return ins->load_vary.special && - ins->load_vary.var_id == BI_VARYING_NAME_FRAG_Z; -} - /* Determines messsage type by checking the table and a few special cases. Only * case missing is tilebuffer instructions that access depth/stencil, which * require a Z_STENCIL message (to implement @@ -55,110 +46,6 @@ bi_message_type_for_instr(bi_instr *ins) return msg; } -static enum bifrost_message_type -bi_message_type_for_ins(bi_instruction *ins) -{ - unsigned T = ins->type; - - /* Only high latency ops impose clause types */ - if (!(bi_class_props[T] & BI_SCHED_HI_LATENCY)) - return BIFROST_MESSAGE_NONE; - - switch (T) { - case BI_BRANCH: - case BI_DISCARD: - return BIFROST_MESSAGE_NONE; - - case BI_LOAD_VAR: - if (bi_is_fragz(ins)) - return BIFROST_MESSAGE_Z_STENCIL; - - return BIFROST_MESSAGE_VARYING; - - case BI_LOAD_UNIFORM: - case BI_LOAD_ATTR: - case BI_LOAD_VAR_ADDRESS: - return BIFROST_MESSAGE_ATTRIBUTE; - - case BI_TEXS: - case BI_TEXC: - case BI_TEXC_DUAL: - return BIFROST_MESSAGE_TEX; - - case BI_LOAD: - return BIFROST_MESSAGE_LOAD; - - case BI_STORE: - case BI_STORE_VAR: - return BIFROST_MESSAGE_STORE; - - case BI_BLEND: - return BIFROST_MESSAGE_BLEND; - - case BI_LOAD_TILE: - return BIFROST_MESSAGE_TILE; - - case BI_ATEST: - return BIFROST_MESSAGE_ATEST; - - case BI_ZS_EMIT: - return BIFROST_MESSAGE_Z_STENCIL; - - default: - unreachable("Invalid high-latency class"); - } -} - -/* There is an encoding restriction against FMA fp16 add/min/max - * having both sources with abs(..) with a duplicated source. This is - * due to the packing being order-sensitive, so the slots must end up distinct - * to handle both having abs(..). The swizzle doesn't matter here. Note - * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce - * this). - */ - -static bool -bi_ambiguous_abs(bi_instruction *ins) -{ - bool classy = bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA; - bool typey = ins->dest_type == nir_type_float16; - bool absy = ins->src_abs[0] && ins->src_abs[1]; - - return classy && typey && absy; -} - -/* New Bifrost (which?) don't seem to have ICMP on FMA */ -static bool -bi_icmp(bi_instruction *ins) -{ - bool ic = nir_alu_type_get_base_type(ins->src_types[0]) != nir_type_float; - return ic && (ins->type == BI_CMP); -} - -/* No 8/16-bit IADD/ISUB on FMA */ -static bool -bi_imath_small(bi_instruction *ins) -{ - bool sz = nir_alu_type_get_type_size(ins->src_types[0]) < 32; - return sz && (ins->type == BI_IMATH); -} - -/* Lowers FMOV to ADD #0, since FMOV doesn't exist on the h/w and this is the - * latest time it's sane to lower (it's useful to distinguish before, but we'll - * need this handle during scheduling to ensure the slots get modeled - * correctly with respect to the new zero source) */ - -static void -bi_lower_fmov(bi_instruction *ins) -{ - if (ins->type != BI_FMOV) - return; - - ins->type = BI_ADD; - ins->src[1] = BIR_INDEX_ZERO; - ins->src_types[1] = ins->src_types[0]; -} - /* To work out the back-to-back flag, we need to detect branches and * "fallthrough" branches, implied in the last clause of a block that falls * through to another block with *multiple predecessors*. */ @@ -263,66 +150,6 @@ bi_singleton(void *memctx, bi_instr *ins, return u; } -/* Insert a clause wrapping a single instruction */ - -bi_clause * -bi_make_singleton(void *memctx, bi_instruction *ins, - bi_block *block, - unsigned scoreboard_id, - unsigned dependencies, - bool osrb) -{ - unsigned props = bi_class_props[ins->type]; - - bi_clause *u = rzalloc(memctx, bi_clause); - u->bundle_count = 1; - - /* Check for scheduling restrictions */ - - bool can_fma = props & BI_SCHED_FMA; - ASSERTED bool can_add = props & BI_SCHED_ADD; - - can_fma &= !bi_ambiguous_abs(ins); - can_fma &= !bi_icmp(ins); - can_fma &= !bi_imath_small(ins); - - assert(can_fma || can_add); - - if (can_fma) - u->bundles[0].fma = ins; - else - u->bundles[0].add = ins; - - u->scoreboard_id = scoreboard_id; - u->staging_barrier = osrb; - u->dependencies = dependencies; - - if (ins->type == BI_ATEST) - u->dependencies |= (1 << 6); - - if (ins->type == BI_BLEND) - u->dependencies |= (1 << 6) | (1 << 7); - - /* Let's be optimistic, we'll fix up later */ - u->flow_control = BIFROST_FLOW_NBTB; - - u->constant_count = 1; - u->constants[0] = ins->constant.u64; - - if (ins->type == BI_BRANCH && ins->branch_target) - u->branch_constant = true; - - /* We always prefetch except unconditional branches */ - u->next_clause_prefetch = !( - (ins->type == BI_BRANCH) && - (ins->cond == BI_COND_ALWAYS)); - - u->message_type = bi_message_type_for_ins(ins); - u->block = block; - - return u; -} - /* Eventually, we'll need a proper scheduling, grouping instructions * into clauses and ordering/assigning grouped instructions to the * appropriate FMA/ADD slots. Right now we do the dumbest possible @@ -340,9 +167,6 @@ bi_schedule(bi_context *ctx) list_inithead(&bblock->clauses); bi_foreach_instr_in_block(bblock, ins) { - /* Convenient time to lower */ -// bi_lower_fmov(ins); - bi_clause *u = bi_singleton(ctx, (bi_instr *) ins, bblock, 0, (1 << 0), !is_first); diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index d3fd1031393..9ce8bb31f03 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -1101,12 +1101,6 @@ bi_singleton(void *memctx, bi_instr *ins, unsigned dependencies, bool osrb); -bi_clause *bi_make_singleton(void *memctx, bi_instruction *ins, - bi_block *block, - unsigned scoreboard_id, - unsigned dependencies, - bool osrb); - /* Liveness */ void bi_compute_liveness(bi_context *ctx);