/*
 * Patch summary: move Bifrost FAU/constant lowering out of
 * bifrost_compile.c and into the scheduler (bi_schedule.c).
 *
 * This text sits before the first diff header; everything from the
 * "diff" keyword onward is the unmodified patch.
 *
 * bi_schedule.c (additions)
 *
 *   bi_check_fau_src(ins, s, constants, cwords, fau)
 *     Returns true when source s of ins may stay as written, given the
 *     state accumulated from the earlier sources of the same instruction.
 *     constants[] / *cwords hold the distinct inline constants recorded
 *     so far (at most 2); *fau holds the FAU index recorded so far, or a
 *     null index. Both are updated in place on acceptance.
 *       - Source 0 of an opcode whose props have sr_read set is accepted
 *         only if it is neither BI_INDEX_CONSTANT nor BI_INDEX_FAU.
 *       - Constant 0 is accepted without being recorded when the opcode
 *         props have fma set and bi_reads_zero(ins) is true.
 *       - Any other constant is rejected if *fau is non-null; accepted if
 *         it equals an entry already in constants[]; rejected if *cwords
 *         is already 2; otherwise appended to constants[].
 *       - A BI_INDEX_FAU source is rejected if *cwords is non-zero, if
 *         *fau is already set to a different value, or if
 *         ins->branch_target is set; otherwise it is stored in *fau.
 *       - Sources of any other type are accepted.
 *
 *   bi_lower_fau(ctx, block)
 *     Walks every instruction of block with fresh per-instruction state
 *     (cwords = 0, fau = null). For BI_OPCODE_ATEST, fau is pre-seeded
 *     with ins->src[2] so that no other FAU value can be accepted for that
 *     instruction. Each source rejected by bi_check_fau_src is copied
 *     with bi_mov_i32 inserted before the instruction, and the source is
 *     rewritten through bi_replace_index.
 *     NOTE(review): the builder is initialised with
 *     bi_after_block(ctx->current_block) rather than the block argument;
 *     the cursor is reassigned to bi_before_instr(ins) before every
 *     insertion, so the initial value appears to be unused -- confirm.
 *
 *   bi_schedule(ctx)
 *     Now calls bi_lower_fau(ctx, bblock) on each block immediately
 *     before bi_schedule_block.
 *
 * bifrost_compile.c (removals)
 *
 *   - bi_lower_constant and the previous bi_lower_fau are deleted. The
 *     previous version only examined BI_INDEX_CONSTANT sources, allowed
 *     constant 0 whenever the opcode props did not have add set, and
 *     rebuilt the source from bi_imm_u32(value) while preserving the
 *     swizzle by hand.
 *   - The per-block bi_lower_fau loop in bifrost_compile_shader_nir is
 *     deleted. That loop ran before the BIFROST_DBG_SHADERS print; since
 *     lowering now happens inside bi_schedule, which is called after the
 *     print, the printed shader is the pre-lowering form.
 */
diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index cd5e3a6da6d..d6f6d932d24 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -1403,11 +1403,84 @@ bi_schedule_block(bi_context *ctx, bi_block *block) bi_free_worklist(st); } +static bool +bi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, unsigned *cwords, bi_index *fau) +{ + bi_index src = ins->src[s]; + + /* Staging registers can't have FAU accesses */ + if (s == 0 && bi_opcode_props[ins->op].sr_read) + return (src.type != BI_INDEX_CONSTANT) && (src.type != BI_INDEX_FAU); + + if (src.type == BI_INDEX_CONSTANT) { + /* Allow fast zero */ + if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins)) + return true; + + if (!bi_is_null(*fau)) + return false; + + /* Else, try to inline a constant */ + for (unsigned i = 0; i < *cwords; ++i) { + if (src.value == constants[i]) + return true; + } + + if (*cwords >= 2) + return false; + + constants[(*cwords)++] = src.value; + } else if (src.type == BI_INDEX_FAU) { + if (*cwords != 0) + return false; + + /* Can only read from one pair of FAU words */ + if (!bi_is_null(*fau) && (src.value != fau->value)) + return false; + + /* If there is a target, we'll need a PC-relative constant */ + if (ins->branch_target) + return false; + + *fau = src; + } + + return true; +} + +static void +bi_lower_fau(bi_context *ctx, bi_block *block) +{ + bi_builder b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); + + bi_foreach_instr_in_block_safe(block, _ins) { + bi_instr *ins = (bi_instr *) _ins; + + uint32_t constants[2]; + unsigned cwords = 0; + bi_index fau = bi_null(); + + /* ATEST must have the ATEST datum encoded, not any other + * uniform. See to it this is the case. 
*/ + if (ins->op == BI_OPCODE_ATEST) + fau = ins->src[2]; + + bi_foreach_src(ins, s) { + if (bi_check_fau_src(ins, s, constants, &cwords, &fau)) continue; + + b.cursor = bi_before_instr(ins); + bi_index copy = bi_mov_i32(&b, ins->src[s]); + ins->src[s] = bi_replace_index(ins->src[s], copy); + } + } +} + void bi_schedule(bi_context *ctx) { bi_foreach_block(ctx, block) { bi_block *bblock = (bi_block *) block; + bi_lower_fau(ctx, bblock); bi_schedule_block(ctx, bblock); bi_opt_dead_code_eliminate(ctx, bblock, true); } diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 954f259b7e6..c8dd6b20177 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -2272,57 +2272,6 @@ glsl_type_size(const struct glsl_type *type, bool bindless) return glsl_count_attribute_slots(type, false); } -static unsigned -bi_lower_constant(bi_builder *b, bi_instr *ins, unsigned s, uint32_t *accum, unsigned cwords, bool allow_constant) -{ - uint32_t value = ins->src[s].value; - - /* Staging registers can't have constants */ - allow_constant &= !(s == 0 && bi_opcode_props[ins->op].sr_read); - - /* If we're allowed any inline constants, see if this one works */ - if (allow_constant) { - for (unsigned i = 0; i < cwords; ++i) { - if (value == accum[i]) - return cwords; - } - - if (value == 0 && !bi_opcode_props[ins->op].add) - return cwords; - - if (cwords < 2) { - accum[cwords] = value; - return cwords + 1; - } - } - - /* should be const folded */ - assert(!ins->src[s].abs && !ins->src[s].neg); - enum bi_swizzle old_swizzle = ins->src[s].swizzle; - - b->cursor = bi_before_instr(ins); - ins->src[s] = bi_mov_i32(b, bi_imm_u32(value)); - ins->src[s].swizzle = old_swizzle; - return cwords; -} - -static void -bi_lower_fau(bi_context *ctx, bi_block *block) -{ - bi_builder b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); - - bi_foreach_instr_in_block_safe(block, _ins) { - bi_instr *ins = (bi_instr *) 
_ins; - uint32_t constants[2]; - unsigned cwords = 0; - - bi_foreach_src(ins, s) { - if (ins->src[s].type == BI_INDEX_CONSTANT) - cwords = bi_lower_constant(&b, ins, s, constants, cwords, true); - } - } -} - static void bi_optimize_nir(nir_shader *nir) { @@ -2575,11 +2524,6 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir, } } while(progress); - bi_foreach_block(ctx, _block) { - bi_block *block = (bi_block *) _block; - bi_lower_fau(ctx, block); - } - if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) bi_print_shader(ctx, stdout); bi_schedule(ctx);