diff --git a/src/panfrost/compiler/bifrost/bi_schedule.c b/src/panfrost/compiler/bifrost/bi_schedule.c index ed6514de317..f12254fc82f 100644 --- a/src/panfrost/compiler/bifrost/bi_schedule.c +++ b/src/panfrost/compiler/bifrost/bi_schedule.c @@ -1021,7 +1021,7 @@ bi_write_count(bi_instr *instr, uint64_t live_after_temp) /* Test if an instruction requires a specific flush-to-zero mode. */ static enum bi_ftz_state -bi_instr_ftz(bi_instr *I) +bi_instr_ftz(bi_context *ctx, bi_instr *I) { /* f16<->f32 conversions have a ftz override flag to implement fquantize16 */ if ((I->op == BI_OPCODE_F16_TO_F32 || I->op == BI_OPCODE_V2F32_TO_V2F16) && @@ -1032,7 +1032,22 @@ bi_instr_ftz(bi_instr *I) if (!props.is_float) return BI_FTZ_STATE_NONE; - return BI_FTZ_STATE_DISABLE; + enum bi_size size = bi_opcode_props[I->op].size; + unsigned bitsize; + if (size == BI_SIZE_16) + bitsize = 16; + else if (size == BI_SIZE_32) + bitsize = 32; + else + return BI_FTZ_STATE_NONE; + + unsigned execution_mode = ctx->nir->info.float_controls_execution_mode; + if (nir_is_denorm_flush_to_zero(execution_mode, bitsize)) + return BI_FTZ_STATE_ENABLE; + else if (nir_is_denorm_preserve(execution_mode, bitsize)) + return BI_FTZ_STATE_DISABLE; + else + return BI_FTZ_STATE_NONE; } /* @@ -1040,9 +1055,10 @@ bi_instr_ftz(bi_instr *I) * present we only consider flush-to-zero modes. */ static bool -bi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr) +bi_numerically_incompatible(bi_context *ctx, struct bi_clause_state *clause, + bi_instr *instr) { - enum bi_ftz_state instr_ftz = bi_instr_ftz(instr); + enum bi_ftz_state instr_ftz = bi_instr_ftz(ctx, instr); return (clause->ftz != BI_FTZ_STATE_NONE) && (instr_ftz != BI_FTZ_STATE_NONE) && (clause->ftz != instr_ftz); @@ -1056,7 +1072,8 @@ bi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr) * whitepaper. The cost function is a heuristic. */ static bool -bi_instr_schedulable(bi_instr *instr, struct bi_clause_state *clause, +bi_instr_schedulable(bi_context *ctx, bi_instr *instr, + struct bi_clause_state *clause, struct bi_tuple_state *tuple, uint64_t live_after_temp, bool fma) { @@ -1076,7 +1093,7 @@ bi_instr_schedulable(bi_instr *instr, struct bi_clause_state *clause, return false; /* Numerical properties must be compatible with the clause */ - if (bi_numerically_incompatible(clause, instr)) + if (bi_numerically_incompatible(ctx, clause, instr)) return false; /* Message-passing instructions are not guaranteed write within the @@ -1199,9 +1216,9 @@ bi_instr_cost(bi_instr *instr, struct bi_tuple_state *tuple) } static unsigned -bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause, - struct bi_tuple_state *tuple, uint64_t live_after_temp, - bool fma) +bi_choose_index(bi_context *ctx, struct bi_worklist st, + struct bi_clause_state *clause, struct bi_tuple_state *tuple, + uint64_t live_after_temp, bool fma) { unsigned i, best_idx = ~0; signed best_cost = INT_MAX; @@ -1209,7 +1226,7 @@ bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause, BITSET_FOREACH_SET(i, st.worklist, st.count) { bi_instr *instr = st.instructions[i]; - if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma)) + if (!bi_instr_schedulable(ctx, instr, clause, tuple, live_after_temp, fma)) continue; signed cost = bi_instr_cost(instr, tuple); @@ -1229,8 +1246,9 @@ bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause, } static void -bi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple, - bi_instr *instr, uint64_t live_after_temp, bool fma) +bi_pop_instr(bi_context *ctx, struct bi_clause_state *clause, + struct bi_tuple_state *tuple, bi_instr *instr, + uint64_t live_after_temp, bool fma) { bi_update_fau(clause, tuple, instr, fma, true); @@ -1252,7 +1270,7 @@ bi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple, tuple->reg.reads[tuple->reg.nr_reads++] = instr->src[s]; } - enum bi_ftz_state ftz = bi_instr_ftz(instr); + enum bi_ftz_state ftz = bi_instr_ftz(ctx, instr); if (ftz != BI_FTZ_STATE_NONE) clause->ftz = ftz; } @@ -1287,7 +1305,7 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st, /* Schedule the spill */ bi_builder b = bi_init_builder(ctx, bi_before_tuple(tuple->prev)); bi_instr *mov = bi_mov_i32_to(&b, src, src); - bi_pop_instr(clause, tuple, mov, live_after_temp, fma); + bi_pop_instr(ctx, clause, tuple, mov, live_after_temp, fma); return mov; } @@ -1297,7 +1315,7 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st, return NULL; #endif - unsigned idx = bi_choose_index(st, clause, tuple, live_after_temp, fma); + unsigned idx = bi_choose_index(ctx, st, clause, tuple, live_after_temp, fma); if (idx >= st.count) return NULL; @@ -1307,7 +1325,7 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st, BITSET_CLEAR(st.worklist, idx); bi_update_worklist(st, idx); - bi_pop_instr(clause, tuple, instr, live_after_temp, fma); + bi_pop_instr(ctx, clause, tuple, instr, live_after_temp, fma); /* Fixups */ bi_builder b = bi_init_builder(ctx, bi_before_instr(instr));