pan/bi: implement denorm behavior float controls

On bifrost independent float controls are implementable, just
potentially expensive because it requires scheduling FP16 and FP32
instructions in separate clauses.

Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33660>
This commit is contained in:
Benjamin Lee 2025-02-20 17:47:59 -08:00 committed by Marge Bot
parent 9737c1fa15
commit b6406c179b

View file

@ -1021,7 +1021,7 @@ bi_write_count(bi_instr *instr, uint64_t live_after_temp)
/* Test if an instruction requires a specific flush-to-zero mode. */
static enum bi_ftz_state
bi_instr_ftz(bi_instr *I)
bi_instr_ftz(bi_context *ctx, bi_instr *I)
{
/* f16<->f32 conversions have a ftz override flag to implement fquantize16 */
if ((I->op == BI_OPCODE_F16_TO_F32 || I->op == BI_OPCODE_V2F32_TO_V2F16) &&
@ -1032,7 +1032,22 @@ bi_instr_ftz(bi_instr *I)
if (!props.is_float)
return BI_FTZ_STATE_NONE;
return BI_FTZ_STATE_DISABLE;
enum bi_size size = bi_opcode_props[I->op].size;
unsigned bitsize;
if (size == BI_SIZE_16)
bitsize = 16;
else if (size == BI_SIZE_32)
bitsize = 32;
else
return BI_FTZ_STATE_NONE;
unsigned execution_mode = ctx->nir->info.float_controls_execution_mode;
if (nir_is_denorm_flush_to_zero(execution_mode, bitsize))
return BI_FTZ_STATE_ENABLE;
else if (nir_is_denorm_preserve(execution_mode, bitsize))
return BI_FTZ_STATE_DISABLE;
else
return BI_FTZ_STATE_NONE;
}
/*
@ -1040,9 +1055,10 @@ bi_instr_ftz(bi_instr *I)
* present we only consider flush-to-zero modes.
*/
static bool
bi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr)
bi_numerically_incompatible(bi_context *ctx, struct bi_clause_state *clause,
bi_instr *instr)
{
enum bi_ftz_state instr_ftz = bi_instr_ftz(instr);
enum bi_ftz_state instr_ftz = bi_instr_ftz(ctx, instr);
return (clause->ftz != BI_FTZ_STATE_NONE) &&
(instr_ftz != BI_FTZ_STATE_NONE) &&
(clause->ftz != instr_ftz);
@ -1056,7 +1072,8 @@ bi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr)
* whitepaper. The cost function is a heuristic. */
static bool
bi_instr_schedulable(bi_instr *instr, struct bi_clause_state *clause,
bi_instr_schedulable(bi_context *ctx, bi_instr *instr,
struct bi_clause_state *clause,
struct bi_tuple_state *tuple, uint64_t live_after_temp,
bool fma)
{
@ -1076,7 +1093,7 @@ bi_instr_schedulable(bi_instr *instr, struct bi_clause_state *clause,
return false;
/* Numerical properties must be compatible with the clause */
if (bi_numerically_incompatible(clause, instr))
if (bi_numerically_incompatible(ctx, clause, instr))
return false;
/* Message-passing instructions are not guaranteed write within the
@ -1199,9 +1216,9 @@ bi_instr_cost(bi_instr *instr, struct bi_tuple_state *tuple)
}
static unsigned
bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause,
struct bi_tuple_state *tuple, uint64_t live_after_temp,
bool fma)
bi_choose_index(bi_context *ctx, struct bi_worklist st,
struct bi_clause_state *clause, struct bi_tuple_state *tuple,
uint64_t live_after_temp, bool fma)
{
unsigned i, best_idx = ~0;
signed best_cost = INT_MAX;
@ -1209,7 +1226,7 @@ bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause,
BITSET_FOREACH_SET(i, st.worklist, st.count) {
bi_instr *instr = st.instructions[i];
if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma))
if (!bi_instr_schedulable(ctx, instr, clause, tuple, live_after_temp, fma))
continue;
signed cost = bi_instr_cost(instr, tuple);
@ -1229,8 +1246,9 @@ bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause,
}
static void
bi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple,
bi_instr *instr, uint64_t live_after_temp, bool fma)
bi_pop_instr(bi_context *ctx, struct bi_clause_state *clause,
struct bi_tuple_state *tuple, bi_instr *instr,
uint64_t live_after_temp, bool fma)
{
bi_update_fau(clause, tuple, instr, fma, true);
@ -1252,7 +1270,7 @@ bi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple,
tuple->reg.reads[tuple->reg.nr_reads++] = instr->src[s];
}
enum bi_ftz_state ftz = bi_instr_ftz(instr);
enum bi_ftz_state ftz = bi_instr_ftz(ctx, instr);
if (ftz != BI_FTZ_STATE_NONE)
clause->ftz = ftz;
}
@ -1287,7 +1305,7 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st,
/* Schedule the spill */
bi_builder b = bi_init_builder(ctx, bi_before_tuple(tuple->prev));
bi_instr *mov = bi_mov_i32_to(&b, src, src);
bi_pop_instr(clause, tuple, mov, live_after_temp, fma);
bi_pop_instr(ctx, clause, tuple, mov, live_after_temp, fma);
return mov;
}
@ -1297,7 +1315,7 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st,
return NULL;
#endif
unsigned idx = bi_choose_index(st, clause, tuple, live_after_temp, fma);
unsigned idx = bi_choose_index(ctx, st, clause, tuple, live_after_temp, fma);
if (idx >= st.count)
return NULL;
@ -1307,7 +1325,7 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st,
BITSET_CLEAR(st.worklist, idx);
bi_update_worklist(st, idx);
bi_pop_instr(clause, tuple, instr, live_after_temp, fma);
bi_pop_instr(ctx, clause, tuple, instr, live_after_temp, fma);
/* Fixups */
bi_builder b = bi_init_builder(ctx, bi_before_instr(instr));