pan/bi: Flush subnormals to zero for FROUND on v11+

FROUND on v11+ does not flush subnormals to zero even when configured in
the shader program header.

We now use FLUSH.ftz on the input of FROUND to ensure proper
behavior when rounding up and down with FTZ enabled.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Backport-to: 25.1
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34970>
This commit is contained in:
Mary Guillemard 2025-05-14 08:54:29 +02:00 committed by Marge Bot
parent e674823d55
commit 5588ff49a7
2 changed files with 18 additions and 2 deletions

View file

@ -3372,11 +3372,25 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_fround_even:
case nir_op_fceil:
case nir_op_ffloor:
case nir_op_ftrunc:
case nir_op_ftrunc: {
/* On v11+, FROUND.v2s16 is gone, we lower this in nir_lower_bit_size */
assert(sz != 16 || b->shader->arch < 11);
bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op));
enum bi_round round = bi_nir_round(instr->op);
/* On v11+, FROUND does not flush subnormals to zero even when configured
* in the shader program header */
if (b->shader->arch >= 11 &&
(round == BI_ROUND_RTP || round == BI_ROUND_RTN) &&
b->shader->ftz_fp32) {
bi_instr *flush = bi_flush_to(b, 32, bi_temp(b->shader), s0);
flush->ftz = true;
s0 = flush->dest[0];
}
bi_fround_to(b, sz, dst, s0, round);
break;
}
case nir_op_fmin:
bi_fmin_to(b, sz, dst, s0, s1);
@ -5929,6 +5943,7 @@ bi_compile_variant_nir(nir_shader *nir,
unsigned execution_mode = nir->info.float_controls_execution_mode;
ctx->rtz_fp16 = nir_is_rounding_mode_rtz(execution_mode, 16);
ctx->rtz_fp32 = nir_is_rounding_mode_rtz(execution_mode, 32);
ctx->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32);
if (idvs == BI_IDVS_POSITION || idvs == BI_IDVS_VARYING) {
/* Specializing shaders for IDVS is destructive, so we need to

View file

@ -917,6 +917,7 @@ typedef struct {
/* Floating point rounding mode controls */
bool rtz_fp16;
bool rtz_fp32;
bool ftz_fp32;
/* In any graphics shader, whether the "IDVS with memory
* allocation" flow is used. This affects how varyings are loaded and