pan/bi: Flush subnormals to zero for FROUND on v11+

FROUND on v11+ does not flush subnormals to zero even when configured in
the shader program header.

We now use FLUSH.ftz on the input of FROUND to ensure proper
behavior when rounding up and down with FTZ enabled.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Backport-to: 25.1
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34970>
(cherry picked from commit 5588ff49a7)
This commit is contained in:
Mary Guillemard 2025-05-14 08:54:29 +02:00 committed by Eric Engestrom
parent 912437c1a3
commit 360d2958af
3 changed files with 19 additions and 3 deletions

View file

@ -1804,7 +1804,7 @@
"description": "pan/bi: Flush subnormals to zero for FROUND on v11+",
"nominated": true,
"nomination_type": 4,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -3356,11 +3356,25 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_fround_even:
case nir_op_fceil:
case nir_op_ffloor:
case nir_op_ftrunc:
case nir_op_ftrunc: {
/* On v11+, FROUND.v2s16 is gone, we lower this in nir_lower_bit_size */
assert(sz != 16 || b->shader->arch < 11);
bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op));
enum bi_round round = bi_nir_round(instr->op);
/* On v11+, FROUND does not flush subnormals to zero even when configured
* in the shader program header */
if (b->shader->arch >= 11 &&
(round == BI_ROUND_RTP || round == BI_ROUND_RTN) &&
b->shader->ftz_fp32) {
bi_instr *flush = bi_flush_to(b, 32, bi_temp(b->shader), s0);
flush->ftz = true;
s0 = flush->dest[0];
}
bi_fround_to(b, sz, dst, s0, round);
break;
}
case nir_op_fmin:
bi_fmin_to(b, sz, dst, s0, s1);
@ -5896,6 +5910,7 @@ bi_compile_variant_nir(nir_shader *nir,
unsigned execution_mode = nir->info.float_controls_execution_mode;
ctx->rtz_fp16 = nir_is_rounding_mode_rtz(execution_mode, 16);
ctx->rtz_fp32 = nir_is_rounding_mode_rtz(execution_mode, 32);
ctx->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32);
if (idvs == BI_IDVS_POSITION || idvs == BI_IDVS_VARYING) {
/* Specializing shaders for IDVS is destructive, so we need to

View file

@ -917,6 +917,7 @@ typedef struct {
/* Floating point rounding mode controls */
bool rtz_fp16;
bool rtz_fp32;
bool ftz_fp32;
/* In any graphics shader, whether the "IDVS with memory
* allocation" flow is used. This affects how varyings are loaded and