From 360d2958af36379872298434eb493157060a78bc Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Wed, 14 May 2025 08:54:29 +0200 Subject: [PATCH] pan/bi: Flush subnormals to zero for FROUND on v11+ FROUND on v11+ does not flush subnormals to zero even when configured in the shader program header. We now use FLUSH.ftz on the input of FROUND to ensure proper behavior when rounding up and down with FTZ enabled. Signed-off-by: Mary Guillemard Reviewed-by: Lars-Ivar Hesselberg Simonsen Backport-to: 25.1 Part-of: (cherry picked from commit 5588ff49a7f4841b6c6674123cf145563704827e) --- .pick_status.json | 2 +- src/panfrost/compiler/bifrost_compile.c | 19 +++++++++++++++++-- src/panfrost/compiler/compiler.h | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index abee6bac3d5..4a340f78c3b 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1804,7 +1804,7 @@ "description": "pan/bi: Flush subnormals to zero for FROUND on v11+", "nominated": true, "nomination_type": 4, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 3e3bcce2527..d5b8737e658 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -3356,11 +3356,25 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) case nir_op_fround_even: case nir_op_fceil: case nir_op_ffloor: - case nir_op_ftrunc: + case nir_op_ftrunc: { /* On v11+, FROUND.v2s16 is gone, we lower this in nir_lower_bit_size */ assert(sz != 16 || b->shader->arch < 11); - bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op)); + + enum bi_round round = bi_nir_round(instr->op); + + /* On v11+, FROUND does not flush subnormals to zero even when configured + * in the shader program header */ + if (b->shader->arch >= 11 && + (round == BI_ROUND_RTP || round == BI_ROUND_RTN) && + b->shader->ftz_fp32) { + bi_instr *flush = bi_flush_to(b, 32, bi_temp(b->shader), s0); + flush->ftz = true; + s0 = flush->dest[0]; + } + + bi_fround_to(b, sz, dst, s0, round); break; + } case nir_op_fmin: bi_fmin_to(b, sz, dst, s0, s1); @@ -5896,6 +5910,7 @@ bi_compile_variant_nir(nir_shader *nir, unsigned execution_mode = nir->info.float_controls_execution_mode; ctx->rtz_fp16 = nir_is_rounding_mode_rtz(execution_mode, 16); ctx->rtz_fp32 = nir_is_rounding_mode_rtz(execution_mode, 32); + ctx->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32); if (idvs == BI_IDVS_POSITION || idvs == BI_IDVS_VARYING) { /* Specializing shaders for IDVS is destructive, so we need to diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index 7124b973f4d..c10e81d529d 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ -917,6 +917,7 @@ typedef struct { /* Floating point rounding mode controls */ bool rtz_fp16; bool rtz_fp32; + bool ftz_fp32; /* In any graphics shader, whether the "IDVS with memory * allocation" flow is used. This affects how varyings are loaded and