diff --git a/.pick_status.json b/.pick_status.json index abee6bac3d5..4a340f78c3b 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1804,7 +1804,7 @@ "description": "pan/bi: Flush subnormals to zero for FROUND on v11+", "nominated": true, "nomination_type": 4, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 3e3bcce2527..d5b8737e658 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -3356,11 +3356,25 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) case nir_op_fround_even: case nir_op_fceil: case nir_op_ffloor: - case nir_op_ftrunc: + case nir_op_ftrunc: { /* On v11+, FROUND.v2s16 is gone, we lower this in nir_lower_bit_size */ assert(sz != 16 || b->shader->arch < 11); - bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op)); + + enum bi_round round = bi_nir_round(instr->op); + + /* On v11+, FROUND does not flush subnormals to zero even when configured + * in the shader program header */ + if (b->shader->arch >= 11 && + (round == BI_ROUND_RTP || round == BI_ROUND_RTN) && + b->shader->ftz_fp32) { + bi_instr *flush = bi_flush_to(b, 32, bi_temp(b->shader), s0); + flush->ftz = true; + s0 = flush->dest[0]; + } + + bi_fround_to(b, sz, dst, s0, round); break; + } case nir_op_fmin: bi_fmin_to(b, sz, dst, s0, s1); @@ -5896,6 +5910,7 @@ bi_compile_variant_nir(nir_shader *nir, unsigned execution_mode = nir->info.float_controls_execution_mode; ctx->rtz_fp16 = nir_is_rounding_mode_rtz(execution_mode, 16); ctx->rtz_fp32 = nir_is_rounding_mode_rtz(execution_mode, 32); + ctx->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32); if (idvs == BI_IDVS_POSITION || idvs == BI_IDVS_VARYING) { /* Specializing shaders for IDVS is destructive, so we need to diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index 7124b973f4d..c10e81d529d 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ -917,6 +917,7 @@ typedef struct { /* Floating point rounding mode controls */ bool rtz_fp16; bool rtz_fp32; + bool ftz_fp32; /* In any graphics shader, whether the "IDVS with memory * allocation" flow is used. This affects how varyings are loaded and