From fd669fa69d475a566c106d14cd5e72088fa1a6dd Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Tue, 26 Nov 2024 16:28:58 +0100 Subject: [PATCH] aco/optimizer: label fcanonicalize like a copy if there is nothing to flush Allows copy propagation into non-ALU instructions such as phis. Foz-DB Navi21: Totals from 138 (0.17% of 79395) affected shaders: Instrs: 308135 -> 307792 (-0.11%); split: -0.12%, +0.01% CodeSize: 1567924 -> 1566484 (-0.09%); split: -0.10%, +0.01% VGPRs: 9696 -> 9720 (+0.25%) Latency: 1162719 -> 1161663 (-0.09%); split: -0.10%, +0.00% InvThroughput: 256944 -> 256590 (-0.14%); split: -0.15%, +0.01% VClause: 5631 -> 5626 (-0.09%); split: -0.14%, +0.05% Copies: 29962 -> 30028 (+0.22%); split: -0.10%, +0.32% Branches: 8241 -> 8237 (-0.05%) PreVGPRs: 7800 -> 7797 (-0.04%) VALU: 216243 -> 215898 (-0.16%); split: -0.17%, +0.01% SALU: 30768 -> 30767 (-0.00%) Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 8d88007bba7..c7f5c64ef56 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1882,6 +1882,7 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) /* TODO: try to move the negate/abs modifier to the consumer instead */ bool uses_mods = instr->usesModifiers(); bool fp16 = instr->opcode == aco_opcode::v_mul_f16; + unsigned denorm_mode = fp16 ? 
ctx.fp_mode.denorm16_64 : ctx.fp_mode.denorm32; for (unsigned i = 0; i < 2; i++) { if (instr->operands[!i].isConstant() && instr->operands[i].isTemp()) { @@ -1910,8 +1911,12 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) ctx.info[instr->definitions[0].tempId()].set_abs(other); else if (!abs && neg && other.type() == RegType::vgpr) ctx.info[instr->definitions[0].tempId()].set_neg(other); - else if (!abs && !neg) - ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(other); + else if (!abs && !neg) { + if (denorm_mode == fp_denorm_keep || ctx.info[other.id()].is_canonicalized()) + ctx.info[instr->definitions[0].tempId()].set_temp(other); + else + ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(other); + } } else if (uses_mods || (instr->definitions[0].isSZPreserve() && instr->opcode != aco_opcode::v_mul_legacy_f32)) { continue; /* omod uses a legacy multiplication. */ @@ -1920,7 +1925,7 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) !instr->definitions[0].isInfPreserve()) || instr->opcode == aco_opcode::v_mul_legacy_f32)) { /* 0.0 */ ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u); - } else if ((fp16 ? ctx.fp_mode.denorm16_64 : ctx.fp_mode.denorm32) != fp_denorm_flush) { + } else if (denorm_mode != fp_denorm_flush) { /* omod has no effect if denormals are enabled. */ continue; } else if (instr->operands[!i].constantValue() ==