From b79950fc1f366c9a1a557cdcb18e188110cf743e Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 23 Oct 2024 17:13:41 +0200 Subject: [PATCH] aco: remove heuristic that restricts VOP2/C with 2 sgprs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looking at the stats, the slightly increased code size isn't a problem compared to the benefits. This also only affects gfx10+, and those generations aren't throughput limited by 64bit instructions like early gcn. Foz-DB Navi21: Totals from 12377 (15.59% of 79395) affected shaders: MaxWaves: 269323 -> 269857 (+0.20%); split: +0.23%, -0.03% Instrs: 16505304 -> 16472552 (-0.20%); split: -0.21%, +0.01% CodeSize: 89815804 -> 90130344 (+0.35%); split: -0.02%, +0.37% VGPRs: 661160 -> 658640 (-0.38%); split: -0.40%, +0.02% SpillSGPRs: 3032 -> 3049 (+0.56%) SpillVGPRs: 826 -> 796 (-3.63%) Latency: 145800231 -> 145818568 (+0.01%); split: -0.14%, +0.15% InvThroughput: 39026010 -> 38892467 (-0.34%); split: -0.36%, +0.02% VClause: 325693 -> 325992 (+0.09%); split: -0.12%, +0.21% SClause: 497938 -> 497208 (-0.15%); split: -0.23%, +0.08% Copies: 1239036 -> 1204045 (-2.82%); split: -2.90%, +0.07% Branches: 462952 -> 462934 (-0.00%); split: -0.01%, +0.00% PreSGPRs: 586066 -> 587558 (+0.25%) PreVGPRs: 550024 -> 547736 (-0.42%) VALU: 11147608 -> 11114528 (-0.30%); split: -0.31%, +0.01% SALU: 2105546 -> 2105131 (-0.02%); split: -0.03%, +0.01% VMEM: 575983 -> 575923 (-0.01%) Foz-DB Navi31: Totals from 11544 (14.54% of 79395) affected shaders: MaxWaves: 319612 -> 319804 (+0.06%) Instrs: 17563158 -> 17527341 (-0.20%); split: -0.22%, +0.02% CodeSize: 92366832 -> 92626280 (+0.28%); split: -0.03%, +0.31% VGPRs: 667620 -> 665484 (-0.32%); split: -0.33%, +0.01% SpillSGPRs: 3418 -> 3434 (+0.47%) SpillVGPRs: 896 -> 858 (-4.24%) Scratch: 4738048 -> 4736512 (-0.03%) Latency: 141366653 -> 141399756 (+0.02%); split: -0.10%, +0.12% InvThroughput: 26213994 -> 26165751 (-0.18%); split: -0.21%, +0.03% VClause: 307956 -> 308124 (+0.05%); split: -0.12%, +0.18% SClause: 477816 -> 477326 (-0.10%); split: -0.18%, +0.08% Copies: 1161148 -> 1129386 (-2.74%); split: -2.81%, +0.08% Branches: 411509 -> 411506 (-0.00%); split: -0.00%, +0.00% PreSGPRs: 531354 -> 535027 (+0.69%) PreVGPRs: 525201 -> 521861 (-0.64%) VALU: 10360363 -> 10330274 (-0.29%); split: -0.30%, +0.01% SALU: 1778044 -> 1777585 (-0.03%); split: -0.04%, +0.01% VMEM: 551379 -> 551303 (-0.01%) VOPD: 3539 -> 3471 (-1.92%); split: +0.14%, -2.06% Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 52e0155917d..c0526d4ff1b 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2948,13 +2948,6 @@ apply_sgprs(opt_ctx& ctx, aco_ptr& instr) ssa_info& info = ctx.info[sgpr_info_id]; - /* Applying two sgprs require making it VOP3, so don't do it unless it's - * definitively beneficial. - * TODO: this is too conservative because later the use count could be reduced to 1 */ - if (!info.is_extract() && num_sgprs && ctx.uses[sgpr_info_id] > 1 && !instr->isVOP3() && - !instr->isSDWA() && instr->format != Format::VOP3P) - break; - Temp sgpr = info.is_extract() ? info.instr->operands[0].getTemp() : info.temp; bool new_sgpr = sgpr.id() != sgpr_ids[0] && sgpr.id() != sgpr_ids[1]; if (new_sgpr && num_sgprs >= max_sgprs)