From c8445c16910747693c5e150125972da61a815dc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sat, 20 Aug 2022 22:55:45 +0200 Subject: [PATCH] aco: Change inverse-comparison optimization to work with s_not MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some time ago we stopped using s_andn2 with exec for boolean NOT. The reasoning behind that change was that those booleans will always be ANDed with exec when necessary. This inhibited the inverse-comparison optimization in most cases, which is fixed by this patch. Fossil DB stats on Navi 21: Totals from 12251 (9.08% of 134913) affected shaders: VGPRs: 801744 -> 802016 (+0.03%); split: -0.00%, +0.04% SpillSGPRs: 8863 -> 8893 (+0.34%) CodeSize: 100593244 -> 100370684 (-0.22%); split: -0.22%, +0.00% MaxWaves: 204994 -> 204948 (-0.02%); split: +0.00%, -0.02% Instrs: 18717001 -> 18668965 (-0.26%); split: -0.26%, +0.00% Latency: 263255046 -> 262874896 (-0.14%); split: -0.16%, +0.02% InvThroughput: 52760249 -> 52721736 (-0.07%); split: -0.08%, +0.01% VClause: 329631 -> 329680 (+0.01%); split: -0.03%, +0.04% SClause: 681563 -> 681435 (-0.02%); split: -0.02%, +0.00% Copies: 1331612 -> 1372446 (+3.07%); split: -0.03%, +3.10% Branches: 548325 -> 548301 (-0.00%) PreSGPRs: 911317 -> 909700 (-0.18%) PreVGPRs: 766279 -> 767070 (+0.10%) Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 493738a8849..71f827fd645 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2389,16 +2389,14 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr& instr) return true; } -/* s_andn2(exec, cmp(a, b)) -> get_inverse(cmp)(a, b) */ +/* s_not(cmp(a, b)) -> get_inverse(cmp)(a, b) */ bool 
combine_inverse_comparison(opt_ctx& ctx, aco_ptr& instr) { - if (!instr->operands[0].isFixed() || instr->operands[0].physReg() != exec) - return false; if (ctx.uses[instr->definitions[1].tempId()]) return false; - Instruction* cmp = follow_operand(ctx, instr->operands[1]); + Instruction* cmp = follow_operand(ctx, instr->operands[0]); if (!cmp) return false; @@ -4214,7 +4212,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) ctx.program->gfx_level >= GFX9) { combine_salu_lshl_add(ctx, instr); } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) { - combine_salu_not_bitwise(ctx, instr); + if (!combine_salu_not_bitwise(ctx, instr)) + combine_inverse_comparison(ctx, instr); } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 || instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) { if (combine_ordering_test(ctx, instr)) { @@ -4243,10 +4242,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) } } } - - /* do this after combine_salu_n2() */ - if (instr->opcode == aco_opcode::s_andn2_b32 || instr->opcode == aco_opcode::s_andn2_b64) - combine_inverse_comparison(ctx, instr); } bool