aco: Change inverse-comparison optimization to work with s_not

Some time ago we stopped using s_andn2 with exec for boolean NOT.
The reasoning behind that change was that those booleans will be
always ANDed with exec when necessary.

This inhibited the inverse-comparison optimization in most cases
which is fixed by this patch.

Fossil DB stats on Navi 21:

Totals from 12251 (9.08% of 134913) affected shaders:
VGPRs: 801744 -> 802016 (+0.03%); split: -0.00%, +0.04%
SpillSGPRs: 8863 -> 8893 (+0.34%)
CodeSize: 100593244 -> 100370684 (-0.22%); split: -0.22%, +0.00%
MaxWaves: 204994 -> 204948 (-0.02%); split: +0.00%, -0.02%
Instrs: 18717001 -> 18668965 (-0.26%); split: -0.26%, +0.00%
Latency: 263255046 -> 262874896 (-0.14%); split: -0.16%, +0.02%
InvThroughput: 52760249 -> 52721736 (-0.07%); split: -0.08%, +0.01%
VClause: 329631 -> 329680 (+0.01%); split: -0.03%, +0.04%
SClause: 681563 -> 681435 (-0.02%); split: -0.02%, +0.00%
Copies: 1331612 -> 1372446 (+3.07%); split: -0.03%, +3.10%
Branches: 548325 -> 548301 (-0.00%)
PreSGPRs: 911317 -> 909700 (-0.18%)
PreVGPRs: 766279 -> 767070 (+0.10%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18253>
This commit is contained in:
Timur Kristóf 2022-08-20 22:55:45 +02:00 committed by Marge Bot
parent cf5f9854bc
commit c8445c1691

View file

@ -2389,16 +2389,14 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return true;
}
/* s_andn2(exec, cmp(a, b)) -> get_inverse(cmp)(a, b) */
/* s_not(cmp(a, b)) -> get_inverse(cmp)(a, b) */
bool
combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
if (!instr->operands[0].isFixed() || instr->operands[0].physReg() != exec)
return false;
if (ctx.uses[instr->definitions[1].tempId()])
return false;
Instruction* cmp = follow_operand(ctx, instr->operands[1]);
Instruction* cmp = follow_operand(ctx, instr->operands[0]);
if (!cmp)
return false;
@ -4214,7 +4212,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.program->gfx_level >= GFX9) {
combine_salu_lshl_add(ctx, instr);
} else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
combine_salu_not_bitwise(ctx, instr);
if (!combine_salu_not_bitwise(ctx, instr))
combine_inverse_comparison(ctx, instr);
} else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 ||
instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
if (combine_ordering_test(ctx, instr)) {
@ -4243,10 +4242,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
}
}
/* do this after combine_salu_n2() */
if (instr->opcode == aco_opcode::s_andn2_b32 || instr->opcode == aco_opcode::s_andn2_b64)
combine_inverse_comparison(ctx, instr);
}
bool