aco: Combine constant bit test to s_bitcmp.

Foz-DB Navi21:
Totals from 73988 (54.84% of 134913) affected shaders:
VGPRs: 2959768 -> 2959752 (-0.00%)
SpillSGPRs: 10250 -> 10697 (+4.36%); split: -0.64%, +5.00%
SpillVGPRs: 2326 -> 2291 (-1.50%); split: -2.24%, +0.73%
CodeSize: 261339476 -> 261045912 (-0.11%); split: -0.12%, +0.00%
Scratch: 239616 -> 238592 (-0.43%)
Instrs: 49214044 -> 49188242 (-0.05%); split: -0.06%, +0.00%
Latency: 413214139 -> 413296229 (+0.02%); split: -0.03%, +0.05%
InvThroughput: 71741622 -> 71786300 (+0.06%); split: -0.07%, +0.13%
VClause: 856838 -> 856973 (+0.02%); split: -0.01%, +0.02%
SClause: 1504502 -> 1504567 (+0.00%); split: -0.01%, +0.02%
Copies: 4058433 -> 4060424 (+0.05%); split: -0.03%, +0.08%
Branches: 1502953 -> 1502945 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 3081927 -> 3081531 (-0.01%); split: -0.02%, +0.01%
PreVGPRs: 2513990 -> 2513992 (+0.00%)

The vast majority of instruction count regressions are caused by parallel-rdp.

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18870>
This commit is contained in:
Georg Lehmann 2022-10-18 15:48:21 +02:00 committed by Marge Bot
parent 73be938c48
commit 7aa94efe82

View file

@@ -2839,12 +2839,18 @@ combine_s_bitcmp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (!lshl_instr || lshl_instr->opcode != s_lshl ||
!lshl_instr->operands[0].constantEquals(1) ||
(lshl_instr->operands[1].isLiteral() && and_instr->operands[!and_idx].isLiteral()))
lshl_instr = nullptr;
uint64_t constant;
if (!lshl_instr &&
(!is_operand_constant(ctx, and_instr->operands[and_idx], b64 ? 64 : 32, &constant) ||
!util_is_power_of_two_or_zero64(constant) || constant == 0))
continue;
bool test1 = false;
if (instr->operands[!cmp_idx].constantEquals(0)) {
test1 = lg;
} else if (instr->operands[!cmp_idx].isTemp() &&
} else if (lshl_instr && instr->operands[!cmp_idx].isTemp() &&
instr->operands[!cmp_idx].tempId() == lshl_instr->definitions[0].tempId()) {
test1 = !lg;
ctx.uses[lshl_instr->definitions[0].tempId()]--;
@@ -2862,9 +2868,13 @@ combine_s_bitcmp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
instr->opcode = aco_opcode::s_bitcmp0_b32;
instr->operands[0] = copy_operand(ctx, and_instr->operands[!and_idx]);
instr->operands[1] = copy_operand(ctx, lshl_instr->operands[1]);
decrease_uses(ctx, and_instr);
decrease_op_uses_if_dead(ctx, lshl_instr);
if (lshl_instr) {
instr->operands[1] = copy_operand(ctx, lshl_instr->operands[1]);
decrease_op_uses_if_dead(ctx, lshl_instr);
} else {
instr->operands[1] = Operand::c32(ffsll(constant) - 1);
}
return true;
}
}