aco/isel: use s_mul_i32 instead of s_cselect_b32 for a ? b : 0

s_mul_i32 doesn't require SCC, and this is more consistent with b2f.

Foz-DB Navi21:
Totals from 2107 (2.64% of 79789) affected shaders:
Instrs: 6619774 -> 6619280 (-0.01%); split: -0.01%, +0.00%
CodeSize: 36754448 -> 36752396 (-0.01%); split: -0.01%, +0.00%
Latency: 62207779 -> 62206422 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 13090494 -> 13090204 (-0.00%); split: -0.00%, +0.00%
VClause: 171572 -> 171573 (+0.00%)
SClause: 257528 -> 257530 (+0.00%)
Copies: 607680 -> 607204 (-0.08%); split: -0.10%, +0.02%
VALU: 4189422 -> 4189418 (-0.00%)
SALU: 1001750 -> 1001264 (-0.05%); split: -0.07%, +0.02%

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33734>
This commit is contained in:
Georg Lehmann 2025-02-25 10:37:02 +01:00 committed by Marge Bot
parent 2d68efd9f3
commit 20dd6dfa12

View file

@@ -967,13 +967,21 @@ emit_bcsel(isel_context* ctx, nir_alu_instr* instr, Temp dst)
}
if (!nir_src_is_divergent(&instr->src[0].src)) { /* uniform condition and values in sgpr */
if (dst.regClass() == s1 || dst.regClass() == s2) {
cond = bool_to_scalar_condition(ctx, cond);
bool els_zero =
nir_src_is_const(instr->src[2].src) && nir_src_as_uint(instr->src[2].src) == 0;
if (dst.regClass() == s1 && els_zero) {
/* Use s_mul_i32 because it doesn't require scc. */
bld.sop2(aco_opcode::s_mul_i32, Definition(dst), then, cond);
} else if (dst.regClass() == s1 || dst.regClass() == s2) {
assert((then.regClass() == s1 || then.regClass() == s2) &&
els.regClass() == then.regClass());
assert(dst.size() == then.size());
aco_opcode op =
dst.regClass() == s1 ? aco_opcode::s_cselect_b32 : aco_opcode::s_cselect_b64;
bld.sop2(op, Definition(dst), then, els, bld.scc(bool_to_scalar_condition(ctx, cond)));
bld.sop2(op, Definition(dst), then, els, bld.scc(cond));
} else {
isel_err(&instr->instr, "Unimplemented uniform bcsel bit size");
}