aco: optimize 64-bit find_lsb/find_msb

No Foz-DB changes, but this should be better, especially on gfx6-7, where
uadd_sat takes 2 instructions.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30549>
This commit is contained in:
Georg Lehmann 2024-08-07 15:25:03 +02:00 committed by Marge Bot
parent c2bbdda39b
commit f36fccabf5

View file

@@ -1775,7 +1775,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
lo = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), lo);
hi = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), hi);
hi = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)), hi);
hi = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(32u), hi);
bld.vop2(aco_opcode::v_min_u32, Definition(dst), lo, hi);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
@@ -1816,12 +1816,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
lo = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)),
bld.vop1(op, bld.def(v1), lo));
lo = bld.vop1(op, bld.def(v1), lo);
lo = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(32), lo);
hi = bld.vop1(op, bld.def(v1), hi);
Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi);
Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi);
Temp msb_rev = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), lo, hi);
Temp msb = bld.tmp(v1);
Temp carry =