From f36fccabf547d889eae1338face770cd1a093528 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 7 Aug 2024 15:25:03 +0200 Subject: [PATCH] aco: optimize 64bit find_lsb/find_msb No Foz-DB changes, but this should be better, especially for gfx6-7 where uadd_sat is 2 instructions. Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 503cb662ea3..11e63cf0816 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1775,7 +1775,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); lo = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), lo); hi = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), hi); - hi = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)), hi); + hi = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(32u), hi); bld.vop2(aco_opcode::v_min_u32, Definition(dst), lo, hi); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -1816,12 +1816,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp lo = bld.tmp(v1), hi = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); - lo = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)), - bld.vop1(op, bld.def(v1), lo)); + lo = bld.vop1(op, bld.def(v1), lo); + lo = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(32), lo); hi = bld.vop1(op, bld.def(v1), hi); - Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi); - - Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi); + Temp msb_rev = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), lo, hi); Temp msb = bld.tmp(v1); Temp carry =