From e8220e106bf22eb5103bfec7a3b97bbcf69aee90 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Mon, 8 Feb 2021 15:37:02 +0000
Subject: [PATCH] aco: optimize AC_FETCH_FORMAT_SNORM alpha adjust
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This looks like it was copied from LLVM, which didn't have a fmax
intrinsic.

fossil-db (GFX8):
Totals from 43 (0.03% of 140385) affected shaders:
CodeSize: 49660 -> 49488 (-0.35%)
Instrs: 10434 -> 10348 (-0.82%)
Cycles: 41736 -> 41392 (-0.82%)
VMEM: 13793 -> 13719 (-0.54%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index a980f972f4a..e21b71b99ff 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4854,14 +4854,14 @@ Temp adjust_vertex_fetch_alpha(isel_context *ctx, unsigned adjustment, Temp alph
     * and happen to contain 0, 1, 2, 3 as the two LSBs of the
     * exponent.
     */
-   alpha = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(adjustment == AC_FETCH_FORMAT_SNORM ? 7u : 30u), alpha);
-   alpha = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(30u), alpha);
+   /* v_bfe_i32 wants the field's bit offset, not a shift amount: 23 = exponent LSBs, 0 = integer LSBs. */
+   unsigned offset = adjustment == AC_FETCH_FORMAT_SNORM ? 23u : 0u;
+   alpha = bld.vop3(aco_opcode::v_bfe_i32, bld.def(v1), alpha, Operand(offset), Operand(2u));
 
    /* Convert back to the right type. */
    if (adjustment == AC_FETCH_FORMAT_SNORM) {
       alpha = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), alpha);
-      Temp clamp = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0xbf800000u), alpha);
-      alpha = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0xbf800000u), alpha, clamp);
+      alpha = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0xbf800000u), alpha);
    } else if (adjustment == AC_FETCH_FORMAT_SSCALED) {
       alpha = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), alpha);
    }