From ce5838599d73cbda68303ba0ffb29de29410dfa2 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 16 Nov 2022 17:08:09 +0000
Subject: [PATCH] aco/gfx11: use v_cvt_i32_i16/v_cvt_u32_u16
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fossil-db (gfx1100):
Totals from 52753 (39.07% of 135032) affected shaders:
CodeSize: 153603860 -> 153163384 (-0.29%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_lower_to_hw_instr.cpp  | 5 +++++
 src/amd/compiler/tests/test_to_hw_instr.cpp | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 0f4d1f445ff..95c8d77ffc3 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1478,6 +1478,8 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
       /* move lo and zero high bits */
       if (lo.physReg().byte() == 2)
          bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), lo);
+      else if (ctx->program->gfx_level >= GFX11)
+         bld.vop1(aco_opcode::v_cvt_u32_u16, def, lo);
       else
          bld.vop2(aco_opcode::v_and_b32, def_lo, Operand::c32(0xFFFFu), lo);
       bld.vop2(aco_opcode::v_or_b32, def, Operand::c32(hi.constantValue() << 16u),
@@ -2237,6 +2239,9 @@ lower_to_hw_instr(Program* program)
                   if (offset == (32 - bits) && op.regClass() != s1) {
                      bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32,
                               dst, Operand::c32(offset), op);
+                  } else if (offset == 0 && bits == 16 && ctx.program->gfx_level >= GFX11) {
+                     bld.vop1(signext ? aco_opcode::v_cvt_i32_i16 : aco_opcode::v_cvt_u32_u16, dst,
+                              op);
                   } else {
                      bld.vop3(signext ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32, dst, op,
                               Operand::c32(offset), Operand::c32(bits));
diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp
index 4f6aa44623f..91d049e691e 100644
--- a/src/amd/compiler/tests/test_to_hw_instr.cpp
+++ b/src/amd/compiler/tests/test_to_hw_instr.cpp
@@ -626,7 +626,9 @@ BEGIN_TEST(to_hw_instr.extract)
          EXT(2, 8)
          //! v1: %_:v[0] = @v_shr 24, %_:v[1]
          EXT(3, 8)
-         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
+         //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
+         //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
+         //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
          EXT(0, 16)
          //! v1: %_:v[0] = @v_shr 16, %_:v[1]
          EXT(1, 16)