diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 9732933cd84..74244b0907e 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2224,6 +2224,8 @@ lower_to_hw_instr(Program* program) } else if (offset == 0 && signext && (bits == 8 || bits == 16)) { bld.sop1(bits == 8 ? aco_opcode::s_sext_i32_i8 : aco_opcode::s_sext_i32_i16, dst, op); + } else if (ctx.program->gfx_level >= GFX9 && offset == 0 && bits == 16) { + bld.sop2(aco_opcode::s_pack_ll_b32_b16, dst, op, Operand::zero()); } else { bld.sop2(signext ? aco_opcode::s_bfe_i32 : aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op, Operand::c32((bits << 16) | offset)); diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 5e71d294a88..4f6aa44623f 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -648,7 +648,8 @@ BEGIN_TEST(to_hw_instr.extract) EXT(2, 8) //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24 EXT(3, 8) - //~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000 + //~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000 + //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0 //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1] EXT(0, 16) //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16