diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 92768a637ab..503cb662ea3 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4219,8 +4219,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       aco_opcode subrev = instr->def.bit_size == 16 ? aco_opcode::v_subrev_f16
                                                     : aco_opcode::v_subrev_f32;
 
+      bool use_interp = dpp_ctrl1 == dpp_quad_perm(0, 0, 0, 0) && instr->def.bit_size == 32 &&
+                        ctx->program->gfx_level >= GFX11_5;
       if (!nir_src_is_divergent(instr->src[0].src)) {
          bld.vop2(subrev, Definition(dst), src, src);
+      } else if (use_interp && dpp_ctrl2 == dpp_quad_perm(1, 1, 1, 1)) {
+         bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), src,
+                           Operand::c32(0x3f800000), src)
+            ->valu()
+            .neg[2] = true;
+      } else if (use_interp && dpp_ctrl2 == dpp_quad_perm(2, 2, 2, 2)) {
+         Builder::Result tmp = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1),
+                                                 Operand::c32(0), Operand::c32(0), src);
+         tmp->valu().neg = 0x6;
+         bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), src,
+                           Operand::c32(0x3f800000), tmp);
       } else if (ctx->program->gfx_level >= GFX8) {
          Temp tmp = bld.vop2_dpp(subrev, bld.def(v1), src, src, dpp_ctrl1);
          bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), tmp, dpp_ctrl2);