diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 0488eff8bef..224f8bcce39 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4372,6 +4372,22 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) add_opt(v_mul_f64, v_mul_f64, 0x3, "120", reassoc_omod_cb, true); } else if (info.opcode == aco_opcode::v_mul_f64_e64 && can_reassoc_omod(ctx, info, 64)) { add_opt(v_mul_f64_e64, v_mul_f64_e64, 0x3, "120", reassoc_omod_cb, true); + } else if (info.opcode == aco_opcode::v_rcp_f32 && can_reassoc_omod(ctx, info, 32)) { + add_opt(v_mul_f32, v_rcp_f32, 0x1, "01", reassoc_omod_cb); + add_opt(v_mul_legacy_f32, v_rcp_f32, 0x1, "01", reassoc_omod_cb); + add_opt(s_mul_f32, v_rcp_f32, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_s_rcp_f32 && can_reassoc_omod(ctx, info, 32)) { + add_opt(s_mul_f32, v_s_rcp_f32, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_rcp_f16 && can_reassoc_omod(ctx, info, 16)) { + add_opt(v_mul_f16, v_rcp_f16, 0x1, "01", reassoc_omod_cb); + add_opt(s_mul_f16, v_rcp_f16, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_s_rcp_f16 && can_reassoc_omod(ctx, info, 16)) { + add_opt(s_mul_f16, v_s_rcp_f16, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_rcp_f64 && can_reassoc_omod(ctx, info, 64)) { + if (ctx.program->gfx_level < GFX12) + add_opt(v_mul_f64_e64, v_rcp_f64, 0x1, "01", reassoc_omod_cb); + else + add_opt(v_mul_f64, v_rcp_f64, 0x1, "01", reassoc_omod_cb); } else if (info.opcode == aco_opcode::v_add_u16 && !info.clamp) { if (ctx.program->gfx_level < GFX9) { add_opt(v_mul_lo_u16, v_mad_legacy_u16, 0x3, "120");