diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 50a884202a3..53758ea4d3e 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1492,7 +1492,8 @@ get_tied_defs(Instruction* instr) instr->opcode == aco_opcode::v_fmac_legacy_f32 || instr->opcode == aco_opcode::v_pk_fmac_f16 || instr->opcode == aco_opcode::v_writelane_b32 || instr->opcode == aco_opcode::v_writelane_b32_e64 || - instr->opcode == aco_opcode::v_dot4c_i32_i8 || instr->opcode == aco_opcode::s_fmac_f32 || + instr->opcode == aco_opcode::v_dot4c_i32_i8 || + instr->opcode == aco_opcode::v_dot2c_f32_f16 || instr->opcode == aco_opcode::s_fmac_f32 || instr->opcode == aco_opcode::s_fmac_f16) { ops.push_back(2); } else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32 || diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 0c38ede575e..c66d40a58de 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -3127,6 +3127,10 @@ vop3_can_use_vop2acc(ra_ctx& ctx, Instruction* instr) if (!ctx.program->dev.has_fmac_legacy32) return false; break; + case aco_opcode::v_dot2_f32_f16: + if (ctx.program->gfx_level < GFX10 || ctx.program->gfx_level >= GFX12) + return false; + break; default: return false; } @@ -3537,6 +3541,7 @@ optimize_encoding_vop2(ra_ctx& ctx, RegisterFile& register_file, aco_ptropcode = aco_opcode::v_dot4c_i32_i8; break; case aco_opcode::v_mad_legacy_f32: instr->opcode = aco_opcode::v_mac_legacy_f32; break; case aco_opcode::v_fma_legacy_f32: instr->opcode = aco_opcode::v_fmac_legacy_f32; break; + case aco_opcode::v_dot2_f32_f16: instr->opcode = aco_opcode::v_dot2c_f32_f16; break; default: break; } }