diff --git a/.pick_status.json b/.pick_status.json index 04d5f7e9b70..12b337ada5f 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -292,7 +292,7 @@ "description": "aco: optimize v_pk_fma_f16 -> v_pk_fmac_f16 on GFX10", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index d617ecb84ad..2557559b5c0 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2083,7 +2083,8 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc (instr->opcode == aco_opcode::v_fma_f32 && program->chip_class >= GFX10) || instr->opcode == aco_opcode::v_mad_f16 || instr->opcode == aco_opcode::v_mad_legacy_f16 || - (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10)) && + (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) || + (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10)) && instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() && instr->operands[2].getTemp().type() == RegType::vgpr && @@ -2113,6 +2114,9 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break; + case aco_opcode::v_pk_fma_f16: + instr->opcode = aco_opcode::v_pk_fmac_f16; + break; default: break; } @@ -2125,6 +2129,7 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 || + instr->opcode == aco_opcode::v_pk_fmac_f16 || instr->opcode == aco_opcode::v_writelane_b32 || instr->opcode == aco_opcode::v_writelane_b32_e64) { instr->definitions[0].setFixed(instr->operands[2].physReg());