From 71a58f02e520d8555fd7c393d18639d32d3470bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Wed, 16 Sep 2020 10:32:29 +0100
Subject: [PATCH] aco: optimize v_pk_fma_f16 -> v_pk_fmac_f16 on GFX10

Reviewed-by: Rhys Perry
Part-of:
(cherry picked from commit 6ecbccfb2374c4074ab49d3cd31d9aa48a950ab2)
---
 .pick_status.json                            | 2 +-
 src/amd/compiler/aco_register_allocation.cpp | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 04d5f7e9b70..12b337ada5f 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -292,7 +292,7 @@
         "description": "aco: optimize v_pk_fma_f16 -> v_pk_fmac_f16 on GFX10",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index d617ecb84ad..2557559b5c0 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2083,7 +2083,8 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc
               (instr->opcode == aco_opcode::v_fma_f32 && program->chip_class >= GFX10) ||
               instr->opcode == aco_opcode::v_mad_f16 ||
               instr->opcode == aco_opcode::v_mad_legacy_f16 ||
-              (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10)) &&
+              (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
+              (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10)) &&
              instr->operands[2].isTemp() &&
              instr->operands[2].isKillBeforeDef() &&
              instr->operands[2].getTemp().type() == RegType::vgpr &&
@@ -2113,6 +2114,9 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc
                case aco_opcode::v_fma_f16:
                   instr->opcode = aco_opcode::v_fmac_f16;
                   break;
+               case aco_opcode::v_pk_fma_f16:
+                  instr->opcode = aco_opcode::v_pk_fmac_f16;
+                  break;
                default:
                   break;
                }
@@ -2125,6 +2129,7 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc
              instr->opcode == aco_opcode::v_fmac_f32 ||
              instr->opcode == aco_opcode::v_mac_f16 ||
              instr->opcode == aco_opcode::v_fmac_f16 ||
+             instr->opcode == aco_opcode::v_pk_fmac_f16 ||
              instr->opcode == aco_opcode::v_writelane_b32 ||
              instr->opcode == aco_opcode::v_writelane_b32_e64) {
             instr->definitions[0].setFixed(instr->operands[2].physReg());