From 80f296268e65cd6b3a87f1ff58672f19fd854dc5 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 22 Oct 2022 16:59:36 +0200 Subject: [PATCH] aco: Don't use opsel for p_insert. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't make sense, opsel preserves the not selected half of the register, p_insert zeros it. No Foz-DB changes. Signed-off-by: Georg Lehmann Reviewed-by: Daniel Schürmann Fixes: 54292e99c78 ("aco: optimize 32-bit extracts and inserts using SDWA") Part-of: (cherry picked from commit 616d3908dc179c7319380111fd1cd5b047caeb75) --- .pick_status.json | 2 +- src/amd/compiler/aco_optimizer.cpp | 20 ++++++-------------- src/amd/compiler/tests/test_sdwa.cpp | 14 ++++++-------- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index d1785690af7..5e34f402f6c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1687,7 +1687,7 @@ "description": "aco: Don't use opsel for p_insert.", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "54292e99c7844500314bfd623469c65adef954c5" }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index a3f8555e15e..94fbfabded4 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3245,21 +3245,13 @@ apply_insert(opt_ctx& ctx, aco_ptr& instr) SubdwordSel sel = parse_insert(def_info.instr); assert(sel); - if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() && - can_use_opsel(ctx.program->gfx_level, instr->opcode, -1)) { - if (instr->vop3().opsel & (1 << 3)) - return false; - if (sel.offset()) - instr->vop3().opsel |= 1 << 3; - } else { - if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) - return false; + if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) + return false; - to_SDWA(ctx, instr); - if (instr->sdwa().dst_sel.size() != 4) - return false; - static_cast(instr.get())->dst_sel = sel; - } + to_SDWA(ctx, instr); + if (instr->sdwa().dst_sel.size() != 4) + return false; + static_cast(instr.get())->dst_sel = sel; instr->definitions[0].swapTemp(def_info.instr->definitions[0]); ctx.info[instr->definitions[0].tempId()].label = 0; diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp index 8d784ef6883..9df87eb568c 100644 --- a/src/amd/compiler/tests/test_sdwa.cpp +++ b/src/amd/compiler/tests/test_sdwa.cpp @@ -496,17 +496,15 @@ BEGIN_TEST(optimize.sdwa.insert) bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)); writeout(10, val); - //~gfx8! v1: %tmp11 = v_sub_i16 %a, %b - //~gfx8! v1: %res11 = p_insert %tmp11, 0, 16 - //~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b - //~gfx(8|9|10)! p_unit_test 11, %res11 + //~gfx[^7]! v1: %tmp11 = v_sub_i16 %a, %b + //~gfx[^7]! v1: %res11 = p_insert %tmp11, 0, 16 + //~gfx[^7]! p_unit_test 11, %res11 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u))); - //~gfx8! v1: %tmp12 = v_sub_i16 %a, %b - //~gfx8! v1: %res12 = p_insert %tmp12, 1, 16 - //~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi - //~gfx(8|9|10)! p_unit_test 12, %res12 + //~gfx[^7]! v1: %tmp12 = v_sub_i16 %a, %b + //~gfx[^7]! v1: %res12 = p_insert %tmp12, 1, 16 + //~gfx[^7]! p_unit_test 12, %res12 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));