From 8d9349e75bc3e2bbafe4fad5c5414c82c297633d Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Tue, 27 Jan 2026 15:27:10 +0100 Subject: [PATCH] aco: disable DPP for rev integer subs and shifts It is not documented anywhere, but at least on gfx12 and gfx10.3 DPP is applied to src1 instead of src0. This might be useful for shifts, but to be safe just disable DPP completely for now. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14739 Reviewed-by: Rhys Perry Part-of: (cherry picked from commit 140ca3bb50a1dd5bb1c190653bdc18244a6fe54e) --- .pick_status.json | 2 +- src/amd/compiler/README-ISA.md | 5 +++++ src/amd/compiler/aco_ir.cpp | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index 7c87c5e7078..bbb15776622 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -444,7 +444,7 @@ "description": "aco: disable DPP for rev integer subs and shifts", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md index f828e74d5df..a2c02d1ccf9 100644 --- a/src/amd/compiler/README-ISA.md +++ b/src/amd/compiler/README-ISA.md @@ -216,6 +216,11 @@ the correct layout is: VOP2 `v_pk_fmac_f16`. But like all other packed math opcodes, DPP does not function in practice. RDNA1 and RDNA2 support `v_pk_fmac_f16_dpp`. +## DPP with integer `subrev` and shifts + +No documentation mentions this, but DPP is seemingly applied to src1 instead of src0 for +integer reverse subtract and shift opcodes. + ## ds_swizzle_b32 rotate/fft modes These are first mentioned in the GFX9 (Vega) ISA doc, information from the LLVM bug tracker diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 6edd4eb81d6..b198378b374 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -395,6 +395,21 @@ bool opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p) { switch (opcode) { + /* reverse integer subtract and shift seem to apply dpp to src1 instead of src0 */ + case aco_opcode::v_subrev_co_u32: + case aco_opcode::v_subrev_co_u32_e64: + case aco_opcode::v_subbrev_co_u32: + case aco_opcode::v_subrev_u16: + case aco_opcode::v_subrev_u32: + case aco_opcode::v_ashrrev_i32: + case aco_opcode::v_lshrrev_b32: + case aco_opcode::v_lshlrev_b32: + case aco_opcode::v_ashrrev_i16: + case aco_opcode::v_lshrrev_b16: + case aco_opcode::v_lshlrev_b16: + case aco_opcode::v_ashrrev_i16_e64: + case aco_opcode::v_lshrrev_b16_e64: + case aco_opcode::v_lshlrev_b16_e64: return false; case aco_opcode::v_pk_fmac_f16: return gfx_level < GFX11; /* there are more cases but those all take 64-bit inputs */ case aco_opcode::v_madmk_f32: