From 8d9349e75bc3e2bbafe4fad5c5414c82c297633d Mon Sep 17 00:00:00 2001
From: Georg Lehmann <dadschoorse@gmail.com>
Date: Tue, 27 Jan 2026 15:27:10 +0100
Subject: [PATCH] aco: disable DPP for rev integer subs and shifts

It is not documented anywhere, but at least on gfx12 and gfx10.3
DPP is applied to src1 instead of src0.
This might be useful for shifts, but to be safe just disable DPP
completely for now.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14739

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
(cherry picked from commit 140ca3bb50a1dd5bb1c190653bdc18244a6fe54e)
---
 .pick_status.json              |  2 +-
 src/amd/compiler/README-ISA.md |  5 +++++
 src/amd/compiler/aco_ir.cpp    | 15 +++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 7c87c5e7078..bbb15776622 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -444,7 +444,7 @@
         "description": "aco: disable DPP for rev integer subs and shifts",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md
index f828e74d5df..a2c02d1ccf9 100644
--- a/src/amd/compiler/README-ISA.md
+++ b/src/amd/compiler/README-ISA.md
@@ -216,6 +216,11 @@ the correct layout is:
 VOP2 `v_pk_fmac_f16`. But like all other packed math opcodes, DPP does not function in practice.
 RDNA1 and RDNA2 support `v_pk_fmac_f16_dpp`.
 
+## DPP with integer `subrev` and shifts
+
+No documentation mentions this, but on at least GFX10.3 and GFX12, DPP is applied to src1 instead
+of src0 for integer reverse subtract and reverse shift opcodes. ACO does not use DPP with them.
+
 ## ds_swizzle_b32 rotate/fft modes
 
 These are first mentioned in the GFX9 (Vega) ISA doc, information from the LLVM bug tracker
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 6edd4eb81d6..b198378b374 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -395,6 +395,21 @@ bool
 opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p)
 {
    switch (opcode) {
+   /* reverse integer subtract and shift seem to apply dpp to src1 instead of src0 */
+   case aco_opcode::v_subrev_co_u32:
+   case aco_opcode::v_subrev_co_u32_e64:
+   case aco_opcode::v_subbrev_co_u32:
+   case aco_opcode::v_subrev_u16:
+   case aco_opcode::v_subrev_u32:
+   case aco_opcode::v_ashrrev_i32:
+   case aco_opcode::v_lshrrev_b32:
+   case aco_opcode::v_lshlrev_b32:
+   case aco_opcode::v_ashrrev_i16:
+   case aco_opcode::v_lshrrev_b16:
+   case aco_opcode::v_lshlrev_b16:
+   case aco_opcode::v_ashrrev_i16_e64:
+   case aco_opcode::v_lshrrev_b16_e64:
+   case aco_opcode::v_lshlrev_b16_e64: return false;
    case aco_opcode::v_pk_fmac_f16: return gfx_level < GFX11;
    /* there are more cases but those all take 64-bit inputs */
    case aco_opcode::v_madmk_f32: