diff --git a/.pick_status.json b/.pick_status.json index 7c87c5e7078..bbb15776622 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -444,7 +444,7 @@ "description": "aco: disable DPP for rev integer subs and shifts", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md index f828e74d5df..a2c02d1ccf9 100644 --- a/src/amd/compiler/README-ISA.md +++ b/src/amd/compiler/README-ISA.md @@ -216,6 +216,11 @@ the correct layout is: VOP2 `v_pk_fmac_f16`. But like all other packed math opcodes, DPP does not function in practice. RDNA1 and RDNA2 support `v_pk_fmac_f16_dpp`. +## DPP with integer `subrev` and shifts + +No documentation mentions this, but DPP is seemingly applied to src1 instead of src0 for +integer reverse subtract and shift opcodes. + ## ds_swizzle_b32 rotate/fft modes These are first mentioned in the GFX9 (Vega) ISA doc, information from the LLVM bug tracker diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 6edd4eb81d6..b198378b374 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -395,6 +395,21 @@ bool opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p) { switch (opcode) { + /* reverse integer subtract and shift seem to apply dpp to src1 instead of src0 */ + case aco_opcode::v_subrev_co_u32: + case aco_opcode::v_subrev_co_u32_e64: + case aco_opcode::v_subbrev_co_u32: + case aco_opcode::v_subrev_u16: + case aco_opcode::v_subrev_u32: + case aco_opcode::v_ashrrev_i32: + case aco_opcode::v_lshrrev_b32: + case aco_opcode::v_lshlrev_b32: + case aco_opcode::v_ashrrev_i16: + case aco_opcode::v_lshrrev_b16: + case aco_opcode::v_lshlrev_b16: + case aco_opcode::v_ashrrev_i16_e64: + case aco_opcode::v_lshrrev_b16_e64: + case aco_opcode::v_lshlrev_b16_e64: return false; case aco_opcode::v_pk_fmac_f16: return gfx_level < GFX11; /* there are more cases but those all take 64-bit inputs */ case aco_opcode::v_madmk_f32: