mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
aco: disable DPP for rev integer subs and shifts
It is not documented anywhere, but at least on gfx12 and gfx10.3,
DPP is applied to src1 instead of src0 for these opcodes.
This might be useful for shifts, but to be safe just disable DPP
completely for now.
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14739
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
(cherry picked from commit 140ca3bb50)
This commit is contained in:
parent
6553c4ce40
commit
8d9349e75b
3 changed files with 21 additions and 1 deletions
|
|
@@ -444,7 +444,7 @@
|
|||
"description": "aco: disable DPP for rev integer subs and shifts",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@@ -216,6 +216,11 @@ the correct layout is:
|
|||
VOP2 `v_pk_fmac_f16`. But like all other packed math opcodes, DPP does not function in practice.
|
||||
RDNA1 and RDNA2 support `v_pk_fmac_f16_dpp`.
|
||||
|
||||
## DPP with integer `subrev` and shifts
|
||||
|
||||
No documentation mentions this, but DPP is seemingly applied to src1 instead of src0 for
|
||||
integer reverse subtract and shift opcodes.
|
||||
|
||||
## ds_swizzle_b32 rotate/fft modes
|
||||
|
||||
These are first mentioned in the GFX9 (Vega) ISA doc, information from the LLVM bug tracker
|
||||
|
|
|
|||
|
|
@@ -395,6 +395,21 @@ bool
|
|||
opcode_supports_dpp(amd_gfx_level gfx_level, aco_opcode opcode, bool vop3p)
|
||||
{
|
||||
switch (opcode) {
|
||||
/* reverse integer subtract and shift seem to apply dpp to src1 instead of src0 */
|
||||
case aco_opcode::v_subrev_co_u32:
|
||||
case aco_opcode::v_subrev_co_u32_e64:
|
||||
case aco_opcode::v_subbrev_co_u32:
|
||||
case aco_opcode::v_subrev_u16:
|
||||
case aco_opcode::v_subrev_u32:
|
||||
case aco_opcode::v_ashrrev_i32:
|
||||
case aco_opcode::v_lshrrev_b32:
|
||||
case aco_opcode::v_lshlrev_b32:
|
||||
case aco_opcode::v_ashrrev_i16:
|
||||
case aco_opcode::v_lshrrev_b16:
|
||||
case aco_opcode::v_lshlrev_b16:
|
||||
case aco_opcode::v_ashrrev_i16_e64:
|
||||
case aco_opcode::v_lshrrev_b16_e64:
|
||||
case aco_opcode::v_lshlrev_b16_e64: return false;
|
||||
case aco_opcode::v_pk_fmac_f16: return gfx_level < GFX11;
|
||||
/* there are more cases but those all take 64-bit inputs */
|
||||
case aco_opcode::v_madmk_f32:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue