freedreno/ir3: add a6xx global atomics and separate atomic opcodes

Separating the atomic opcodes makes it possible to express a6xx global
atomics, which take an iova in SRC1. They will be needed for
VK_KHR_buffer_device_address.
The change also makes it easier to distinguish atomics in conditions.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8717>
This commit is contained in:
Danylo Piliaiev 2021-01-22 19:51:56 +02:00 committed by Marge Bot
parent c5d6e57e42
commit 5d5b1fc472
13 changed files with 465 additions and 137 deletions

View file

@ -4638,12 +4638,12 @@ shader-blocks:
size: 2048
:0:0000:0000[00000000x_00003002x] nop
:0:0001:0001[00000000x_00000000x] nop
:6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
:6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
:6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
:6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
:6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
:6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
:6:0002:0002[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
:6:0003:0003[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
:6:0004:0004[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
:6:0005:0005[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
:6:0006:0006[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
:6:0007:0007[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
-----------------------------------------------
8192 (0x2000) bytes
000000: 00003002 00000000 00000000 00000000 |.0..............|

View file

@ -333,6 +333,39 @@ static const struct opc_info {
OPC(6, OPC_ATOMIC_AND, atomic.and),
OPC(6, OPC_ATOMIC_OR, atomic.or),
OPC(6, OPC_ATOMIC_XOR, atomic.xor),
OPC(6, OPC_ATOMIC_B_ADD, atomic.b.add),
OPC(6, OPC_ATOMIC_B_SUB, atomic.b.sub),
OPC(6, OPC_ATOMIC_B_XCHG, atomic.b.xchg),
OPC(6, OPC_ATOMIC_B_INC, atomic.b.inc),
OPC(6, OPC_ATOMIC_B_DEC, atomic.b.dec),
OPC(6, OPC_ATOMIC_B_CMPXCHG, atomic.b.cmpxchg),
OPC(6, OPC_ATOMIC_B_MIN, atomic.b.min),
OPC(6, OPC_ATOMIC_B_MAX, atomic.b.max),
OPC(6, OPC_ATOMIC_B_AND, atomic.b.and),
OPC(6, OPC_ATOMIC_B_OR, atomic.b.or),
OPC(6, OPC_ATOMIC_B_XOR, atomic.b.xor),
OPC(6, OPC_ATOMIC_S_ADD, atomic.s.add),
OPC(6, OPC_ATOMIC_S_SUB, atomic.s.sub),
OPC(6, OPC_ATOMIC_S_XCHG, atomic.s.xchg),
OPC(6, OPC_ATOMIC_S_INC, atomic.s.inc),
OPC(6, OPC_ATOMIC_S_DEC, atomic.s.dec),
OPC(6, OPC_ATOMIC_S_CMPXCHG, atomic.s.cmpxchg),
OPC(6, OPC_ATOMIC_S_MIN, atomic.s.min),
OPC(6, OPC_ATOMIC_S_MAX, atomic.s.max),
OPC(6, OPC_ATOMIC_S_AND, atomic.s.and),
OPC(6, OPC_ATOMIC_S_OR, atomic.s.or),
OPC(6, OPC_ATOMIC_S_XOR, atomic.s.xor),
OPC(6, OPC_ATOMIC_G_ADD, atomic.g.add),
OPC(6, OPC_ATOMIC_G_SUB, atomic.g.sub),
OPC(6, OPC_ATOMIC_G_XCHG, atomic.g.xchg),
OPC(6, OPC_ATOMIC_G_INC, atomic.g.inc),
OPC(6, OPC_ATOMIC_G_DEC, atomic.g.dec),
OPC(6, OPC_ATOMIC_G_CMPXCHG, atomic.g.cmpxchg),
OPC(6, OPC_ATOMIC_G_MIN, atomic.g.min),
OPC(6, OPC_ATOMIC_G_MAX, atomic.g.max),
OPC(6, OPC_ATOMIC_G_AND, atomic.g.and),
OPC(6, OPC_ATOMIC_G_OR, atomic.g.or),
OPC(6, OPC_ATOMIC_G_XOR, atomic.g.xor),
OPC(6, OPC_LDGB, ldgb),
OPC(6, OPC_STGB, stgb),
OPC(6, OPC_STIB, stib),

View file

@ -306,11 +306,35 @@ typedef enum {
OPC_ATOMIC_B_OR = _OPC(6, 53),
OPC_ATOMIC_B_XOR = _OPC(6, 54),
OPC_LDG_A = _OPC(6, 55),
OPC_STG_A = _OPC(6, 56),
OPC_ATOMIC_S_ADD = _OPC(6, 55),
OPC_ATOMIC_S_SUB = _OPC(6, 56),
OPC_ATOMIC_S_XCHG = _OPC(6, 57),
OPC_ATOMIC_S_INC = _OPC(6, 58),
OPC_ATOMIC_S_DEC = _OPC(6, 59),
OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60),
OPC_ATOMIC_S_MIN = _OPC(6, 61),
OPC_ATOMIC_S_MAX = _OPC(6, 62),
OPC_ATOMIC_S_AND = _OPC(6, 63),
OPC_ATOMIC_S_OR = _OPC(6, 64),
OPC_ATOMIC_S_XOR = _OPC(6, 65),
OPC_SPILL_MACRO = _OPC(6, 57),
OPC_RELOAD_MACRO = _OPC(6, 58),
OPC_ATOMIC_G_ADD = _OPC(6, 66),
OPC_ATOMIC_G_SUB = _OPC(6, 67),
OPC_ATOMIC_G_XCHG = _OPC(6, 68),
OPC_ATOMIC_G_INC = _OPC(6, 69),
OPC_ATOMIC_G_DEC = _OPC(6, 70),
OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71),
OPC_ATOMIC_G_MIN = _OPC(6, 72),
OPC_ATOMIC_G_MAX = _OPC(6, 73),
OPC_ATOMIC_G_AND = _OPC(6, 74),
OPC_ATOMIC_G_OR = _OPC(6, 75),
OPC_ATOMIC_G_XOR = _OPC(6, 76),
OPC_LDG_A = _OPC(6, 77),
OPC_STG_A = _OPC(6, 78),
OPC_SPILL_MACRO = _OPC(6, 79),
OPC_RELOAD_MACRO = _OPC(6, 80),
/* category 7: */
OPC_BAR = _OPC(7, 0),
@ -592,7 +616,7 @@ is_madsh(opc_t opc)
}
static inline bool
is_atomic(opc_t opc)
is_local_atomic(opc_t opc)
{
switch (opc) {
case OPC_ATOMIC_ADD:
@ -612,6 +636,76 @@ is_atomic(opc_t opc)
}
}
/* True when opc is one of the OPC_ATOMIC_S_* opcodes, i.e. the "atomic.s.*"
 * mnemonics (the pre-a6xx image/SSBO atomics); false for every other opcode.
 */
static inline bool
is_global_a3xx_atomic(opc_t opc)
{
	return opc == OPC_ATOMIC_S_ADD ||
	       opc == OPC_ATOMIC_S_SUB ||
	       opc == OPC_ATOMIC_S_XCHG ||
	       opc == OPC_ATOMIC_S_INC ||
	       opc == OPC_ATOMIC_S_DEC ||
	       opc == OPC_ATOMIC_S_CMPXCHG ||
	       opc == OPC_ATOMIC_S_MIN ||
	       opc == OPC_ATOMIC_S_MAX ||
	       opc == OPC_ATOMIC_S_AND ||
	       opc == OPC_ATOMIC_S_OR ||
	       opc == OPC_ATOMIC_S_XOR;
}
/* True when opc is one of the OPC_ATOMIC_G_* opcodes, i.e. the "atomic.g.*"
 * mnemonics (a6xx+ global atomics that take an iova in SRC1); false for
 * every other opcode.
 */
static inline bool
is_global_a6xx_atomic(opc_t opc)
{
	return opc == OPC_ATOMIC_G_ADD ||
	       opc == OPC_ATOMIC_G_SUB ||
	       opc == OPC_ATOMIC_G_XCHG ||
	       opc == OPC_ATOMIC_G_INC ||
	       opc == OPC_ATOMIC_G_DEC ||
	       opc == OPC_ATOMIC_G_CMPXCHG ||
	       opc == OPC_ATOMIC_G_MIN ||
	       opc == OPC_ATOMIC_G_MAX ||
	       opc == OPC_ATOMIC_G_AND ||
	       opc == OPC_ATOMIC_G_OR ||
	       opc == OPC_ATOMIC_G_XOR;
}
/* True when opc is one of the OPC_ATOMIC_B_* opcodes, i.e. the "atomic.b.*"
 * ("bindless") mnemonics; false for every other opcode.
 */
static inline bool
is_bindless_atomic(opc_t opc)
{
	return opc == OPC_ATOMIC_B_ADD ||
	       opc == OPC_ATOMIC_B_SUB ||
	       opc == OPC_ATOMIC_B_XCHG ||
	       opc == OPC_ATOMIC_B_INC ||
	       opc == OPC_ATOMIC_B_DEC ||
	       opc == OPC_ATOMIC_B_CMPXCHG ||
	       opc == OPC_ATOMIC_B_MIN ||
	       opc == OPC_ATOMIC_B_MAX ||
	       opc == OPC_ATOMIC_B_AND ||
	       opc == OPC_ATOMIC_B_OR ||
	       opc == OPC_ATOMIC_B_XOR;
}
/* True when opc belongs to any atomic opcode family: local, pre-a6xx
 * global ("atomic.s.*"), a6xx global ("atomic.g.*") or bindless
 * ("atomic.b.*").  The families are tested in that order.
 */
static inline bool
is_atomic(opc_t opc)
{
	if (is_local_atomic(opc))
		return true;
	if (is_global_a3xx_atomic(opc))
		return true;
	if (is_global_a6xx_atomic(opc))
		return true;
	return is_bindless_atomic(opc);
}
static inline bool
is_ssbo(opc_t opc)
{

View file

@ -952,10 +952,11 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
/* disallow immediates in anything but the SSBO slot argument for
* cat6 instructions:
*/
if (is_atomic(instr->opc) && (n != 0))
if (is_global_a3xx_atomic(instr->opc) && (n != 0))
return false;
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
if (is_local_atomic(instr->opc) || is_global_a6xx_atomic(instr->opc) ||
is_bindless_atomic(instr->opc))
return false;
if (instr->opc == OPC_STG && (n == 2))

View file

@ -282,19 +282,18 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
IR3_INSTR_G = 0x400,
IR3_INSTR_SAT = 0x800,
IR3_INSTR_SAT = 0x400,
/* (cat5/cat6) Bindless */
IR3_INSTR_B = 0x1000,
IR3_INSTR_B = 0x800,
/* (cat5/cat6) nonuniform */
IR3_INSTR_NONUNIF = 0x02000,
IR3_INSTR_NONUNIF = 0x1000,
/* (cat5-only) Get some parts of the encoding from a1.x */
IR3_INSTR_A1EN = 0x04000,
IR3_INSTR_A1EN = 0x02000,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
IR3_INSTR_MARK = 0x08000,
IR3_INSTR_UNUSED = 0x10000,
IR3_INSTR_MARK = 0x04000,
IR3_INSTR_UNUSED = 0x08000,
} flags;
uint8_t repeat;
uint8_t nop;
@ -2183,17 +2182,28 @@ INSTR3NODST(STIB);
INSTR2(LDIB);
INSTR5(LDG_A);
INSTR6NODST(STG_A);
INSTR3F(G, ATOMIC_ADD)
INSTR3F(G, ATOMIC_SUB)
INSTR3F(G, ATOMIC_XCHG)
INSTR3F(G, ATOMIC_INC)
INSTR3F(G, ATOMIC_DEC)
INSTR3F(G, ATOMIC_CMPXCHG)
INSTR3F(G, ATOMIC_MIN)
INSTR3F(G, ATOMIC_MAX)
INSTR3F(G, ATOMIC_AND)
INSTR3F(G, ATOMIC_OR)
INSTR3F(G, ATOMIC_XOR)
INSTR2(ATOMIC_G_ADD)
INSTR2(ATOMIC_G_SUB)
INSTR2(ATOMIC_G_XCHG)
INSTR2(ATOMIC_G_INC)
INSTR2(ATOMIC_G_DEC)
INSTR2(ATOMIC_G_CMPXCHG)
INSTR2(ATOMIC_G_MIN)
INSTR2(ATOMIC_G_MAX)
INSTR2(ATOMIC_G_AND)
INSTR2(ATOMIC_G_OR)
INSTR2(ATOMIC_G_XOR)
INSTR3(ATOMIC_B_ADD)
INSTR3(ATOMIC_B_SUB)
INSTR3(ATOMIC_B_XCHG)
INSTR3(ATOMIC_B_INC)
INSTR3(ATOMIC_B_DEC)
INSTR3(ATOMIC_B_CMPXCHG)
INSTR3(ATOMIC_B_MIN)
INSTR3(ATOMIC_B_MAX)
INSTR3(ATOMIC_B_AND)
INSTR3(ATOMIC_B_OR)
INSTR3(ATOMIC_B_XOR)
#elif GPU >= 400
INSTR3(LDGB)
#if GPU >= 500
@ -2201,17 +2211,17 @@ INSTR3(LDIB)
#endif
INSTR4NODST(STGB)
INSTR4NODST(STIB)
INSTR4F(G, ATOMIC_ADD)
INSTR4F(G, ATOMIC_SUB)
INSTR4F(G, ATOMIC_XCHG)
INSTR4F(G, ATOMIC_INC)
INSTR4F(G, ATOMIC_DEC)
INSTR4F(G, ATOMIC_CMPXCHG)
INSTR4F(G, ATOMIC_MIN)
INSTR4F(G, ATOMIC_MAX)
INSTR4F(G, ATOMIC_AND)
INSTR4F(G, ATOMIC_OR)
INSTR4F(G, ATOMIC_XOR)
INSTR4(ATOMIC_S_ADD)
INSTR4(ATOMIC_S_SUB)
INSTR4(ATOMIC_S_XCHG)
INSTR4(ATOMIC_S_INC)
INSTR4(ATOMIC_S_DEC)
INSTR4(ATOMIC_S_CMPXCHG)
INSTR4(ATOMIC_S_MIN)
INSTR4(ATOMIC_S_MAX)
INSTR4(ATOMIC_S_AND)
INSTR4(ATOMIC_S_OR)
INSTR4(ATOMIC_S_XOR)
#endif
/* cat7 instructions: */

View file

@ -135,39 +135,39 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add_ir3:
atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_ADD(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_imin_ir3:
atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umin_ir3:
atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_imax_ir3:
atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umax_ir3:
atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_and_ir3:
atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_AND(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_or_ir3:
atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_OR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_xor_ir3:
atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_XOR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_exchange_ir3:
atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
atomic = ir3_ATOMIC_S_XCHG(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
data = ir3_collect(b, src3, data);
struct ir3_instruction *dword_offset = ir3_get_src(ctx, &intr->src[4])[0];
atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, data, 0, dword_offset, 0,
atomic = ir3_ATOMIC_S_CMPXCHG(b, ssbo, 0, data, 0, dword_offset, 0,
byte_offset, 0);
break;
default:
@ -311,32 +311,32 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_image_atomic_add:
atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_ADD(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_MIN(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_MAX(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_and:
atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_AND(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_or:
atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_OR(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_xor:
atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_XOR(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_exchange:
atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_XCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_comp_swap:
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
src0 = ir3_collect(b, ir3_get_src(ctx, &intr->src[4])[0], src0);
atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
atomic = ir3_ATOMIC_S_CMPXCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
default:
unreachable("boo");

View file

@ -144,36 +144,36 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add_ir3:
atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_imin_ir3:
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umin_ir3:
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_imax_ir3:
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umax_ir3:
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_and_ir3:
atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_or_ir3:
atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_xor_ir3:
atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_exchange_ir3:
atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
break;
default:
unreachable("boo");
@ -288,39 +288,39 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_add:
atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_bindless_image_atomic_imin:
case nir_intrinsic_bindless_image_atomic_umin:
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_bindless_image_atomic_imax:
case nir_intrinsic_bindless_image_atomic_umax:
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_bindless_image_atomic_and:
atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_bindless_image_atomic_or:
atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_xor:
atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_exchange:
atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_bindless_image_atomic_comp_swap:
atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
break;
default:
unreachable("boo");

View file

@ -273,19 +273,18 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
else
regmask_set(&state->needs_sy, n->dsts[0]);
} else if (is_atomic(n->opc)) {
if (n->flags & IR3_INSTR_G) {
if (ctx->compiler->gen >= 6) {
/* New encoding, returns result via second src: */
regmask_set(&state->needs_sy, n->srcs[2]);
} else {
regmask_set(&state->needs_sy, n->dsts[0]);
}
if (is_bindless_atomic(n->opc)) {
regmask_set(&state->needs_sy, n->srcs[2]);
} else if (is_global_a3xx_atomic(n->opc) ||
is_global_a6xx_atomic(n->opc)) {
regmask_set(&state->needs_sy, n->dsts[0]);
} else {
regmask_set(&state->needs_ss, n->dsts[0]);
}
}
if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G)))
if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
is_bindless_atomic(n->opc))
ctx->so->has_ssbo = true;
/* both tex/sfu appear to not always immediately consume

View file

@ -339,6 +339,29 @@ static int parse_w(const char *str)
"atomic.b.and" return TOKEN(T_OP_ATOMIC_B_AND);
"atomic.b.or" return TOKEN(T_OP_ATOMIC_B_OR);
"atomic.b.xor" return TOKEN(T_OP_ATOMIC_B_XOR);
"atomic.s.add" return TOKEN(T_OP_ATOMIC_S_ADD);
"atomic.s.sub" return TOKEN(T_OP_ATOMIC_S_SUB);
"atomic.s.xchg" return TOKEN(T_OP_ATOMIC_S_XCHG);
"atomic.s.inc" return TOKEN(T_OP_ATOMIC_S_INC);
"atomic.s.dec" return TOKEN(T_OP_ATOMIC_S_DEC);
"atomic.s.cmpxchg" return TOKEN(T_OP_ATOMIC_S_CMPXCHG);
"atomic.s.min" return TOKEN(T_OP_ATOMIC_S_MIN);
"atomic.s.max" return TOKEN(T_OP_ATOMIC_S_MAX);
"atomic.s.and" return TOKEN(T_OP_ATOMIC_S_AND);
"atomic.s.or" return TOKEN(T_OP_ATOMIC_S_OR);
"atomic.s.xor" return TOKEN(T_OP_ATOMIC_S_XOR);
"atomic.g.add" return TOKEN(T_OP_ATOMIC_G_ADD);
"atomic.g.sub" return TOKEN(T_OP_ATOMIC_G_SUB);
"atomic.g.xchg" return TOKEN(T_OP_ATOMIC_G_XCHG);
"atomic.g.inc" return TOKEN(T_OP_ATOMIC_G_INC);
"atomic.g.dec" return TOKEN(T_OP_ATOMIC_G_DEC);
"atomic.g.cmpxchg" return TOKEN(T_OP_ATOMIC_G_CMPXCHG);
"atomic.g.min" return TOKEN(T_OP_ATOMIC_G_MIN);
"atomic.g.max" return TOKEN(T_OP_ATOMIC_G_MAX);
"atomic.g.and" return TOKEN(T_OP_ATOMIC_G_AND);
"atomic.g.or" return TOKEN(T_OP_ATOMIC_G_OR);
"atomic.g.xor" return TOKEN(T_OP_ATOMIC_G_XOR);
"ldgb" return TOKEN(T_OP_LDGB);
"stgb" return TOKEN(T_OP_STGB);
"stib" return TOKEN(T_OP_STIB);

View file

@ -569,6 +569,28 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_ATOMIC_B_AND
%token <tok> T_OP_ATOMIC_B_OR
%token <tok> T_OP_ATOMIC_B_XOR
%token <tok> T_OP_ATOMIC_S_ADD
%token <tok> T_OP_ATOMIC_S_SUB
%token <tok> T_OP_ATOMIC_S_XCHG
%token <tok> T_OP_ATOMIC_S_INC
%token <tok> T_OP_ATOMIC_S_DEC
%token <tok> T_OP_ATOMIC_S_CMPXCHG
%token <tok> T_OP_ATOMIC_S_MIN
%token <tok> T_OP_ATOMIC_S_MAX
%token <tok> T_OP_ATOMIC_S_AND
%token <tok> T_OP_ATOMIC_S_OR
%token <tok> T_OP_ATOMIC_S_XOR
%token <tok> T_OP_ATOMIC_G_ADD
%token <tok> T_OP_ATOMIC_G_SUB
%token <tok> T_OP_ATOMIC_G_XCHG
%token <tok> T_OP_ATOMIC_G_INC
%token <tok> T_OP_ATOMIC_G_DEC
%token <tok> T_OP_ATOMIC_G_CMPXCHG
%token <tok> T_OP_ATOMIC_G_MIN
%token <tok> T_OP_ATOMIC_G_MAX
%token <tok> T_OP_ATOMIC_G_AND
%token <tok> T_OP_ATOMIC_G_OR
%token <tok> T_OP_ATOMIC_G_XOR
%token <tok> T_OP_LDGB
%token <tok> T_OP_STGB
%token <tok> T_OP_STIB
@ -1020,7 +1042,7 @@ cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
cat6_offset: cat6_imm_offset
| '+' src
cat6_dst_offset: offset { instr->cat6.dst_offset = $1; }
| '+' src { instr->flags |= IR3_INSTR_G; }
| '+' src
cat6_immed: integer { instr->cat6.iim_val = $1; }
@ -1068,14 +1090,39 @@ cat6_atomic_opc: T_OP_ATOMIC_ADD { new_instr(OPC_ATOMIC_ADD); }
| T_OP_ATOMIC_OR { new_instr(OPC_ATOMIC_OR); }
| T_OP_ATOMIC_XOR { new_instr(OPC_ATOMIC_XOR); }
cat6_atomic_g: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src {
instr->flags |= IR3_INSTR_G;
}
cat6_a3xx_atomic_opc: T_OP_ATOMIC_S_ADD { new_instr(OPC_ATOMIC_S_ADD); }
| T_OP_ATOMIC_S_SUB { new_instr(OPC_ATOMIC_S_SUB); }
| T_OP_ATOMIC_S_XCHG { new_instr(OPC_ATOMIC_S_XCHG); }
| T_OP_ATOMIC_S_INC { new_instr(OPC_ATOMIC_S_INC); }
| T_OP_ATOMIC_S_DEC { new_instr(OPC_ATOMIC_S_DEC); }
| T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); }
| T_OP_ATOMIC_S_MIN { new_instr(OPC_ATOMIC_S_MIN); }
| T_OP_ATOMIC_S_MAX { new_instr(OPC_ATOMIC_S_MAX); }
| T_OP_ATOMIC_S_AND { new_instr(OPC_ATOMIC_S_AND); }
| T_OP_ATOMIC_S_OR { new_instr(OPC_ATOMIC_S_OR); }
| T_OP_ATOMIC_S_XOR { new_instr(OPC_ATOMIC_S_XOR); }
cat6_a6xx_atomic_opc: T_OP_ATOMIC_G_ADD { new_instr(OPC_ATOMIC_G_ADD); }
| T_OP_ATOMIC_G_SUB { new_instr(OPC_ATOMIC_G_SUB); }
| T_OP_ATOMIC_G_XCHG { new_instr(OPC_ATOMIC_G_XCHG); }
| T_OP_ATOMIC_G_INC { new_instr(OPC_ATOMIC_G_INC); }
| T_OP_ATOMIC_G_DEC { new_instr(OPC_ATOMIC_G_DEC); }
| T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); }
| T_OP_ATOMIC_G_MIN { new_instr(OPC_ATOMIC_G_MIN); }
| T_OP_ATOMIC_G_MAX { new_instr(OPC_ATOMIC_G_MAX); }
| T_OP_ATOMIC_G_AND { new_instr(OPC_ATOMIC_G_AND); }
| T_OP_ATOMIC_G_OR { new_instr(OPC_ATOMIC_G_OR); }
| T_OP_ATOMIC_G_XOR { new_instr(OPC_ATOMIC_G_XOR); }
cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src
cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src
cat6_atomic_l: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src
cat6_atomic: cat6_atomic_g
| cat6_atomic_l
cat6_atomic: cat6_atomic_l
| cat6_a3xx_atomic_s
| cat6_a6xx_atomic_g
cat6_ibo_opc_1src: T_OP_RESINFO { new_instr(OPC_RESINFO); }
@ -1104,17 +1151,17 @@ cat6_reg_or_immed: src
cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B { new_instr(OPC_RESINFO); }
cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_ADD)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_SUB)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_INC)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_DEC)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_MIN)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_MAX)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_AND)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_OR)->flags |= IR3_INSTR_G; dummy_dst(); }
| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_XOR)->flags |= IR3_INSTR_G; dummy_dst(); }
cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); }
| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); }
| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); }
| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); }
| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); }
| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); }
| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); }
| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); }
| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); }
| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); }
| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); }
| T_OP_STIB_B { new_instr(OPC_STIB); dummy_dst(); }
cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B { new_instr(OPC_LDIB); }

View file

@ -336,14 +336,17 @@ static const struct test {
/* Atomic: */
#if 0
/* TODO our encoding differs in b53 for these two */
INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#else
INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#endif
INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
/* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */
INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"),
/* Bindless atomic: */
INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */

View file

@ -113,9 +113,6 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr)
}
} else if (instr->opc == OPC_DEMOTE) {
return OPC_KILL;
} else if ((instr->block->shader->compiler->gen >= 6) &&
is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
} else if (s->compiler->gen >= 6) {
if (instr->opc == OPC_RESINFO) {
return OPC_RESINFO_B;
@ -243,7 +240,7 @@ extract_cat6_DESC_MODE(struct ir3_instruction *instr)
static inline struct ir3_register *
extract_cat6_SRC(struct ir3_instruction *instr, unsigned n)
{
if (instr->flags & IR3_INSTR_G) {
if (is_global_a3xx_atomic(instr->opc)) {
n++;
}
assert(n < instr->srcs_count);

View file

@ -26,6 +26,14 @@ SOFTWARE.
<!--
Cat6 Instructions: load/store/atomic instructions
There are instructions with suffixes like:
"stg.a", "ldib.b", "atomic.g.add", "atomic.s.add"
They have the following meaning:
'.a' - "addrcalc" stg/ldg with complex address computations
'.b' - "bindless" instructions
'.g' - "global" atomics that operate on raw iova addresses
'.s' - "ssbo" pre-a6xx image/ssbo atomics
-->
<bitset name="#instruction-cat6" extends="#instruction">
@ -482,16 +490,6 @@ SOFTWARE.
to still have an extra src. For now, match that.
</doc>
<override expr="#cat6-global">
<display>
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
</display>
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
<field low="41" high="48" name="SSBO" type="#cat6-src"> <!-- SSBO/image binding point -->
<param name="SSBO_IM" as="SRC_IM"/>
</field>
<field pos="53" name="SSBO_IM" type="bool"/>
</override>
<display>
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.l {DST}, l[{SRC1}], {SRC2}
</display>
@ -500,7 +498,6 @@ SOFTWARE.
<derived name="TYPE_SIZE" expr="#cat6-type-size" type="uint"/>
<pattern pos="0" >1</pattern>
<pattern low="1" high="8" >xxxxxxxx</pattern> <!-- SRC3 -->
<field low="9" high="10" name="D_MINUS_ONE" type="uint"/>
<field pos="11" name="TYPED" type="#cat6-typed"/>
<field low="12" high="13" name="TYPE_SIZE_MINUS_ONE" type="uint"/>
@ -514,30 +511,29 @@ SOFTWARE.
</field>
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern pos="40" >x</pattern>
<assert low="41" high="48">00000000</assert> <!-- SSBO/image binding point -->
<field pos="52" name="G" type="bool"/>
<assert pos="53" >0</assert> <!-- SSBO_IM -->
<encode>
<map name="G">!!(src->flags &amp; IR3_INSTR_G)</map>
<map name="TYPED">src</map>
<map name="D_MINUS_ONE">src->cat6.d - 1</map>
<map name="TYPE_SIZE_MINUS_ONE">src->cat6.iim_val - 1</map>
<map name="SSBO">src->srcs[0]</map>
<map name="SSBO_IM">!!(src->srcs[0]->flags &amp; IR3_REG_IMMED)</map>
<map name="SRC1">extract_cat6_SRC(src, 0)</map>
<map name="SRC1_IM">!!(extract_cat6_SRC(src, 0)->flags &amp; IR3_REG_IMMED)</map>
<map name="SRC2">extract_cat6_SRC(src, 1)</map>
<map name="SRC2_IM">!!(extract_cat6_SRC(src, 1)->flags &amp; IR3_REG_IMMED)</map>
<map name="SRC3">extract_cat6_SRC(src, 2)</map>
<map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags &amp; IR3_REG_IMMED)</map>
</encode>
</bitset>
<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic">
<bitset name="#instruction-cat6-a3xx-atomic-local" extends="#instruction-cat6-a3xx-atomic">
<pattern low="1" high="8" >00000000</pattern> <!-- SRC3 -->
<pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
<pattern pos="52" >0</pattern> <!-- "G" -->
<pattern pos="53" >0</pattern> <!-- SSBO_IM -->
</bitset>
<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic-local">
<!-- TODO when asm parser is updated, shift display templates, etc, here -->
</bitset>
<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic">
<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic-local">
<!-- TODO when asm parser is updated, shift display templates, etc, here -->
</bitset>
@ -585,6 +581,136 @@ SOFTWARE.
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
</bitset>
<bitset name="#instruction-cat6-a3xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
<doc>
Pre-a6xx atomics for Image/SSBO
</doc>
<gen max="599"/>
<display>
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
</display>
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
<field low="41" high="48" name="SSBO" type="#cat6-src"> <!-- SSBO/image binding point -->
<param name="SSBO_IM" as="SRC_IM"/>
</field>
<pattern pos="52" >1</pattern> <!-- "G" -->
<field pos="53" name="SSBO_IM" type="bool"/>
<encode>
<map name="SSBO">src->srcs[0]</map>
<map name="SSBO_IM">!!(src->srcs[0]->flags &amp; IR3_REG_IMMED)</map>
<map name="SRC3">extract_cat6_SRC(src, 2)</map>
<map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags &amp; IR3_REG_IMMED)</map>
</encode>
</bitset>
<bitset name="atomic.s.add" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10000</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.sub" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10001</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.xchg" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10010</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.inc" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10011</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.dec" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10100</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.cmpxchg" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10101</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.min" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10110</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.max" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">10111</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.and" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">11000</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.or" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">11001</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.s.xor" extends="#instruction-cat6-a3xx-atomic-global">
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
</bitset>
<bitset name="#instruction-cat6-a6xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
<doc>
a6xx+ global atomics which take iova in SRC1
</doc>
<gen min="600"/>
<display>
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, {SRC1}, {SRC2}
</display>
<pattern low="1" high="8" >00000000</pattern> <!-- SRC3 -->
<pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
<pattern pos="52" >1</pattern> <!-- "G" -->
<pattern pos="53" >0</pattern> <!-- SSBO_IM -->
</bitset>
<bitset name="atomic.g.add" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10000</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.sub" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10001</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.xchg" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10010</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.inc" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10011</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.dec" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10100</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.cmpxchg" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10101</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.min" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10110</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.max" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">10111</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.and" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">11000</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.or" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">11001</pattern> <!-- OPC -->
</bitset>
<bitset name="atomic.g.xor" extends="#instruction-cat6-a6xx-atomic-global">
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
</bitset>
<!--
New a6xx+ encodings for potentially bindless image/ssbo:
@ -850,11 +976,6 @@ SOFTWARE.
{TYPE_SIZE_MINUS_ONE} + 1
</expr>
<!-- Image/SSBO (ie. not local) -->
<expr name="#cat6-global">
{G}
</expr>
<bitset name="#cat6-typed" size="1">
<override>
<expr>{TYPED}</expr>