mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
freedreno/ir3: add a6xx global atomics and separate atomic opcodes
Separating atomic opcodes makes it possible to express a6xx global atomics, which take an iova in SRC1. They would be needed by VK_KHR_buffer_device_address. The change also makes it easier to distinguish atomics in conditions. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8717>
This commit is contained in:
parent
c5d6e57e42
commit
5d5b1fc472
13 changed files with 465 additions and 137 deletions
|
|
@ -4638,12 +4638,12 @@ shader-blocks:
|
|||
size: 2048
|
||||
:0:0000:0000[00000000x_00003002x] nop
|
||||
:0:0001:0001[00000000x_00000000x] nop
|
||||
:6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
|
||||
:6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
|
||||
:6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
|
||||
:6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
|
||||
:6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
|
||||
:6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
|
||||
:6:0002:0002[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
|
||||
:6:0003:0003[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
|
||||
:6:0004:0004[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
|
||||
:6:0005:0005[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
|
||||
:6:0006:0006[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
|
||||
:6:0007:0007[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
|
||||
-----------------------------------------------
|
||||
8192 (0x2000) bytes
|
||||
000000: 00003002 00000000 00000000 00000000 |.0..............|
|
||||
|
|
|
|||
|
|
@ -333,6 +333,39 @@ static const struct opc_info {
|
|||
OPC(6, OPC_ATOMIC_AND, atomic.and),
|
||||
OPC(6, OPC_ATOMIC_OR, atomic.or),
|
||||
OPC(6, OPC_ATOMIC_XOR, atomic.xor),
|
||||
OPC(6, OPC_ATOMIC_B_ADD, atomic.b.add),
|
||||
OPC(6, OPC_ATOMIC_B_SUB, atomic.b.sub),
|
||||
OPC(6, OPC_ATOMIC_B_XCHG, atomic.b.xchg),
|
||||
OPC(6, OPC_ATOMIC_B_INC, atomic.b.inc),
|
||||
OPC(6, OPC_ATOMIC_B_DEC, atomic.b.dec),
|
||||
OPC(6, OPC_ATOMIC_B_CMPXCHG, atomic.b.cmpxchg),
|
||||
OPC(6, OPC_ATOMIC_B_MIN, atomic.b.min),
|
||||
OPC(6, OPC_ATOMIC_B_MAX, atomic.b.max),
|
||||
OPC(6, OPC_ATOMIC_B_AND, atomic.b.and),
|
||||
OPC(6, OPC_ATOMIC_B_OR, atomic.b.or),
|
||||
OPC(6, OPC_ATOMIC_B_XOR, atomic.b.xor),
|
||||
OPC(6, OPC_ATOMIC_S_ADD, atomic.s.add),
|
||||
OPC(6, OPC_ATOMIC_S_SUB, atomic.s.sub),
|
||||
OPC(6, OPC_ATOMIC_S_XCHG, atomic.s.xchg),
|
||||
OPC(6, OPC_ATOMIC_S_INC, atomic.s.inc),
|
||||
OPC(6, OPC_ATOMIC_S_DEC, atomic.s.dec),
|
||||
OPC(6, OPC_ATOMIC_S_CMPXCHG, atomic.s.cmpxchg),
|
||||
OPC(6, OPC_ATOMIC_S_MIN, atomic.s.min),
|
||||
OPC(6, OPC_ATOMIC_S_MAX, atomic.s.max),
|
||||
OPC(6, OPC_ATOMIC_S_AND, atomic.s.and),
|
||||
OPC(6, OPC_ATOMIC_S_OR, atomic.s.or),
|
||||
OPC(6, OPC_ATOMIC_S_XOR, atomic.s.xor),
|
||||
OPC(6, OPC_ATOMIC_G_ADD, atomic.g.add),
|
||||
OPC(6, OPC_ATOMIC_G_SUB, atomic.g.sub),
|
||||
OPC(6, OPC_ATOMIC_G_XCHG, atomic.g.xchg),
|
||||
OPC(6, OPC_ATOMIC_G_INC, atomic.g.inc),
|
||||
OPC(6, OPC_ATOMIC_G_DEC, atomic.g.dec),
|
||||
OPC(6, OPC_ATOMIC_G_CMPXCHG, atomic.g.cmpxchg),
|
||||
OPC(6, OPC_ATOMIC_G_MIN, atomic.g.min),
|
||||
OPC(6, OPC_ATOMIC_G_MAX, atomic.g.max),
|
||||
OPC(6, OPC_ATOMIC_G_AND, atomic.g.and),
|
||||
OPC(6, OPC_ATOMIC_G_OR, atomic.g.or),
|
||||
OPC(6, OPC_ATOMIC_G_XOR, atomic.g.xor),
|
||||
OPC(6, OPC_LDGB, ldgb),
|
||||
OPC(6, OPC_STGB, stgb),
|
||||
OPC(6, OPC_STIB, stib),
|
||||
|
|
|
|||
|
|
@ -306,11 +306,35 @@ typedef enum {
|
|||
OPC_ATOMIC_B_OR = _OPC(6, 53),
|
||||
OPC_ATOMIC_B_XOR = _OPC(6, 54),
|
||||
|
||||
OPC_LDG_A = _OPC(6, 55),
|
||||
OPC_STG_A = _OPC(6, 56),
|
||||
OPC_ATOMIC_S_ADD = _OPC(6, 55),
|
||||
OPC_ATOMIC_S_SUB = _OPC(6, 56),
|
||||
OPC_ATOMIC_S_XCHG = _OPC(6, 57),
|
||||
OPC_ATOMIC_S_INC = _OPC(6, 58),
|
||||
OPC_ATOMIC_S_DEC = _OPC(6, 59),
|
||||
OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60),
|
||||
OPC_ATOMIC_S_MIN = _OPC(6, 61),
|
||||
OPC_ATOMIC_S_MAX = _OPC(6, 62),
|
||||
OPC_ATOMIC_S_AND = _OPC(6, 63),
|
||||
OPC_ATOMIC_S_OR = _OPC(6, 64),
|
||||
OPC_ATOMIC_S_XOR = _OPC(6, 65),
|
||||
|
||||
OPC_SPILL_MACRO = _OPC(6, 57),
|
||||
OPC_RELOAD_MACRO = _OPC(6, 58),
|
||||
OPC_ATOMIC_G_ADD = _OPC(6, 66),
|
||||
OPC_ATOMIC_G_SUB = _OPC(6, 67),
|
||||
OPC_ATOMIC_G_XCHG = _OPC(6, 68),
|
||||
OPC_ATOMIC_G_INC = _OPC(6, 69),
|
||||
OPC_ATOMIC_G_DEC = _OPC(6, 70),
|
||||
OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71),
|
||||
OPC_ATOMIC_G_MIN = _OPC(6, 72),
|
||||
OPC_ATOMIC_G_MAX = _OPC(6, 73),
|
||||
OPC_ATOMIC_G_AND = _OPC(6, 74),
|
||||
OPC_ATOMIC_G_OR = _OPC(6, 75),
|
||||
OPC_ATOMIC_G_XOR = _OPC(6, 76),
|
||||
|
||||
OPC_LDG_A = _OPC(6, 77),
|
||||
OPC_STG_A = _OPC(6, 78),
|
||||
|
||||
OPC_SPILL_MACRO = _OPC(6, 79),
|
||||
OPC_RELOAD_MACRO = _OPC(6, 80),
|
||||
|
||||
/* category 7: */
|
||||
OPC_BAR = _OPC(7, 0),
|
||||
|
|
@ -592,7 +616,7 @@ is_madsh(opc_t opc)
|
|||
}
|
||||
|
||||
static inline bool
|
||||
is_atomic(opc_t opc)
|
||||
is_local_atomic(opc_t opc)
|
||||
{
|
||||
switch (opc) {
|
||||
case OPC_ATOMIC_ADD:
|
||||
|
|
@ -612,6 +636,76 @@ is_atomic(opc_t opc)
|
|||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_global_a3xx_atomic(opc_t opc)
|
||||
{
|
||||
switch (opc) {
|
||||
case OPC_ATOMIC_S_ADD:
|
||||
case OPC_ATOMIC_S_SUB:
|
||||
case OPC_ATOMIC_S_XCHG:
|
||||
case OPC_ATOMIC_S_INC:
|
||||
case OPC_ATOMIC_S_DEC:
|
||||
case OPC_ATOMIC_S_CMPXCHG:
|
||||
case OPC_ATOMIC_S_MIN:
|
||||
case OPC_ATOMIC_S_MAX:
|
||||
case OPC_ATOMIC_S_AND:
|
||||
case OPC_ATOMIC_S_OR:
|
||||
case OPC_ATOMIC_S_XOR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_global_a6xx_atomic(opc_t opc)
|
||||
{
|
||||
switch (opc) {
|
||||
case OPC_ATOMIC_G_ADD:
|
||||
case OPC_ATOMIC_G_SUB:
|
||||
case OPC_ATOMIC_G_XCHG:
|
||||
case OPC_ATOMIC_G_INC:
|
||||
case OPC_ATOMIC_G_DEC:
|
||||
case OPC_ATOMIC_G_CMPXCHG:
|
||||
case OPC_ATOMIC_G_MIN:
|
||||
case OPC_ATOMIC_G_MAX:
|
||||
case OPC_ATOMIC_G_AND:
|
||||
case OPC_ATOMIC_G_OR:
|
||||
case OPC_ATOMIC_G_XOR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_bindless_atomic(opc_t opc)
|
||||
{
|
||||
switch (opc) {
|
||||
case OPC_ATOMIC_B_ADD:
|
||||
case OPC_ATOMIC_B_SUB:
|
||||
case OPC_ATOMIC_B_XCHG:
|
||||
case OPC_ATOMIC_B_INC:
|
||||
case OPC_ATOMIC_B_DEC:
|
||||
case OPC_ATOMIC_B_CMPXCHG:
|
||||
case OPC_ATOMIC_B_MIN:
|
||||
case OPC_ATOMIC_B_MAX:
|
||||
case OPC_ATOMIC_B_AND:
|
||||
case OPC_ATOMIC_B_OR:
|
||||
case OPC_ATOMIC_B_XOR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_atomic(opc_t opc)
|
||||
{
|
||||
return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
|
||||
is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_ssbo(opc_t opc)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -952,10 +952,11 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
|
|||
/* disallow immediates in anything but the SSBO slot argument for
|
||||
* cat6 instructions:
|
||||
*/
|
||||
if (is_atomic(instr->opc) && (n != 0))
|
||||
if (is_global_a3xx_atomic(instr->opc) && (n != 0))
|
||||
return false;
|
||||
|
||||
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
|
||||
if (is_local_atomic(instr->opc) || is_global_a6xx_atomic(instr->opc) ||
|
||||
is_bindless_atomic(instr->opc))
|
||||
return false;
|
||||
|
||||
if (instr->opc == OPC_STG && (n == 2))
|
||||
|
|
|
|||
|
|
@ -282,19 +282,18 @@ struct ir3_instruction {
|
|||
IR3_INSTR_P = 0x080,
|
||||
IR3_INSTR_S = 0x100,
|
||||
IR3_INSTR_S2EN = 0x200,
|
||||
IR3_INSTR_G = 0x400,
|
||||
IR3_INSTR_SAT = 0x800,
|
||||
IR3_INSTR_SAT = 0x400,
|
||||
/* (cat5/cat6) Bindless */
|
||||
IR3_INSTR_B = 0x1000,
|
||||
IR3_INSTR_B = 0x800,
|
||||
/* (cat5/cat6) nonuniform */
|
||||
IR3_INSTR_NONUNIF = 0x02000,
|
||||
IR3_INSTR_NONUNIF = 0x1000,
|
||||
/* (cat5-only) Get some parts of the encoding from a1.x */
|
||||
IR3_INSTR_A1EN = 0x04000,
|
||||
IR3_INSTR_A1EN = 0x02000,
|
||||
/* meta-flags, for intermediate stages of IR, ie.
|
||||
* before register assignment is done:
|
||||
*/
|
||||
IR3_INSTR_MARK = 0x08000,
|
||||
IR3_INSTR_UNUSED = 0x10000,
|
||||
IR3_INSTR_MARK = 0x04000,
|
||||
IR3_INSTR_UNUSED = 0x08000,
|
||||
} flags;
|
||||
uint8_t repeat;
|
||||
uint8_t nop;
|
||||
|
|
@ -2183,17 +2182,28 @@ INSTR3NODST(STIB);
|
|||
INSTR2(LDIB);
|
||||
INSTR5(LDG_A);
|
||||
INSTR6NODST(STG_A);
|
||||
INSTR3F(G, ATOMIC_ADD)
|
||||
INSTR3F(G, ATOMIC_SUB)
|
||||
INSTR3F(G, ATOMIC_XCHG)
|
||||
INSTR3F(G, ATOMIC_INC)
|
||||
INSTR3F(G, ATOMIC_DEC)
|
||||
INSTR3F(G, ATOMIC_CMPXCHG)
|
||||
INSTR3F(G, ATOMIC_MIN)
|
||||
INSTR3F(G, ATOMIC_MAX)
|
||||
INSTR3F(G, ATOMIC_AND)
|
||||
INSTR3F(G, ATOMIC_OR)
|
||||
INSTR3F(G, ATOMIC_XOR)
|
||||
INSTR2(ATOMIC_G_ADD)
|
||||
INSTR2(ATOMIC_G_SUB)
|
||||
INSTR2(ATOMIC_G_XCHG)
|
||||
INSTR2(ATOMIC_G_INC)
|
||||
INSTR2(ATOMIC_G_DEC)
|
||||
INSTR2(ATOMIC_G_CMPXCHG)
|
||||
INSTR2(ATOMIC_G_MIN)
|
||||
INSTR2(ATOMIC_G_MAX)
|
||||
INSTR2(ATOMIC_G_AND)
|
||||
INSTR2(ATOMIC_G_OR)
|
||||
INSTR2(ATOMIC_G_XOR)
|
||||
INSTR3(ATOMIC_B_ADD)
|
||||
INSTR3(ATOMIC_B_SUB)
|
||||
INSTR3(ATOMIC_B_XCHG)
|
||||
INSTR3(ATOMIC_B_INC)
|
||||
INSTR3(ATOMIC_B_DEC)
|
||||
INSTR3(ATOMIC_B_CMPXCHG)
|
||||
INSTR3(ATOMIC_B_MIN)
|
||||
INSTR3(ATOMIC_B_MAX)
|
||||
INSTR3(ATOMIC_B_AND)
|
||||
INSTR3(ATOMIC_B_OR)
|
||||
INSTR3(ATOMIC_B_XOR)
|
||||
#elif GPU >= 400
|
||||
INSTR3(LDGB)
|
||||
#if GPU >= 500
|
||||
|
|
@ -2201,17 +2211,17 @@ INSTR3(LDIB)
|
|||
#endif
|
||||
INSTR4NODST(STGB)
|
||||
INSTR4NODST(STIB)
|
||||
INSTR4F(G, ATOMIC_ADD)
|
||||
INSTR4F(G, ATOMIC_SUB)
|
||||
INSTR4F(G, ATOMIC_XCHG)
|
||||
INSTR4F(G, ATOMIC_INC)
|
||||
INSTR4F(G, ATOMIC_DEC)
|
||||
INSTR4F(G, ATOMIC_CMPXCHG)
|
||||
INSTR4F(G, ATOMIC_MIN)
|
||||
INSTR4F(G, ATOMIC_MAX)
|
||||
INSTR4F(G, ATOMIC_AND)
|
||||
INSTR4F(G, ATOMIC_OR)
|
||||
INSTR4F(G, ATOMIC_XOR)
|
||||
INSTR4(ATOMIC_S_ADD)
|
||||
INSTR4(ATOMIC_S_SUB)
|
||||
INSTR4(ATOMIC_S_XCHG)
|
||||
INSTR4(ATOMIC_S_INC)
|
||||
INSTR4(ATOMIC_S_DEC)
|
||||
INSTR4(ATOMIC_S_CMPXCHG)
|
||||
INSTR4(ATOMIC_S_MIN)
|
||||
INSTR4(ATOMIC_S_MAX)
|
||||
INSTR4(ATOMIC_S_AND)
|
||||
INSTR4(ATOMIC_S_OR)
|
||||
INSTR4(ATOMIC_S_XOR)
|
||||
#endif
|
||||
|
||||
/* cat7 instructions: */
|
||||
|
|
|
|||
|
|
@ -135,39 +135,39 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_ssbo_atomic_add_ir3:
|
||||
atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_ADD(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_imin_ir3:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_umin_ir3:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_imax_ir3:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_umax_ir3:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_and_ir3:
|
||||
atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_AND(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_or_ir3:
|
||||
atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_OR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_xor_ir3:
|
||||
atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_XOR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_exchange_ir3:
|
||||
atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
atomic = ir3_ATOMIC_S_XCHG(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
|
||||
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
|
||||
data = ir3_collect(b, src3, data);
|
||||
struct ir3_instruction *dword_offset = ir3_get_src(ctx, &intr->src[4])[0];
|
||||
atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, data, 0, dword_offset, 0,
|
||||
atomic = ir3_ATOMIC_S_CMPXCHG(b, ssbo, 0, data, 0, dword_offset, 0,
|
||||
byte_offset, 0);
|
||||
break;
|
||||
default:
|
||||
|
|
@ -311,32 +311,32 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_ADD(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_MIN(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_MAX(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_AND(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_OR(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_XOR(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_XCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
|
||||
src0 = ir3_collect(b, ir3_get_src(ctx, &intr->src[4])[0], src0);
|
||||
atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
atomic = ir3_ATOMIC_S_CMPXCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
default:
|
||||
unreachable("boo");
|
||||
|
|
|
|||
|
|
@ -144,36 +144,36 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_ssbo_atomic_add_ir3:
|
||||
atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_imin_ir3:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_umin_ir3:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_imax_ir3:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_umax_ir3:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_and_ir3:
|
||||
atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_or_ir3:
|
||||
atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_xor_ir3:
|
||||
atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_exchange_ir3:
|
||||
atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
|
||||
atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
default:
|
||||
unreachable("boo");
|
||||
|
|
@ -288,39 +288,39 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_bindless_image_atomic_add:
|
||||
atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
case nir_intrinsic_bindless_image_atomic_imin:
|
||||
case nir_intrinsic_bindless_image_atomic_umin:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
case nir_intrinsic_bindless_image_atomic_imax:
|
||||
case nir_intrinsic_bindless_image_atomic_umax:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
case nir_intrinsic_bindless_image_atomic_and:
|
||||
atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
case nir_intrinsic_bindless_image_atomic_or:
|
||||
atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
case nir_intrinsic_bindless_image_atomic_xor:
|
||||
atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
case nir_intrinsic_bindless_image_atomic_exchange:
|
||||
atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
case nir_intrinsic_bindless_image_atomic_comp_swap:
|
||||
atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
default:
|
||||
unreachable("boo");
|
||||
|
|
|
|||
|
|
@ -273,19 +273,18 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
else
|
||||
regmask_set(&state->needs_sy, n->dsts[0]);
|
||||
} else if (is_atomic(n->opc)) {
|
||||
if (n->flags & IR3_INSTR_G) {
|
||||
if (ctx->compiler->gen >= 6) {
|
||||
/* New encoding, returns result via second src: */
|
||||
regmask_set(&state->needs_sy, n->srcs[2]);
|
||||
} else {
|
||||
regmask_set(&state->needs_sy, n->dsts[0]);
|
||||
}
|
||||
if (is_bindless_atomic(n->opc)) {
|
||||
regmask_set(&state->needs_sy, n->srcs[2]);
|
||||
} else if (is_global_a3xx_atomic(n->opc) ||
|
||||
is_global_a6xx_atomic(n->opc)) {
|
||||
regmask_set(&state->needs_sy, n->dsts[0]);
|
||||
} else {
|
||||
regmask_set(&state->needs_ss, n->dsts[0]);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G)))
|
||||
if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
|
||||
is_bindless_atomic(n->opc))
|
||||
ctx->so->has_ssbo = true;
|
||||
|
||||
/* both tex/sfu appear to not always immediately consume
|
||||
|
|
|
|||
|
|
@ -339,6 +339,29 @@ static int parse_w(const char *str)
|
|||
"atomic.b.and" return TOKEN(T_OP_ATOMIC_B_AND);
|
||||
"atomic.b.or" return TOKEN(T_OP_ATOMIC_B_OR);
|
||||
"atomic.b.xor" return TOKEN(T_OP_ATOMIC_B_XOR);
|
||||
"atomic.s.add" return TOKEN(T_OP_ATOMIC_S_ADD);
|
||||
"atomic.s.sub" return TOKEN(T_OP_ATOMIC_S_SUB);
|
||||
"atomic.s.xchg" return TOKEN(T_OP_ATOMIC_S_XCHG);
|
||||
"atomic.s.inc" return TOKEN(T_OP_ATOMIC_S_INC);
|
||||
"atomic.s.dec" return TOKEN(T_OP_ATOMIC_S_DEC);
|
||||
"atomic.s.cmpxchg" return TOKEN(T_OP_ATOMIC_S_CMPXCHG);
|
||||
"atomic.s.min" return TOKEN(T_OP_ATOMIC_S_MIN);
|
||||
"atomic.s.max" return TOKEN(T_OP_ATOMIC_S_MAX);
|
||||
"atomic.s.and" return TOKEN(T_OP_ATOMIC_S_AND);
|
||||
"atomic.s.or" return TOKEN(T_OP_ATOMIC_S_OR);
|
||||
"atomic.s.xor" return TOKEN(T_OP_ATOMIC_S_XOR);
|
||||
"atomic.g.add" return TOKEN(T_OP_ATOMIC_G_ADD);
|
||||
"atomic.g.sub" return TOKEN(T_OP_ATOMIC_G_SUB);
|
||||
"atomic.g.xchg" return TOKEN(T_OP_ATOMIC_G_XCHG);
|
||||
"atomic.g.inc" return TOKEN(T_OP_ATOMIC_G_INC);
|
||||
"atomic.g.dec" return TOKEN(T_OP_ATOMIC_G_DEC);
|
||||
"atomic.g.cmpxchg" return TOKEN(T_OP_ATOMIC_G_CMPXCHG);
|
||||
"atomic.g.min" return TOKEN(T_OP_ATOMIC_G_MIN);
|
||||
"atomic.g.max" return TOKEN(T_OP_ATOMIC_G_MAX);
|
||||
"atomic.g.and" return TOKEN(T_OP_ATOMIC_G_AND);
|
||||
"atomic.g.or" return TOKEN(T_OP_ATOMIC_G_OR);
|
||||
"atomic.g.xor" return TOKEN(T_OP_ATOMIC_G_XOR);
|
||||
|
||||
"ldgb" return TOKEN(T_OP_LDGB);
|
||||
"stgb" return TOKEN(T_OP_STGB);
|
||||
"stib" return TOKEN(T_OP_STIB);
|
||||
|
|
|
|||
|
|
@ -569,6 +569,28 @@ static void print_token(FILE *file, int type, YYSTYPE value)
|
|||
%token <tok> T_OP_ATOMIC_B_AND
|
||||
%token <tok> T_OP_ATOMIC_B_OR
|
||||
%token <tok> T_OP_ATOMIC_B_XOR
|
||||
%token <tok> T_OP_ATOMIC_S_ADD
|
||||
%token <tok> T_OP_ATOMIC_S_SUB
|
||||
%token <tok> T_OP_ATOMIC_S_XCHG
|
||||
%token <tok> T_OP_ATOMIC_S_INC
|
||||
%token <tok> T_OP_ATOMIC_S_DEC
|
||||
%token <tok> T_OP_ATOMIC_S_CMPXCHG
|
||||
%token <tok> T_OP_ATOMIC_S_MIN
|
||||
%token <tok> T_OP_ATOMIC_S_MAX
|
||||
%token <tok> T_OP_ATOMIC_S_AND
|
||||
%token <tok> T_OP_ATOMIC_S_OR
|
||||
%token <tok> T_OP_ATOMIC_S_XOR
|
||||
%token <tok> T_OP_ATOMIC_G_ADD
|
||||
%token <tok> T_OP_ATOMIC_G_SUB
|
||||
%token <tok> T_OP_ATOMIC_G_XCHG
|
||||
%token <tok> T_OP_ATOMIC_G_INC
|
||||
%token <tok> T_OP_ATOMIC_G_DEC
|
||||
%token <tok> T_OP_ATOMIC_G_CMPXCHG
|
||||
%token <tok> T_OP_ATOMIC_G_MIN
|
||||
%token <tok> T_OP_ATOMIC_G_MAX
|
||||
%token <tok> T_OP_ATOMIC_G_AND
|
||||
%token <tok> T_OP_ATOMIC_G_OR
|
||||
%token <tok> T_OP_ATOMIC_G_XOR
|
||||
%token <tok> T_OP_LDGB
|
||||
%token <tok> T_OP_STGB
|
||||
%token <tok> T_OP_STIB
|
||||
|
|
@ -1020,7 +1042,7 @@ cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
|
|||
cat6_offset: cat6_imm_offset
|
||||
| '+' src
|
||||
cat6_dst_offset: offset { instr->cat6.dst_offset = $1; }
|
||||
| '+' src { instr->flags |= IR3_INSTR_G; }
|
||||
| '+' src
|
||||
|
||||
cat6_immed: integer { instr->cat6.iim_val = $1; }
|
||||
|
||||
|
|
@ -1068,14 +1090,39 @@ cat6_atomic_opc: T_OP_ATOMIC_ADD { new_instr(OPC_ATOMIC_ADD); }
|
|||
| T_OP_ATOMIC_OR { new_instr(OPC_ATOMIC_OR); }
|
||||
| T_OP_ATOMIC_XOR { new_instr(OPC_ATOMIC_XOR); }
|
||||
|
||||
cat6_atomic_g: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src {
|
||||
instr->flags |= IR3_INSTR_G;
|
||||
}
|
||||
cat6_a3xx_atomic_opc: T_OP_ATOMIC_S_ADD { new_instr(OPC_ATOMIC_S_ADD); }
|
||||
| T_OP_ATOMIC_S_SUB { new_instr(OPC_ATOMIC_S_SUB); }
|
||||
| T_OP_ATOMIC_S_XCHG { new_instr(OPC_ATOMIC_S_XCHG); }
|
||||
| T_OP_ATOMIC_S_INC { new_instr(OPC_ATOMIC_S_INC); }
|
||||
| T_OP_ATOMIC_S_DEC { new_instr(OPC_ATOMIC_S_DEC); }
|
||||
| T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); }
|
||||
| T_OP_ATOMIC_S_MIN { new_instr(OPC_ATOMIC_S_MIN); }
|
||||
| T_OP_ATOMIC_S_MAX { new_instr(OPC_ATOMIC_S_MAX); }
|
||||
| T_OP_ATOMIC_S_AND { new_instr(OPC_ATOMIC_S_AND); }
|
||||
| T_OP_ATOMIC_S_OR { new_instr(OPC_ATOMIC_S_OR); }
|
||||
| T_OP_ATOMIC_S_XOR { new_instr(OPC_ATOMIC_S_XOR); }
|
||||
|
||||
cat6_a6xx_atomic_opc: T_OP_ATOMIC_G_ADD { new_instr(OPC_ATOMIC_G_ADD); }
|
||||
| T_OP_ATOMIC_G_SUB { new_instr(OPC_ATOMIC_G_SUB); }
|
||||
| T_OP_ATOMIC_G_XCHG { new_instr(OPC_ATOMIC_G_XCHG); }
|
||||
| T_OP_ATOMIC_G_INC { new_instr(OPC_ATOMIC_G_INC); }
|
||||
| T_OP_ATOMIC_G_DEC { new_instr(OPC_ATOMIC_G_DEC); }
|
||||
| T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); }
|
||||
| T_OP_ATOMIC_G_MIN { new_instr(OPC_ATOMIC_G_MIN); }
|
||||
| T_OP_ATOMIC_G_MAX { new_instr(OPC_ATOMIC_G_MAX); }
|
||||
| T_OP_ATOMIC_G_AND { new_instr(OPC_ATOMIC_G_AND); }
|
||||
| T_OP_ATOMIC_G_OR { new_instr(OPC_ATOMIC_G_OR); }
|
||||
| T_OP_ATOMIC_G_XOR { new_instr(OPC_ATOMIC_G_XOR); }
|
||||
|
||||
cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src
|
||||
|
||||
cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src
|
||||
|
||||
cat6_atomic_l: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src
|
||||
|
||||
cat6_atomic: cat6_atomic_g
|
||||
| cat6_atomic_l
|
||||
cat6_atomic: cat6_atomic_l
|
||||
| cat6_a3xx_atomic_s
|
||||
| cat6_a6xx_atomic_g
|
||||
|
||||
cat6_ibo_opc_1src: T_OP_RESINFO { new_instr(OPC_RESINFO); }
|
||||
|
||||
|
|
@ -1104,17 +1151,17 @@ cat6_reg_or_immed: src
|
|||
|
||||
cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B { new_instr(OPC_RESINFO); }
|
||||
|
||||
cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_ADD)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_SUB)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_INC)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_DEC)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_MIN)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_MAX)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_AND)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_OR)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_XOR)->flags |= IR3_INSTR_G; dummy_dst(); }
|
||||
cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); }
|
||||
| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); }
|
||||
| T_OP_STIB_B { new_instr(OPC_STIB); dummy_dst(); }
|
||||
|
||||
cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B { new_instr(OPC_LDIB); }
|
||||
|
|
|
|||
|
|
@ -336,14 +336,17 @@ static const struct test {
|
|||
/* Atomic: */
|
||||
#if 0
|
||||
/* TODO our encoding differs in b53 for these two */
|
||||
INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
|
||||
INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
|
||||
INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
|
||||
INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
|
||||
#else
|
||||
INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
|
||||
INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
|
||||
INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
|
||||
INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
|
||||
#endif
|
||||
INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
|
||||
|
||||
/* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */
|
||||
INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"),
|
||||
|
||||
/* Bindless atomic: */
|
||||
INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
|
||||
INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
|
||||
|
|
|
|||
|
|
@ -113,9 +113,6 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr)
|
|||
}
|
||||
} else if (instr->opc == OPC_DEMOTE) {
|
||||
return OPC_KILL;
|
||||
} else if ((instr->block->shader->compiler->gen >= 6) &&
|
||||
is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
|
||||
return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
|
||||
} else if (s->compiler->gen >= 6) {
|
||||
if (instr->opc == OPC_RESINFO) {
|
||||
return OPC_RESINFO_B;
|
||||
|
|
@ -243,7 +240,7 @@ extract_cat6_DESC_MODE(struct ir3_instruction *instr)
|
|||
static inline struct ir3_register *
|
||||
extract_cat6_SRC(struct ir3_instruction *instr, unsigned n)
|
||||
{
|
||||
if (instr->flags & IR3_INSTR_G) {
|
||||
if (is_global_a3xx_atomic(instr->opc)) {
|
||||
n++;
|
||||
}
|
||||
assert(n < instr->srcs_count);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,14 @@ SOFTWARE.
|
|||
|
||||
<!--
|
||||
Cat6 Instructions: load/store/atomic instructions
|
||||
|
||||
There are instructions with suffixes like:
|
||||
"stg.a", "ldib.b", "atomic.g.add", "atomic.s.add"
|
||||
They have the following meaning:
|
||||
'.a' - "addrcalc" stg/ldg with complex address computations
|
||||
'.b' - "bindless" instructions
|
||||
'.g' - "global" atomics that operate on raw iova addresses
|
||||
'.s' - "ssbo" pre-a6xx image/ssbo atomics
|
||||
-->
|
||||
|
||||
<bitset name="#instruction-cat6" extends="#instruction">
|
||||
|
|
@ -482,16 +490,6 @@ SOFTWARE.
|
|||
to still have an extra src. For now, match that.
|
||||
</doc>
|
||||
|
||||
<override expr="#cat6-global">
|
||||
<display>
|
||||
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
|
||||
</display>
|
||||
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
|
||||
<field low="41" high="48" name="SSBO" type="#cat6-src"> <!-- SSBO/image binding point -->
|
||||
<param name="SSBO_IM" as="SRC_IM"/>
|
||||
</field>
|
||||
<field pos="53" name="SSBO_IM" type="bool"/>
|
||||
</override>
|
||||
<display>
|
||||
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.l {DST}, l[{SRC1}], {SRC2}
|
||||
</display>
|
||||
|
|
@ -500,7 +498,6 @@ SOFTWARE.
|
|||
<derived name="TYPE_SIZE" expr="#cat6-type-size" type="uint"/>
|
||||
|
||||
<pattern pos="0" >1</pattern>
|
||||
<pattern low="1" high="8" >xxxxxxxx</pattern> <!-- SRC3 -->
|
||||
<field low="9" high="10" name="D_MINUS_ONE" type="uint"/>
|
||||
<field pos="11" name="TYPED" type="#cat6-typed"/>
|
||||
<field low="12" high="13" name="TYPE_SIZE_MINUS_ONE" type="uint"/>
|
||||
|
|
@ -514,30 +511,29 @@ SOFTWARE.
|
|||
</field>
|
||||
<field low="32" high="39" name="DST" type="#reg-gpr"/>
|
||||
<pattern pos="40" >x</pattern>
|
||||
<assert low="41" high="48">00000000</assert> <!-- SSBO/image binding point -->
|
||||
<field pos="52" name="G" type="bool"/>
|
||||
<assert pos="53" >0</assert> <!-- SSBO_IM -->
|
||||
<encode>
|
||||
<map name="G">!!(src->flags & IR3_INSTR_G)</map>
|
||||
<map name="TYPED">src</map>
|
||||
<map name="D_MINUS_ONE">src->cat6.d - 1</map>
|
||||
<map name="TYPE_SIZE_MINUS_ONE">src->cat6.iim_val - 1</map>
|
||||
<map name="SSBO">src->srcs[0]</map>
|
||||
<map name="SSBO_IM">!!(src->srcs[0]->flags & IR3_REG_IMMED)</map>
|
||||
<map name="SRC1">extract_cat6_SRC(src, 0)</map>
|
||||
<map name="SRC1_IM">!!(extract_cat6_SRC(src, 0)->flags & IR3_REG_IMMED)</map>
|
||||
<map name="SRC2">extract_cat6_SRC(src, 1)</map>
|
||||
<map name="SRC2_IM">!!(extract_cat6_SRC(src, 1)->flags & IR3_REG_IMMED)</map>
|
||||
<map name="SRC3">extract_cat6_SRC(src, 2)</map>
|
||||
<map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED)</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic">
|
||||
<bitset name="#instruction-cat6-a3xx-atomic-local" extends="#instruction-cat6-a3xx-atomic">
|
||||
<pattern low="1" high="8" >00000000</pattern> <!-- SRC3 -->
|
||||
<pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
|
||||
<pattern pos="52" >0</pattern> <!-- "G" -->
|
||||
<pattern pos="53" >0</pattern> <!-- SSBO_IM -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic-local">
|
||||
<!-- TODO when asm parser is updated, shift display templates, etc, here -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic">
|
||||
<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic-local">
|
||||
<!-- TODO when asm parser is updated, shift display templates, etc, here -->
|
||||
</bitset>
|
||||
|
||||
|
|
@ -585,6 +581,136 @@ SOFTWARE.
|
|||
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat6-a3xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
|
||||
<doc>
|
||||
Pre-a6xx atomics for Image/SSBO
|
||||
</doc>
|
||||
|
||||
<gen max="599"/>
|
||||
|
||||
<display>
|
||||
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
|
||||
</display>
|
||||
|
||||
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
|
||||
<field low="41" high="48" name="SSBO" type="#cat6-src"> <!-- SSBO/image binding point -->
|
||||
<param name="SSBO_IM" as="SRC_IM"/>
|
||||
</field>
|
||||
<pattern pos="52" >1</pattern> <!-- "G" -->
|
||||
<field pos="53" name="SSBO_IM" type="bool"/>
|
||||
|
||||
<encode>
|
||||
<map name="SSBO">src->srcs[0]</map>
|
||||
<map name="SSBO_IM">!!(src->srcs[0]->flags & IR3_REG_IMMED)</map>
|
||||
<map name="SRC3">extract_cat6_SRC(src, 2)</map>
|
||||
<map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED)</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.add" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10000</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.sub" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10001</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.xchg" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10010</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.inc" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10011</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.dec" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10100</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.cmpxchg" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10101</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.min" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10110</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.max" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">10111</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.and" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">11000</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.or" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">11001</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.s.xor" extends="#instruction-cat6-a3xx-atomic-global">
|
||||
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat6-a6xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
|
||||
<doc>
|
||||
a6xx+ global atomics which take iova in SRC1
|
||||
</doc>
|
||||
|
||||
<gen min="600"/>
|
||||
|
||||
<display>
|
||||
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, {SRC1}, {SRC2}
|
||||
</display>
|
||||
|
||||
<pattern low="1" high="8" >00000000</pattern> <!-- SRC3 -->
|
||||
<pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
|
||||
<pattern pos="52" >1</pattern> <!-- "G" -->
|
||||
<pattern pos="53" >0</pattern> <!-- SSBO_IM -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.add" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10000</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.sub" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10001</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.xchg" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10010</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.inc" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10011</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.dec" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10100</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.cmpxchg" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10101</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.min" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10110</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.max" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">10111</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.and" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">11000</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.or" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">11001</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="atomic.g.xor" extends="#instruction-cat6-a6xx-atomic-global">
|
||||
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<!--
|
||||
New a6xx+ encodings for potentially bindless image/ssbo:
|
||||
|
|
@ -850,11 +976,6 @@ SOFTWARE.
|
|||
{TYPE_SIZE_MINUS_ONE} + 1
|
||||
</expr>
|
||||
|
||||
<!-- Image/SSBO (ie. not local) -->
|
||||
<expr name="#cat6-global">
|
||||
{G}
|
||||
</expr>
|
||||
|
||||
<bitset name="#cat6-typed" size="1">
|
||||
<override>
|
||||
<expr>{TYPED}</expr>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue