mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
freedreno/ir3: shared variable support
Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
dd75abc6f3
commit
eaae81058c
3 changed files with 177 additions and 2 deletions
|
|
@ -1432,6 +1432,149 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
return atomic;
|
||||
}
|
||||
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
static void
|
||||
emit_intrinsic_load_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *ldl, *offset;
|
||||
unsigned base;
|
||||
|
||||
offset = get_src(ctx, &intr->src[0])[0];
|
||||
base = intr->const_index[0];
|
||||
|
||||
ldl = ir3_LDL(b, offset, 0, create_immed(b, intr->num_components), 0);
|
||||
ldl->cat6.src_offset = base;
|
||||
ldl->cat6.type = TYPE_U32;
|
||||
ldl->regs[0]->wrmask = MASK(intr->num_components);
|
||||
|
||||
mark_read(ctx, ldl);
|
||||
|
||||
split_dest(b, dst, ldl, 0, intr->num_components);
|
||||
}
|
||||
|
||||
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
|
||||
static void
|
||||
emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *stl, *offset;
|
||||
struct ir3_instruction * const *value;
|
||||
unsigned base, wrmask;
|
||||
|
||||
value = get_src(ctx, &intr->src[0]);
|
||||
offset = get_src(ctx, &intr->src[1])[0];
|
||||
|
||||
base = intr->const_index[0];
|
||||
wrmask = intr->const_index[1];
|
||||
|
||||
/* Combine groups of consecutive enabled channels in one write
|
||||
* message. We use ffs to find the first enabled channel and then ffs on
|
||||
* the bit-inverse, down-shifted writemask to determine the length of
|
||||
* the block of enabled bits.
|
||||
*
|
||||
* (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
|
||||
*/
|
||||
while (wrmask) {
|
||||
unsigned first_component = ffs(wrmask) - 1;
|
||||
unsigned length = ffs(~(wrmask >> first_component)) - 1;
|
||||
|
||||
stl = ir3_STL(b, offset, 0,
|
||||
create_collect(b, &value[first_component], length), 0,
|
||||
create_immed(b, length), 0);
|
||||
stl->cat6.dst_offset = first_component + base;
|
||||
stl->cat6.type = TYPE_U32;
|
||||
|
||||
mark_write(ctx, stl);
|
||||
array_insert(b, b->keeps, stl);
|
||||
|
||||
/* Clear the bits in the writemask that we just wrote, then try
|
||||
* again to see if more channels are left.
|
||||
*/
|
||||
wrmask &= (15 << (first_component + length));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* CS shared variable atomic intrinsics
|
||||
*
|
||||
* All of the shared variable atomic memory operations read a value from
|
||||
* memory, compute a new value using one of the operations below, write the
|
||||
* new value to memory, and return the original value read.
|
||||
*
|
||||
* All operations take 2 sources except CompSwap that takes 3. These
|
||||
* sources represent:
|
||||
*
|
||||
* 0: The offset into the shared variable storage region that the atomic
|
||||
* operation will operate on.
|
||||
* 1: The data parameter to the atomic function (i.e. the value to add
|
||||
* in shared_atomic_add, etc).
|
||||
* 2: For CompSwap only: the second data parameter.
|
||||
*/
|
||||
static struct ir3_instruction *
|
||||
emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *atomic, *src0, *src1;
|
||||
type_t type = TYPE_U32;
|
||||
|
||||
src0 = get_src(ctx, &intr->src[0])[0]; /* offset */
|
||||
src1 = get_src(ctx, &intr->src[1])[0]; /* value */
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_shared_atomic_add:
|
||||
atomic = ir3_ATOMIC_ADD(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_imin:
|
||||
atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_umin:
|
||||
atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_imax:
|
||||
atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_umax:
|
||||
atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_and:
|
||||
atomic = ir3_ATOMIC_AND(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_or:
|
||||
atomic = ir3_ATOMIC_OR(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_xor:
|
||||
atomic = ir3_ATOMIC_XOR(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_exchange:
|
||||
atomic = ir3_ATOMIC_XCHG(b, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_comp_swap:
|
||||
/* for cmpxchg, src1 is [ui]vec2(data, compare): */
|
||||
src1 = create_collect(b, (struct ir3_instruction*[]){
|
||||
get_src(ctx, &intr->src[2])[0],
|
||||
src1,
|
||||
}, 2);
|
||||
atomic = ir3_ATOMIC_CMPXCHG(b, src0, 0, src1, 0);
|
||||
break;
|
||||
default:
|
||||
unreachable("boo");
|
||||
}
|
||||
|
||||
atomic->cat6.iim_val = 1;
|
||||
atomic->cat6.d = 1;
|
||||
atomic->cat6.type = type;
|
||||
mark_write(ctx, atomic);
|
||||
|
||||
/* even if nothing consume the result, we can't DCE the instruction: */
|
||||
array_insert(b, b->keeps, atomic);
|
||||
|
||||
return atomic;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
|
|
@ -1586,6 +1729,24 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_ssbo_atomic_comp_swap:
|
||||
dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_load_shared:
|
||||
emit_intrinsic_load_shared(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_store_shared:
|
||||
emit_intrinsic_store_shared(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_shared_atomic_add:
|
||||
case nir_intrinsic_shared_atomic_imin:
|
||||
case nir_intrinsic_shared_atomic_umin:
|
||||
case nir_intrinsic_shared_atomic_imax:
|
||||
case nir_intrinsic_shared_atomic_umax:
|
||||
case nir_intrinsic_shared_atomic_and:
|
||||
case nir_intrinsic_shared_atomic_or:
|
||||
case nir_intrinsic_shared_atomic_xor:
|
||||
case nir_intrinsic_shared_atomic_exchange:
|
||||
case nir_intrinsic_shared_atomic_comp_swap:
|
||||
dst[0] = emit_intrinsic_atomic_shared(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_barrier:
|
||||
case nir_intrinsic_memory_barrier:
|
||||
case nir_intrinsic_group_memory_barrier:
|
||||
|
|
|
|||
|
|
@ -194,11 +194,20 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
|
|||
if (is_store(instr) && (n == 1))
|
||||
return false;
|
||||
|
||||
if ((instr->opc == OPC_LDL) && (n != 1))
|
||||
return false;
|
||||
|
||||
if ((instr->opc == OPC_STL) && (n != 2))
|
||||
return false;
|
||||
|
||||
/* disallow CP into anything but the SSBO slot argument for
|
||||
* atomics:
|
||||
*/
|
||||
if (is_atomic(instr->opc) && (n != 0))
|
||||
return false;
|
||||
|
||||
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
|
||||
return false;
|
||||
}
|
||||
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -191,13 +191,18 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
/* seems like ldlv needs (ss) bit instead?? which is odd but
|
||||
* makes a bunch of flat-varying tests start working on a4xx.
|
||||
*/
|
||||
if (n->opc == OPC_LDLV)
|
||||
if ((n->opc == OPC_LDLV) || (n->opc == OPC_LDL))
|
||||
regmask_set(&needs_ss, n->regs[0]);
|
||||
else
|
||||
regmask_set(&needs_sy, n->regs[0]);
|
||||
} else if (is_atomic(n->opc)) {
|
||||
if (n->flags & IR3_INSTR_G)
|
||||
regmask_set(&needs_sy, n->regs[0]);
|
||||
else
|
||||
regmask_set(&needs_ss, n->regs[0]);
|
||||
}
|
||||
|
||||
if ((n->opc == OPC_LDGB) || (n->opc == OPC_STGB) || is_atomic(n->opc))
|
||||
if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G)))
|
||||
ctx->has_ssbo = true;
|
||||
|
||||
/* both tex/sfu appear to not always immediately consume
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue