mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 02:20:11 +01:00
ir3: Add support for subgroup arithmetic
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14107>
This commit is contained in:
parent
a433db60c1
commit
1a78604d20
7 changed files with 487 additions and 105 deletions
|
|
@ -192,6 +192,7 @@ static const struct opc_info {
|
||||||
OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
|
OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
|
||||||
OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
|
OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
|
||||||
OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
|
OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
|
||||||
|
OPC(1, OPC_SCAN_MACRO, scan.macro),
|
||||||
|
|
||||||
/* category 2: */
|
/* category 2: */
|
||||||
OPC(2, OPC_ADD_F, add.f),
|
OPC(2, OPC_ADD_F, add.f),
|
||||||
|
|
|
||||||
|
|
@ -127,6 +127,9 @@ typedef enum {
|
||||||
OPC_READ_FIRST_MACRO = _OPC(1, 55),
|
OPC_READ_FIRST_MACRO = _OPC(1, 55),
|
||||||
OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
|
OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
|
||||||
|
|
||||||
|
/* Macros that expand to a loop */
|
||||||
|
OPC_SCAN_MACRO = _OPC(1, 57),
|
||||||
|
|
||||||
/* category 2: */
|
/* category 2: */
|
||||||
OPC_ADD_F = _OPC(2, 0),
|
OPC_ADD_F = _OPC(2, 0),
|
||||||
OPC_MIN_F = _OPC(2, 1),
|
OPC_MIN_F = _OPC(2, 1),
|
||||||
|
|
|
||||||
|
|
@ -239,6 +239,22 @@ struct ir3_register {
|
||||||
arr[arr##_count++] = __VA_ARGS__; \
|
arr[arr##_count++] = __VA_ARGS__; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/* Operation performed by a subgroup reduce/scan (stored in cat1.reduce_op of
 * an OPC_SCAN_MACRO instruction). The suffix mirrors the ALU opcode that
 * implements the operation: get_reduce_op() maps the NIR integer ops to the
 * _U/_S variants, the NIR float ops to the _F variants and the NIR bitwise
 * ops to the _B variants.
 *
 * The enumerator order is part of the definition; append new entries at the
 * end.
 */
typedef enum {
   /* Sums */
   REDUCE_OP_ADD_U = 0,
   REDUCE_OP_ADD_F,

   /* Products */
   REDUCE_OP_MUL_U,
   REDUCE_OP_MUL_F,

   /* Minimum: unsigned, signed, float */
   REDUCE_OP_MIN_U,
   REDUCE_OP_MIN_S,
   REDUCE_OP_MIN_F,

   /* Maximum: unsigned, signed, float */
   REDUCE_OP_MAX_U,
   REDUCE_OP_MAX_S,
   REDUCE_OP_MAX_F,

   /* Bitwise */
   REDUCE_OP_AND_B,
   REDUCE_OP_OR_B,
   REDUCE_OP_XOR_B,
} reduce_op_t;
|
||||||
|
|
||||||
struct ir3_instruction {
|
struct ir3_instruction {
|
||||||
struct ir3_block *block;
|
struct ir3_block *block;
|
||||||
opc_t opc;
|
opc_t opc;
|
||||||
|
|
@ -324,6 +340,7 @@ struct ir3_instruction {
|
||||||
struct {
|
struct {
|
||||||
type_t src_type, dst_type;
|
type_t src_type, dst_type;
|
||||||
round_t round;
|
round_t round;
|
||||||
|
reduce_op_t reduce_op;
|
||||||
} cat1;
|
} cat1;
|
||||||
struct {
|
struct {
|
||||||
enum {
|
enum {
|
||||||
|
|
@ -896,6 +913,7 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr)
|
||||||
case OPC_READ_COND_MACRO:
|
case OPC_READ_COND_MACRO:
|
||||||
case OPC_READ_FIRST_MACRO:
|
case OPC_READ_FIRST_MACRO:
|
||||||
case OPC_SWZ_SHARED_MACRO:
|
case OPC_SWZ_SHARED_MACRO:
|
||||||
|
case OPC_SCAN_MACRO:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -1823,6 +1823,148 @@ get_frag_coord(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||||
return ctx->frag_coord;
|
return ctx->frag_coord;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This is a bit of a hack until ir3_context is converted to store SSA values
|
||||||
|
* as ir3_register's instead of ir3_instruction's. Pick out a given destination
|
||||||
|
* of an instruction with multiple destinations using a mov that will get folded
|
||||||
|
* away by ir3_cp.
|
||||||
|
*/
|
||||||
|
static struct ir3_instruction *
|
||||||
|
create_multidst_mov(struct ir3_block *block, struct ir3_register *dst)
|
||||||
|
{
|
||||||
|
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
|
||||||
|
unsigned dst_flags = dst->flags & IR3_REG_HALF;
|
||||||
|
unsigned src_flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
|
||||||
|
|
||||||
|
__ssa_dst(mov)->flags |= dst_flags;
|
||||||
|
struct ir3_register *src =
|
||||||
|
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | src_flags);
|
||||||
|
src->wrmask = dst->wrmask;
|
||||||
|
src->def = dst;
|
||||||
|
debug_assert(!(dst->flags & IR3_REG_RELATIV));
|
||||||
|
mov->cat1.src_type = mov->cat1.dst_type =
|
||||||
|
(dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||||
|
return mov;
|
||||||
|
}
|
||||||
|
|
||||||
|
static reduce_op_t
|
||||||
|
get_reduce_op(nir_op opc)
|
||||||
|
{
|
||||||
|
switch (opc) {
|
||||||
|
case nir_op_iadd: return REDUCE_OP_ADD_U;
|
||||||
|
case nir_op_fadd: return REDUCE_OP_ADD_F;
|
||||||
|
case nir_op_imul: return REDUCE_OP_MUL_U;
|
||||||
|
case nir_op_fmul: return REDUCE_OP_MUL_F;
|
||||||
|
case nir_op_umin: return REDUCE_OP_MIN_U;
|
||||||
|
case nir_op_imin: return REDUCE_OP_MIN_S;
|
||||||
|
case nir_op_fmin: return REDUCE_OP_MIN_F;
|
||||||
|
case nir_op_umax: return REDUCE_OP_MAX_U;
|
||||||
|
case nir_op_imax: return REDUCE_OP_MAX_S;
|
||||||
|
case nir_op_fmax: return REDUCE_OP_MAX_F;
|
||||||
|
case nir_op_iand: return REDUCE_OP_AND_B;
|
||||||
|
case nir_op_ior: return REDUCE_OP_OR_B;
|
||||||
|
case nir_op_ixor: return REDUCE_OP_XOR_B;
|
||||||
|
default:
|
||||||
|
unreachable("unknown NIR reduce op");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
get_reduce_identity(nir_op opc, unsigned size)
|
||||||
|
{
|
||||||
|
switch (opc) {
|
||||||
|
case nir_op_iadd:
|
||||||
|
return 0;
|
||||||
|
case nir_op_fadd:
|
||||||
|
return size == 32 ? fui(0.0f) : _mesa_float_to_half(0.0f);
|
||||||
|
case nir_op_imul:
|
||||||
|
return 1;
|
||||||
|
case nir_op_fmul:
|
||||||
|
return size == 32 ? fui(1.0f) : _mesa_float_to_half(1.0f);
|
||||||
|
case nir_op_umax:
|
||||||
|
return 0;
|
||||||
|
case nir_op_imax:
|
||||||
|
return size == 32 ? INT32_MIN : (uint32_t)INT16_MIN;
|
||||||
|
case nir_op_fmax:
|
||||||
|
return size == 32 ? fui(-INFINITY) : _mesa_float_to_half(-INFINITY);
|
||||||
|
case nir_op_umin:
|
||||||
|
return size == 32 ? UINT32_MAX : UINT16_MAX;
|
||||||
|
case nir_op_imin:
|
||||||
|
return size == 32 ? INT32_MAX : (uint32_t)INT16_MAX;
|
||||||
|
case nir_op_fmin:
|
||||||
|
return size == 32 ? fui(INFINITY) : _mesa_float_to_half(INFINITY);
|
||||||
|
case nir_op_iand:
|
||||||
|
return size == 32 ? ~0 : (size == 16 ? (uint32_t)(uint16_t)~0 : 1);
|
||||||
|
case nir_op_ior:
|
||||||
|
return 0;
|
||||||
|
case nir_op_ixor:
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
unreachable("unknown NIR reduce op");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ir3_instruction *
|
||||||
|
emit_intrinsic_reduce(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||||
|
{
|
||||||
|
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
|
||||||
|
nir_op nir_reduce_op = (nir_op) nir_intrinsic_reduction_op(intr);
|
||||||
|
reduce_op_t reduce_op = get_reduce_op(nir_reduce_op);
|
||||||
|
unsigned dst_size = nir_dest_bit_size(intr->dest);
|
||||||
|
unsigned flags = (ir3_bitsize(ctx, dst_size) == 16) ? IR3_REG_HALF : 0;
|
||||||
|
|
||||||
|
/* Note: the shared reg is initialized to the identity, so we need it to
|
||||||
|
* always be 32-bit even when the source isn't because half shared regs are
|
||||||
|
* not supported.
|
||||||
|
*/
|
||||||
|
struct ir3_instruction *identity =
|
||||||
|
create_immed(ctx->block, get_reduce_identity(nir_reduce_op, dst_size));
|
||||||
|
identity = ir3_READ_FIRST_MACRO(ctx->block, identity, 0);
|
||||||
|
identity->dsts[0]->flags |= IR3_REG_SHARED;
|
||||||
|
|
||||||
|
/* OPC_SCAN_MACRO has the following destinations:
|
||||||
|
* - Exclusive scan result (interferes with source)
|
||||||
|
* - Inclusive scan result
|
||||||
|
* - Shared reg reduction result, must be initialized to the identity
|
||||||
|
*
|
||||||
|
* The loop computes all three results at the same time, we just have to
|
||||||
|
* choose which destination to return.
|
||||||
|
*/
|
||||||
|
struct ir3_instruction *scan =
|
||||||
|
ir3_instr_create(ctx->block, OPC_SCAN_MACRO, 3, 2);
|
||||||
|
scan->cat1.reduce_op = reduce_op;
|
||||||
|
|
||||||
|
struct ir3_register *exclusive = __ssa_dst(scan);
|
||||||
|
exclusive->flags |= flags | IR3_REG_EARLY_CLOBBER;
|
||||||
|
struct ir3_register *inclusive = __ssa_dst(scan);
|
||||||
|
inclusive->flags |= flags;
|
||||||
|
struct ir3_register *reduce = __ssa_dst(scan);
|
||||||
|
reduce->flags |= IR3_REG_SHARED;
|
||||||
|
|
||||||
|
/* The 32-bit multiply macro reads its sources after writing a partial result
|
||||||
|
* to the destination, therefore inclusive also interferes with the source.
|
||||||
|
*/
|
||||||
|
if (reduce_op == REDUCE_OP_MUL_U && dst_size == 32)
|
||||||
|
inclusive->flags |= IR3_REG_EARLY_CLOBBER;
|
||||||
|
|
||||||
|
/* Normal source */
|
||||||
|
__ssa_src(scan, src, 0);
|
||||||
|
|
||||||
|
/* shared reg tied source */
|
||||||
|
struct ir3_register *reduce_init = __ssa_src(scan, identity, IR3_REG_SHARED);
|
||||||
|
ir3_reg_tie(reduce, reduce_init);
|
||||||
|
|
||||||
|
struct ir3_register *dst;
|
||||||
|
switch (intr->intrinsic) {
|
||||||
|
case nir_intrinsic_reduce: dst = reduce; break;
|
||||||
|
case nir_intrinsic_inclusive_scan: dst = inclusive; break;
|
||||||
|
case nir_intrinsic_exclusive_scan: dst = exclusive; break;
|
||||||
|
default:
|
||||||
|
unreachable("unknown reduce intrinsic");
|
||||||
|
}
|
||||||
|
|
||||||
|
return create_multidst_mov(ctx->block, dst);
|
||||||
|
}
|
||||||
|
|
||||||
static void setup_input(struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
static void setup_input(struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
||||||
static void setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
static void setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
||||||
|
|
||||||
|
|
@ -2425,6 +2567,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_reduce:
|
||||||
|
case nir_intrinsic_inclusive_scan:
|
||||||
|
case nir_intrinsic_exclusive_scan:
|
||||||
|
dst[0] = emit_intrinsic_reduce(ctx, intr);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
|
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
|
||||||
nir_intrinsic_infos[intr->intrinsic].name);
|
nir_intrinsic_infos[intr->intrinsic].name);
|
||||||
|
|
|
||||||
|
|
@ -71,14 +71,106 @@ mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
|
||||||
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
|
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mov_reg(struct ir3_block *block, struct ir3_register *dst,
|
||||||
|
struct ir3_register *src)
|
||||||
|
{
|
||||||
|
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
|
||||||
|
|
||||||
|
struct ir3_register *mov_dst =
|
||||||
|
ir3_dst_create(mov, dst->num, dst->flags & (IR3_REG_HALF | IR3_REG_SHARED));
|
||||||
|
struct ir3_register *mov_src =
|
||||||
|
ir3_src_create(mov, src->num, src->flags & (IR3_REG_HALF | IR3_REG_SHARED));
|
||||||
|
mov_dst->wrmask = dst->wrmask;
|
||||||
|
mov_src->wrmask = src->wrmask;
|
||||||
|
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
|
||||||
|
|
||||||
|
mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||||
|
mov->cat1.src_type = (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
binop(struct ir3_block *block, opc_t opc, struct ir3_register *dst,
|
||||||
|
struct ir3_register *src0, struct ir3_register *src1)
|
||||||
|
{
|
||||||
|
struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2);
|
||||||
|
|
||||||
|
unsigned flags = dst->flags & IR3_REG_HALF;
|
||||||
|
struct ir3_register *instr_dst = ir3_dst_create(instr, dst->num, flags);
|
||||||
|
struct ir3_register *instr_src0 = ir3_src_create(instr, src0->num, flags);
|
||||||
|
struct ir3_register *instr_src1 = ir3_src_create(instr, src1->num, flags);
|
||||||
|
|
||||||
|
instr_dst->wrmask = dst->wrmask;
|
||||||
|
instr_src0->wrmask = src0->wrmask;
|
||||||
|
instr_src1->wrmask = src1->wrmask;
|
||||||
|
instr->repeat = util_last_bit(instr_dst->wrmask) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
triop(struct ir3_block *block, opc_t opc, struct ir3_register *dst,
|
||||||
|
struct ir3_register *src0, struct ir3_register *src1,
|
||||||
|
struct ir3_register *src2)
|
||||||
|
{
|
||||||
|
struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 3);
|
||||||
|
|
||||||
|
unsigned flags = dst->flags & IR3_REG_HALF;
|
||||||
|
struct ir3_register *instr_dst = ir3_dst_create(instr, dst->num, flags);
|
||||||
|
struct ir3_register *instr_src0 = ir3_src_create(instr, src0->num, flags);
|
||||||
|
struct ir3_register *instr_src1 = ir3_src_create(instr, src1->num, flags);
|
||||||
|
struct ir3_register *instr_src2 = ir3_src_create(instr, src2->num, flags);
|
||||||
|
|
||||||
|
instr_dst->wrmask = dst->wrmask;
|
||||||
|
instr_src0->wrmask = src0->wrmask;
|
||||||
|
instr_src1->wrmask = src1->wrmask;
|
||||||
|
instr_src2->wrmask = src2->wrmask;
|
||||||
|
instr->repeat = util_last_bit(instr_dst->wrmask) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_reduce(struct ir3_block *block, reduce_op_t opc,
|
||||||
|
struct ir3_register *dst, struct ir3_register *src0,
|
||||||
|
struct ir3_register *src1)
|
||||||
|
{
|
||||||
|
switch (opc) {
|
||||||
|
#define CASE(name) \
|
||||||
|
case REDUCE_OP_##name: \
|
||||||
|
binop(block, OPC_##name, dst, src0, src1); \
|
||||||
|
break;
|
||||||
|
|
||||||
|
CASE(ADD_U)
|
||||||
|
CASE(ADD_F)
|
||||||
|
CASE(MUL_F)
|
||||||
|
CASE(MIN_U)
|
||||||
|
CASE(MIN_S)
|
||||||
|
CASE(MIN_F)
|
||||||
|
CASE(MAX_U)
|
||||||
|
CASE(MAX_S)
|
||||||
|
CASE(MAX_F)
|
||||||
|
CASE(AND_B)
|
||||||
|
CASE(OR_B)
|
||||||
|
CASE(XOR_B)
|
||||||
|
|
||||||
|
#undef CASE
|
||||||
|
|
||||||
|
case REDUCE_OP_MUL_U:
|
||||||
|
if (dst->flags & IR3_REG_HALF) {
|
||||||
|
binop(block, OPC_MUL_S24, dst, src0, src1);
|
||||||
|
} else {
|
||||||
|
/* 32-bit multiplication macro - see ir3_nir_imul */
|
||||||
|
binop(block, OPC_MULL_U, dst, src0, src1);
|
||||||
|
triop(block, OPC_MADSH_M16, dst, src0, src1, dst);
|
||||||
|
triop(block, OPC_MADSH_M16, dst, src1, src0, dst);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static struct ir3_block *
|
static struct ir3_block *
|
||||||
split_block(struct ir3 *ir, struct ir3_block *before_block,
|
split_block(struct ir3 *ir, struct ir3_block *before_block,
|
||||||
struct ir3_instruction *instr, struct ir3_block **then)
|
struct ir3_instruction *instr)
|
||||||
{
|
{
|
||||||
struct ir3_block *then_block = ir3_block_create(ir);
|
|
||||||
struct ir3_block *after_block = ir3_block_create(ir);
|
struct ir3_block *after_block = ir3_block_create(ir);
|
||||||
list_add(&then_block->node, &before_block->node);
|
list_add(&after_block->node, &before_block->node);
|
||||||
list_add(&after_block->node, &then_block->node);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
|
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
|
||||||
after_block->successors[i] = before_block->successors[i];
|
after_block->successors[i] = before_block->successors[i];
|
||||||
|
|
@ -96,19 +188,8 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
before_block->successors[0] = then_block;
|
before_block->successors[0] = before_block->successors[1] = NULL;
|
||||||
before_block->successors[1] = after_block;
|
before_block->physical_successors[0] = before_block->physical_successors[1] = NULL;
|
||||||
before_block->physical_successors[0] = then_block;
|
|
||||||
before_block->physical_successors[1] = after_block;
|
|
||||||
ir3_block_add_predecessor(then_block, before_block);
|
|
||||||
ir3_block_add_predecessor(after_block, before_block);
|
|
||||||
ir3_block_add_physical_predecessor(then_block, before_block);
|
|
||||||
ir3_block_add_physical_predecessor(after_block, before_block);
|
|
||||||
|
|
||||||
then_block->successors[0] = after_block;
|
|
||||||
then_block->physical_successors[0] = after_block;
|
|
||||||
ir3_block_add_predecessor(after_block, then_block);
|
|
||||||
ir3_block_add_physical_predecessor(after_block, then_block);
|
|
||||||
|
|
||||||
foreach_instr_from_safe (rem_instr, &instr->node,
|
foreach_instr_from_safe (rem_instr, &instr->node,
|
||||||
&before_block->instr_list) {
|
&before_block->instr_list) {
|
||||||
|
|
@ -120,10 +201,39 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
||||||
after_block->brtype = before_block->brtype;
|
after_block->brtype = before_block->brtype;
|
||||||
after_block->condition = before_block->condition;
|
after_block->condition = before_block->condition;
|
||||||
|
|
||||||
*then = then_block;
|
|
||||||
return after_block;
|
return after_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
link_blocks_physical(struct ir3_block *pred, struct ir3_block *succ,
|
||||||
|
unsigned index)
|
||||||
|
{
|
||||||
|
pred->physical_successors[index] = succ;
|
||||||
|
ir3_block_add_physical_predecessor(succ, pred);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index)
|
||||||
|
{
|
||||||
|
pred->successors[index] = succ;
|
||||||
|
ir3_block_add_predecessor(succ, pred);
|
||||||
|
link_blocks_physical(pred, succ, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ir3_block *
|
||||||
|
create_if(struct ir3 *ir, struct ir3_block *before_block,
|
||||||
|
struct ir3_block *after_block)
|
||||||
|
{
|
||||||
|
struct ir3_block *then_block = ir3_block_create(ir);
|
||||||
|
list_add(&then_block->node, &before_block->node);
|
||||||
|
|
||||||
|
link_blocks(before_block, then_block, 0);
|
||||||
|
link_blocks(before_block, after_block, 1);
|
||||||
|
link_blocks(then_block, after_block, 0);
|
||||||
|
|
||||||
|
return then_block;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *instr)
|
lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *instr)
|
||||||
{
|
{
|
||||||
|
|
@ -135,106 +245,156 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in
|
||||||
case OPC_READ_COND_MACRO:
|
case OPC_READ_COND_MACRO:
|
||||||
case OPC_READ_FIRST_MACRO:
|
case OPC_READ_FIRST_MACRO:
|
||||||
case OPC_SWZ_SHARED_MACRO:
|
case OPC_SWZ_SHARED_MACRO:
|
||||||
|
case OPC_SCAN_MACRO:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ir3_block *before_block = *block;
|
struct ir3_block *before_block = *block;
|
||||||
struct ir3_block *then_block;
|
struct ir3_block *after_block = split_block(ir, before_block, instr);
|
||||||
struct ir3_block *after_block =
|
|
||||||
split_block(ir, before_block, instr, &then_block);
|
|
||||||
|
|
||||||
/* For ballot, the destination must be initialized to 0 before we do
|
if (instr->opc == OPC_SCAN_MACRO) {
|
||||||
* the movmsk because the condition may be 0 and then the movmsk will
|
/* The pseudo-code for the scan macro is:
|
||||||
* be skipped. Because it's a shared register we have to wrap the
|
*
|
||||||
* initialization in a getone block.
|
* while (true) {
|
||||||
*/
|
* header:
|
||||||
if (instr->opc == OPC_BALLOT_MACRO) {
|
* if (elect()) {
|
||||||
before_block->brtype = IR3_BRANCH_GETONE;
|
* exit:
|
||||||
before_block->condition = NULL;
|
* exclusive = reduce;
|
||||||
mov_immed(instr->dsts[0], then_block, 0);
|
* inclusive = src OP exclusive;
|
||||||
before_block = after_block;
|
* reduce = inclusive;
|
||||||
after_block = split_block(ir, before_block, instr, &then_block);
|
* }
|
||||||
}
|
* footer:
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* This is based on the blob's sequence, and carefully crafted to avoid
|
||||||
|
* using the shared register "reduce" except in move instructions, since
|
||||||
|
* using it in the actual OP isn't possible for half-registers.
|
||||||
|
*/
|
||||||
|
struct ir3_block *header = ir3_block_create(ir);
|
||||||
|
list_add(&header->node, &before_block->node);
|
||||||
|
|
||||||
switch (instr->opc) {
|
struct ir3_block *exit = ir3_block_create(ir);
|
||||||
case OPC_BALLOT_MACRO:
|
list_add(&exit->node, &header->node);
|
||||||
case OPC_READ_COND_MACRO:
|
|
||||||
case OPC_ANY_MACRO:
|
|
||||||
case OPC_ALL_MACRO:
|
|
||||||
before_block->condition = instr->srcs[0]->def->instr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
before_block->condition = NULL;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (instr->opc) {
|
struct ir3_block *footer = ir3_block_create(ir);
|
||||||
case OPC_BALLOT_MACRO:
|
list_add(&footer->node, &exit->node);
|
||||||
case OPC_READ_COND_MACRO:
|
|
||||||
before_block->brtype = IR3_BRANCH_COND;
|
|
||||||
break;
|
|
||||||
case OPC_ANY_MACRO:
|
|
||||||
before_block->brtype = IR3_BRANCH_ANY;
|
|
||||||
break;
|
|
||||||
case OPC_ALL_MACRO:
|
|
||||||
before_block->brtype = IR3_BRANCH_ALL;
|
|
||||||
break;
|
|
||||||
case OPC_ELECT_MACRO:
|
|
||||||
case OPC_READ_FIRST_MACRO:
|
|
||||||
case OPC_SWZ_SHARED_MACRO:
|
|
||||||
before_block->brtype = IR3_BRANCH_GETONE;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable("bad opcode");
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (instr->opc) {
|
link_blocks(before_block, header, 0);
|
||||||
case OPC_ALL_MACRO:
|
|
||||||
case OPC_ANY_MACRO:
|
|
||||||
case OPC_ELECT_MACRO:
|
|
||||||
mov_immed(instr->dsts[0], then_block, 1);
|
|
||||||
mov_immed(instr->dsts[0], before_block, 0);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPC_BALLOT_MACRO: {
|
link_blocks(header, exit, 0);
|
||||||
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
|
link_blocks(header, footer, 1);
|
||||||
struct ir3_instruction *movmsk =
|
header->brtype = IR3_BRANCH_GETONE;
|
||||||
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
|
|
||||||
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
|
|
||||||
movmsk->repeat = comp_count - 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case OPC_READ_COND_MACRO:
|
link_blocks(exit, after_block, 0);
|
||||||
case OPC_READ_FIRST_MACRO: {
|
link_blocks_physical(exit, footer, 1);
|
||||||
struct ir3_instruction *mov =
|
|
||||||
ir3_instr_create(then_block, OPC_MOV, 1, 1);
|
|
||||||
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
|
|
||||||
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
|
|
||||||
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
|
|
||||||
*new_src = *instr->srcs[src];
|
|
||||||
mov->cat1.dst_type = TYPE_U32;
|
|
||||||
mov->cat1.src_type =
|
|
||||||
(new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case OPC_SWZ_SHARED_MACRO: {
|
link_blocks(footer, header, 0);
|
||||||
struct ir3_instruction *swz =
|
|
||||||
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
|
|
||||||
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
|
|
||||||
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
|
|
||||||
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
|
|
||||||
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
|
|
||||||
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
|
|
||||||
swz->repeat = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
struct ir3_register *exclusive = instr->dsts[0];
|
||||||
unreachable("bad opcode");
|
struct ir3_register *inclusive = instr->dsts[1];
|
||||||
|
struct ir3_register *reduce = instr->dsts[2];
|
||||||
|
struct ir3_register *src = instr->srcs[0];
|
||||||
|
|
||||||
|
mov_reg(exit, exclusive, reduce);
|
||||||
|
do_reduce(exit, instr->cat1.reduce_op, inclusive, src, exclusive);
|
||||||
|
mov_reg(exit, reduce, inclusive);
|
||||||
|
} else {
|
||||||
|
struct ir3_block *then_block = create_if(ir, before_block, after_block);
|
||||||
|
|
||||||
|
/* For ballot, the destination must be initialized to 0 before we do
|
||||||
|
* the movmsk because the condition may be 0 and then the movmsk will
|
||||||
|
* be skipped. Because it's a shared register we have to wrap the
|
||||||
|
* initialization in a getone block.
|
||||||
|
*/
|
||||||
|
if (instr->opc == OPC_BALLOT_MACRO) {
|
||||||
|
before_block->brtype = IR3_BRANCH_GETONE;
|
||||||
|
before_block->condition = NULL;
|
||||||
|
mov_immed(instr->dsts[0], then_block, 0);
|
||||||
|
before_block = after_block;
|
||||||
|
after_block = split_block(ir, before_block, instr);
|
||||||
|
then_block = create_if(ir, before_block, after_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr->opc) {
|
||||||
|
case OPC_BALLOT_MACRO:
|
||||||
|
case OPC_READ_COND_MACRO:
|
||||||
|
case OPC_ANY_MACRO:
|
||||||
|
case OPC_ALL_MACRO:
|
||||||
|
before_block->condition = instr->srcs[0]->def->instr;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
before_block->condition = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr->opc) {
|
||||||
|
case OPC_BALLOT_MACRO:
|
||||||
|
case OPC_READ_COND_MACRO:
|
||||||
|
before_block->brtype = IR3_BRANCH_COND;
|
||||||
|
break;
|
||||||
|
case OPC_ANY_MACRO:
|
||||||
|
before_block->brtype = IR3_BRANCH_ANY;
|
||||||
|
break;
|
||||||
|
case OPC_ALL_MACRO:
|
||||||
|
before_block->brtype = IR3_BRANCH_ALL;
|
||||||
|
break;
|
||||||
|
case OPC_ELECT_MACRO:
|
||||||
|
case OPC_READ_FIRST_MACRO:
|
||||||
|
case OPC_SWZ_SHARED_MACRO:
|
||||||
|
before_block->brtype = IR3_BRANCH_GETONE;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("bad opcode");
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr->opc) {
|
||||||
|
case OPC_ALL_MACRO:
|
||||||
|
case OPC_ANY_MACRO:
|
||||||
|
case OPC_ELECT_MACRO:
|
||||||
|
mov_immed(instr->dsts[0], then_block, 1);
|
||||||
|
mov_immed(instr->dsts[0], before_block, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OPC_BALLOT_MACRO: {
|
||||||
|
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
|
||||||
|
struct ir3_instruction *movmsk =
|
||||||
|
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
|
||||||
|
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||||
|
movmsk->repeat = comp_count - 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case OPC_READ_COND_MACRO:
|
||||||
|
case OPC_READ_FIRST_MACRO: {
|
||||||
|
struct ir3_instruction *mov =
|
||||||
|
ir3_instr_create(then_block, OPC_MOV, 1, 1);
|
||||||
|
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
|
||||||
|
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||||
|
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
|
||||||
|
*new_src = *instr->srcs[src];
|
||||||
|
mov->cat1.dst_type = TYPE_U32;
|
||||||
|
mov->cat1.src_type =
|
||||||
|
(new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case OPC_SWZ_SHARED_MACRO: {
|
||||||
|
struct ir3_instruction *swz =
|
||||||
|
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
|
||||||
|
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||||
|
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
|
||||||
|
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
|
||||||
|
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
|
||||||
|
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
|
||||||
|
swz->repeat = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
unreachable("bad opcode");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*block = after_block;
|
*block = after_block;
|
||||||
|
|
|
||||||
|
|
@ -137,7 +137,51 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
|
||||||
disasm_a3xx_instr_name(instr->opc));
|
disasm_a3xx_instr_name(instr->opc));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (instr->opc != OPC_MOVMSK) {
|
if (instr->opc == OPC_SCAN_MACRO) {
|
||||||
|
switch (instr->cat1.reduce_op) {
|
||||||
|
case REDUCE_OP_ADD_U:
|
||||||
|
mesa_log_stream_printf(stream, ".add.u");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_ADD_F:
|
||||||
|
mesa_log_stream_printf(stream, ".add.f");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MUL_U:
|
||||||
|
mesa_log_stream_printf(stream, ".mul.u");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MUL_F:
|
||||||
|
mesa_log_stream_printf(stream, ".mul.f");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MIN_U:
|
||||||
|
mesa_log_stream_printf(stream, ".min.u");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MIN_S:
|
||||||
|
mesa_log_stream_printf(stream, ".min.s");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MIN_F:
|
||||||
|
mesa_log_stream_printf(stream, ".min.f");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MAX_U:
|
||||||
|
mesa_log_stream_printf(stream, ".max.u");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MAX_S:
|
||||||
|
mesa_log_stream_printf(stream, ".max.s");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_MAX_F:
|
||||||
|
mesa_log_stream_printf(stream, ".max.f");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_AND_B:
|
||||||
|
mesa_log_stream_printf(stream, ".and.b");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_OR_B:
|
||||||
|
mesa_log_stream_printf(stream, ".or.b");
|
||||||
|
break;
|
||||||
|
case REDUCE_OP_XOR_B:
|
||||||
|
mesa_log_stream_printf(stream, ".xor.b");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instr->opc != OPC_MOVMSK && instr->opc != OPC_SCAN_MACRO) {
|
||||||
mesa_log_stream_printf(stream, ".%s%s",
|
mesa_log_stream_printf(stream, ".%s%s",
|
||||||
type_name(instr->cat1.src_type),
|
type_name(instr->cat1.src_type),
|
||||||
type_name(instr->cat1.dst_type));
|
type_name(instr->cat1.dst_type));
|
||||||
|
|
|
||||||
|
|
@ -238,6 +238,14 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
||||||
} else if (instr->opc == OPC_ELECT_MACRO) {
|
} else if (instr->opc == OPC_ELECT_MACRO) {
|
||||||
validate_assert(ctx, instr->dsts_count == 1);
|
validate_assert(ctx, instr->dsts_count == 1);
|
||||||
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED));
|
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED));
|
||||||
|
} else if (instr->opc == OPC_SCAN_MACRO) {
|
||||||
|
validate_assert(ctx, instr->dsts_count == 3);
|
||||||
|
validate_assert(ctx, instr->srcs_count == 2);
|
||||||
|
validate_assert(ctx, reg_class_flags(instr->dsts[0]) ==
|
||||||
|
reg_class_flags(instr->srcs[0]));
|
||||||
|
validate_assert(ctx, reg_class_flags(instr->dsts[1]) ==
|
||||||
|
reg_class_flags(instr->srcs[0]));
|
||||||
|
validate_assert(ctx, reg_class_flags(instr->dsts[2]) == IR3_REG_SHARED);
|
||||||
} else {
|
} else {
|
||||||
foreach_dst (dst, instr)
|
foreach_dst (dst, instr)
|
||||||
validate_reg_size(ctx, dst, instr->cat1.dst_type);
|
validate_reg_size(ctx, dst, instr->cat1.dst_type);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue