nir_to_tgsi: Track our TGSI insns in blocks before emitting tokens.

To do register allocation well, we want to have a point before
ureg_insn_emit() to look at the liveness of the values and allocate them
to TGSI temporaries.  In order to do that, we have to switch from
ureg_OPCODE() emitting TGSI tokens directly to a new ntt_OPCODE() that
stores the ureg args in a block structure.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14404>
Authored by Emma Anholt, 2022-01-03 13:45:28 -08:00; committed by Marge Bot
parent 3f84c67af8
commit f4ce3178d9
2 changed files with 404 additions and 159 deletions
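In outline, the change replaces direct token emission with a record-then-emit scheme. A minimal standalone sketch of that pattern, using simplified stand-in types rather than the real ureg/TGSI API (names here are illustrative only):

   /* Standalone sketch of the deferred-emission pattern (simplified
    * stand-in types, not the real ureg/TGSI API). */
   #include <stdio.h>
   #include <stdlib.h>

   struct insn {
      int opcode;          /* stands in for enum tgsi_opcode */
      int dst, src0, src1; /* stand in for ureg_dst/ureg_src */
   };

   struct block {
      struct insn *insns;
      size_t count, cap;
   };

   /* Analogous to ntt_insn(): record the args instead of emitting tokens. */
   static struct insn *block_add(struct block *b, int op, int dst, int s0, int s1)
   {
      if (b->count == b->cap) {
         b->cap = b->cap ? b->cap * 2 : 8;
         b->insns = realloc(b->insns, b->cap * sizeof(*b->insns)); /* no error check: sketch */
      }
      b->insns[b->count] = (struct insn){ .opcode = op, .dst = dst, .src0 = s0, .src1 = s1 };
      return &b->insns[b->count++];
   }

   int main(void)
   {
      struct block b = {0};

      /* First pass: build up insns (a register allocator could inspect them here). */
      block_add(&b, 1 /* MOV */, 0, 1, -1);
      block_add(&b, 2 /* ADD */, 2, 0, 1);

      /* Second pass: the analogue of walking the blocks and calling ureg. */
      for (size_t i = 0; i < b.count; i++)
         printf("op=%d dst=%d src=%d,%d\n", b.insns[i].opcode,
                b.insns[i].dst, b.insns[i].src0, b.insns[i].src1);

      free(b.insns);
      return 0;
   }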


@ -33,6 +33,28 @@
#include "util/debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_dynarray.h"
struct ntt_insn {
enum tgsi_opcode opcode;
struct ureg_dst dst[2];
struct ureg_src src[4];
enum tgsi_texture_type tex_target;
enum tgsi_return_type tex_return_type;
struct tgsi_texture_offset tex_offset;
unsigned mem_qualifier;
enum pipe_format mem_format;
bool is_tex : 1;
bool is_mem : 1;
bool precise : 1;
};
struct ntt_block {
/* Array of struct ntt_insn */
struct util_dynarray insns;
};
struct ntt_compile {
nir_shader *s;
@ -57,6 +79,15 @@ struct ntt_compile {
nir_instr_liveness *liveness;
/* Map from nir_block to ntt_block */
struct hash_table *blocks;
struct ntt_block *cur_block;
unsigned current_if_else;
unsigned cf_label;
/* Whether we're currently emitting instructions for a precise NIR instruction. */
bool precise;
/* Mappings from driver_location to TGSI input/output number.
*
* We'll be declaring TGSI input/outputs in an arbitrary order, and they get
@ -70,7 +101,99 @@ struct ntt_compile {
struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
};
static struct ntt_block *
ntt_block_from_nir(struct ntt_compile *c, struct nir_block *block)
{
struct hash_entry *entry = _mesa_hash_table_search(c->blocks, block);
return entry->data;
}
static void ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list);
static void ntt_emit_cf_list_ureg(struct ntt_compile *c, struct exec_list *list);
static struct ntt_insn *
ntt_insn(struct ntt_compile *c, enum tgsi_opcode opcode,
struct ureg_dst dst,
struct ureg_src src0, struct ureg_src src1,
struct ureg_src src2, struct ureg_src src3)
{
struct ntt_insn insn = {
.opcode = opcode,
.dst = { dst, ureg_dst_undef() },
.src = { src0, src1, src2, src3 },
.precise = c->precise,
};
util_dynarray_append(&c->cur_block->insns, struct ntt_insn, insn);
return util_dynarray_top_ptr(&c->cur_block->insns, struct ntt_insn);
}
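Returning a pointer into the block's dynarray lets callers patch opcode-specific state after the common fields are recorded. A hedged example, modeled on the tex path later in this patch (the surrounding helper and the chosen target/return type are hypothetical; the TGSI enums are real):

   /* Hypothetical helper showing the append-then-patch pattern. */
   static void ntt_example_tex(struct ntt_compile *c, struct ureg_dst dst,
                               struct ureg_src coord, struct ureg_src sampler)
   {
      struct ntt_insn *insn = ntt_insn(c, TGSI_OPCODE_TEX, dst, coord, sampler,
                                       ureg_src_undef(), ureg_src_undef());
      insn->is_tex = true;                 /* emitted via ureg_tex_insn() in the second pass */
      insn->tex_target = TGSI_TEXTURE_2D;  /* hypothetical target */
      insn->tex_return_type = TGSI_RETURN_TYPE_FLOAT;
   }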
#define OP00( op ) \
static inline void ntt_##op(struct ntt_compile *c) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}
#define OP01( op ) \
static inline void ntt_##op(struct ntt_compile *c, \
struct ureg_src src0) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}
#define OP10( op ) \
static inline void ntt_##op(struct ntt_compile *c, \
struct ureg_dst dst) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, dst, ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}
#define OP11( op ) \
static inline void ntt_##op(struct ntt_compile *c, \
struct ureg_dst dst, \
struct ureg_src src0) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, dst, src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}
#define OP12( op ) \
static inline void ntt_##op(struct ntt_compile *c, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, dst, src0, src1, ureg_src_undef(), ureg_src_undef()); \
}
#define OP13( op ) \
static inline void ntt_##op(struct ntt_compile *c, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
struct ureg_src src2) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, ureg_src_undef()); \
}
#define OP14( op ) \
static inline void ntt_##op(struct ntt_compile *c, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
struct ureg_src src2, \
struct ureg_src src3) \
{ \
ntt_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, src3); \
}
/* We hand-craft our tex instructions */
#define OP12_TEX(op)
#define OP14_TEX(op)
/* Use a template include to generate a correctly-typed ntt_OP()
* function for each TGSI opcode:
*/
#include "gallium/auxiliary/tgsi/tgsi_opcode_tmp.h"
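For instance, assuming tgsi_opcode_tmp.h declares MOV via OP11, the include above would generate roughly:

   static inline void ntt_MOV(struct ntt_compile *c,
                              struct ureg_dst dst,
                              struct ureg_src src0)
   {
      ntt_insn(c, TGSI_OPCODE_MOV, dst, src0,
               ureg_src_undef(), ureg_src_undef(), ureg_src_undef());
   }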
/**
* Interprets a nir_load_const used as a NIR src as a uint.
@ -375,7 +498,7 @@ ntt_setup_inputs(struct ntt_compile *c)
struct ureg_dst temp = ureg_DECL_temporary(c->ureg);
if (c->native_integers) {
/* NIR is ~0 front and 0 back, while TGSI is +1 front */
ureg_SGE(c->ureg, temp, decl, ureg_imm1f(c->ureg, 0));
ntt_SGE(c, temp, decl, ureg_imm1f(c->ureg, 0));
} else {
/* tgsi docs say that floating point FACE will be positive for
* frontface and negative for backface, but realistically
@ -385,7 +508,7 @@ ntt_setup_inputs(struct ntt_compile *c)
* front face).
*/
temp.Saturate = true;
ureg_MOV(c->ureg, temp, decl);
ntt_MOV(c, temp, decl);
}
decl = ureg_src(temp);
@ -643,9 +766,9 @@ ntt_reladdr(struct ntt_compile *c, struct ureg_src addr, int addr_index)
}
if (c->native_integers)
ureg_UARL(c->ureg, c->addr_reg[addr_index], addr);
ntt_UARL(c, c->addr_reg[addr_index], addr);
else
ureg_ARL(c->ureg, c->addr_reg[addr_index], addr);
ntt_ARL(c, c->addr_reg[addr_index], addr);
return ureg_scalar(ureg_src(c->addr_reg[addr_index]), 0);
}
@ -781,7 +904,7 @@ ntt_store_def(struct ntt_compile *c, nir_ssa_def *def, struct ureg_src src)
}
}
ureg_MOV(c->ureg, ntt_get_ssa_def_decl(c, def), src);
ntt_MOV(c, ntt_get_ssa_def_decl(c, def), src);
}
static void
@ -791,7 +914,7 @@ ntt_store(struct ntt_compile *c, nir_dest *dest, struct ureg_src src)
ntt_store_def(c, &dest->ssa, src);
else {
struct ureg_dst dst = ntt_get_dest(c, dest);
ureg_MOV(c->ureg, dst, src);
ntt_MOV(c, dst, src);
}
}
@ -802,26 +925,18 @@ ntt_emit_scalar(struct ntt_compile *c, unsigned tgsi_op,
struct ureg_src src1)
{
unsigned i;
int num_src;
/* POW is the only 2-operand scalar op. */
if (tgsi_op == TGSI_OPCODE_POW) {
num_src = 2;
} else {
num_src = 1;
if (tgsi_op != TGSI_OPCODE_POW)
src1 = src0;
}
for (i = 0; i < 4; i++) {
if (dst.WriteMask & (1 << i)) {
struct ureg_dst this_dst = dst;
struct ureg_src srcs[2] = {
ureg_scalar(src0, i),
ureg_scalar(src1, i),
};
this_dst.WriteMask = (1 << i);
ureg_insn(c->ureg, tgsi_op, &this_dst, 1, srcs, num_src, false);
ntt_insn(c, tgsi_op,
ureg_writemask(dst, 1 << i),
ureg_scalar(src0, i),
ureg_scalar(src1, i),
ureg_src_undef(), ureg_src_undef());
}
}
}
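As an illustration, an RCP whose destination writemask is .xy would be queued as two single-channel instructions, roughly equivalent to this TGSI (register numbers hypothetical):

   RCP TEMP[0].x, TEMP[1].xxxx
   RCP TEMP[0].y, TEMP[1].yyyy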
@ -836,11 +951,14 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
int src_64 = nir_src_bit_size(instr->src[0].src) == 64;
int num_srcs = nir_op_infos[instr->op].num_inputs;
ureg_set_precise(c->ureg, instr->exact);
c->precise = instr->exact;
assert(num_srcs <= ARRAY_SIZE(src));
for (i = 0; i < num_srcs; i++)
src[i] = ntt_get_alu_src(c, instr, i);
for (; i < ARRAY_SIZE(src); i++)
src[i] = ureg_src_undef();
dst = ntt_get_dest(c, &instr->dest.dest);
if (instr->dest.saturate)
@ -983,8 +1101,8 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
bool table_op64 = src_64;
if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op][table_op64] != 0) {
/* The normal path for NIR to TGSI ALU op translation */
ureg_insn(c->ureg, op_map[instr->op][table_op64],
&dst, 1, src, num_srcs, false);
ntt_insn(c, op_map[instr->op][table_op64],
dst, src[0], src[1], src[2], src[3]);
} else {
/* Special cases for NIR to TGSI ALU op translation. */
@ -994,7 +1112,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
switch (instr->op) {
case nir_op_u2u64:
ureg_AND(c->ureg, dst, ureg_swizzle(src[0],
ntt_AND(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
ureg_imm4u(c->ureg, ~0, 0, ~0, 0));
@ -1003,29 +1121,29 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
case nir_op_i2i32:
case nir_op_u2u32:
assert(src_64);
ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
ntt_MOV(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X));
break;
case nir_op_fabs:
if (c->options->lower_fabs)
ureg_MAX(c->ureg, dst, src[0], ureg_negate(src[0]));
ntt_MAX(c, dst, src[0], ureg_negate(src[0]));
else
ureg_MOV(c->ureg, dst, ureg_abs(src[0]));
ntt_MOV(c, dst, ureg_abs(src[0]));
break;
case nir_op_fsat:
if (dst_64) {
ureg_MIN(c->ureg, dst, src[0], ntt_64bit_1f(c));
ureg_MAX(c->ureg, dst, ureg_src(dst), ureg_imm1u(c->ureg, 0));
ntt_MIN(c, dst, src[0], ntt_64bit_1f(c));
ntt_MAX(c, dst, ureg_src(dst), ureg_imm1u(c->ureg, 0));
} else {
ureg_MOV(c->ureg, ureg_saturate(dst), src[0]);
ntt_MOV(c, ureg_saturate(dst), src[0]);
}
break;
case nir_op_fneg:
ureg_MOV(c->ureg, dst, ureg_negate(src[0]));
ntt_MOV(c, dst, ureg_negate(src[0]));
break;
/* NOTE: TGSI 32-bit math ops have the old "one source channel
@ -1034,35 +1152,35 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
*/
case nir_op_frcp:
assert(!dst_64);
ntt_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], ureg_src_undef());
break;
case nir_op_frsq:
assert(!dst_64);
ntt_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], ureg_src_undef());
break;
case nir_op_fsqrt:
assert(!dst_64);
ntt_emit_scalar(c, TGSI_OPCODE_SQRT, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_SQRT, dst, src[0], ureg_src_undef());
break;
case nir_op_fexp2:
assert(!dst_64);
ntt_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], ureg_src_undef());
break;
case nir_op_flog2:
assert(!dst_64);
ntt_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], ureg_src_undef());
break;
case nir_op_b2f32:
ureg_AND(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 1.0));
ntt_AND(c, dst, src[0], ureg_imm1f(c->ureg, 1.0));
break;
case nir_op_b2f64:
ureg_AND(c->ureg, dst,
ntt_AND(c, dst,
ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
@ -1071,24 +1189,24 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
case nir_op_f2b32:
if (src_64)
ureg_DSNE(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 0));
ntt_DSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
else
ureg_FSNE(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 0));
ntt_FSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
break;
case nir_op_i2b32:
if (src_64) {
ureg_U64SNE(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 0));
ntt_U64SNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
} else
ureg_USNE(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 0));
ntt_USNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
break;
case nir_op_b2i32:
ureg_AND(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 1));
ntt_AND(c, dst, src[0], ureg_imm1u(c->ureg, 1));
break;
case nir_op_b2i64:
ureg_AND(c->ureg, dst,
ntt_AND(c, dst,
ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
@ -1096,21 +1214,21 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
break;
case nir_op_fsin:
ntt_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], ureg_src_undef());
break;
case nir_op_fcos:
ntt_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], src[1]);
ntt_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], ureg_src_undef());
break;
case nir_op_fsub:
assert(!dst_64);
ureg_ADD(c->ureg, dst, src[0], ureg_negate(src[1]));
ntt_ADD(c, dst, src[0], ureg_negate(src[1]));
break;
case nir_op_isub:
assert(!dst_64);
ureg_UADD(c->ureg, dst, src[0], ureg_negate(src[1]));
ntt_UADD(c, dst, src[0], ureg_negate(src[1]));
break;
case nir_op_fmod:
@ -1122,40 +1240,40 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
break;
case nir_op_flrp:
ureg_LRP(c->ureg, dst, src[2], src[1], src[0]);
ntt_LRP(c, dst, src[2], src[1], src[0]);
break;
case nir_op_pack_64_2x32_split:
ureg_MOV(c->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XZ),
ntt_MOV(c, ureg_writemask(dst, TGSI_WRITEMASK_XZ),
ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
ureg_MOV(c->ureg, ureg_writemask(dst, TGSI_WRITEMASK_YW),
ntt_MOV(c, ureg_writemask(dst, TGSI_WRITEMASK_YW),
ureg_swizzle(src[1],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
break;
case nir_op_unpack_64_2x32_split_x:
ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
ntt_MOV(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z));
break;
case nir_op_unpack_64_2x32_split_y:
ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
ntt_MOV(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W));
break;
case nir_op_b32csel:
if (nir_src_bit_size(instr->src[1].src) == 64) {
ureg_UCMP(c->ureg, dst, ureg_swizzle(src[0],
ntt_UCMP(c, dst, ureg_swizzle(src[0],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
src[1], src[2]);
} else {
ureg_UCMP(c->ureg, dst, src[0], src[1], src[2]);
ntt_UCMP(c, dst, src[0], src[1], src[2]);
}
break;
@ -1176,9 +1294,9 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
* We don't use this in general because on some hardware (i915 FS) the
* LRP gets expanded to MUL/MAD.
*/
ureg_LRP(c->ureg, dst, src[0], src[1], src[2]);
ntt_LRP(c, dst, src[0], src[1], src[2]);
} else {
ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]);
ntt_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]);
}
break;
@ -1207,9 +1325,12 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
chan * 2, chan * 2 + 1,
chan * 2, chan * 2 + 1);
ureg_insn(c->ureg, TGSI_OPCODE_DFRACEXP,
dsts, 2,
&chan_src, 1, false);
struct ntt_insn *insn = ntt_insn(c, TGSI_OPCODE_DFRACEXP,
dsts[0], chan_src,
ureg_src_undef(),
ureg_src_undef(),
ureg_src_undef());
insn->dst[1] = dsts[1];
}
ureg_release_temporary(c->ureg, temp);
@ -1218,7 +1339,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
case nir_op_ldexp:
assert(dst_64); /* 32bit handled in table. */
ureg_DLDEXP(c->ureg, dst, src[0],
ntt_DLDEXP(c, dst, src[0],
ureg_swizzle(src[1],
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
@ -1238,7 +1359,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
/* 64-bit op fixup movs */
if (!ureg_dst_is_undef(real_dst)) {
if (tgsi_64bit_compare) {
ureg_MOV(c->ureg, real_dst,
ntt_MOV(c, real_dst,
ureg_swizzle(ureg_src(dst), 0, 2, 0, 2));
} else {
assert(tgsi_64bit_downconvert);
@ -1246,7 +1367,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
uint32_t second_bit = real_dst.WriteMask & ~(1 << (ffs(real_dst.WriteMask) - 1));
if (second_bit)
swizzle[ffs(second_bit) - 1] = 1;
ureg_MOV(c->ureg, real_dst, ureg_swizzle(ureg_src(dst),
ntt_MOV(c, real_dst, ureg_swizzle(ureg_src(dst),
swizzle[0],
swizzle[1],
swizzle[2],
@ -1255,7 +1376,7 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
ureg_release_temporary(c->ureg, dst);
}
ureg_set_precise(c->ureg, false);
c->precise = false;
}
static struct ureg_src
@ -1345,7 +1466,7 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr)
* subtracting it off here.
*/
addr_temp = ureg_DECL_temporary(c->ureg);
ureg_UADD(c->ureg, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo));
ntt_UADD(c, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo));
src = ureg_src_dimension_indirect(src,
ntt_reladdr(c, ureg_src(addr_temp), 1),
c->first_ubo);
@ -1375,18 +1496,15 @@ ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr)
/* PIPE_CAP_LOAD_CONSTBUF: Not necessarily vec4 aligned, emit a
* TGSI_OPCODE_LOAD instruction from the const file.
*/
struct ureg_dst dst = ntt_get_dest(c, &instr->dest);
struct ureg_src srcs[2] = {
src,
ntt_get_src(c, instr->src[1]),
};
ureg_memory_insn(c->ureg, TGSI_OPCODE_LOAD,
&dst, 1,
srcs, ARRAY_SIZE(srcs),
0 /* qualifier */,
0 /* tex target */,
0 /* format: unused */
);
struct ntt_insn *insn =
ntt_insn(c, TGSI_OPCODE_LOAD,
ntt_get_dest(c, &instr->dest),
src, ntt_get_src(c, instr->src[1]),
ureg_src_undef(), ureg_src_undef());
insn->is_mem = true;
insn->tex_target = 0;
insn->mem_qualifier = 0;
insn->mem_format = 0; /* unused */
}
ureg_release_temporary(c->ureg, addr_temp);
@ -1441,7 +1559,7 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
memory.Index += nir_src_as_uint(instr->src[0]) / 4;
} else {
addr_temp = ureg_DECL_temporary(c->ureg);
ureg_USHR(c->ureg, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, 2));
ntt_USHR(c, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, 2));
memory = ureg_src_indirect(memory, ntt_reladdr(c, ureg_src(addr_temp), 2));
}
memory = ureg_src_dimension(memory, nir_intrinsic_base(instr));
@ -1566,12 +1684,11 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
dst = ntt_get_dest(c, &instr->dest);
}
ureg_memory_insn(c->ureg, opcode,
&dst, 1,
src, num_src,
qualifier,
TGSI_TEXTURE_BUFFER,
0 /* format: unused */);
struct ntt_insn *insn = ntt_insn(c, opcode, dst, src[0], src[1], src[2], src[3]);
insn->tex_target = TGSI_TEXTURE_BUFFER;
insn->mem_qualifier = qualifier;
insn->mem_format = 0; /* unused */
insn->is_mem = true;
ureg_release_temporary(c->ureg, addr_temp);
}
@ -1606,8 +1723,8 @@ ntt_emit_image_load_store(struct ntt_compile *c, nir_intrinsic_instr *instr)
if (dim == GLSL_SAMPLER_DIM_MS) {
temp = ureg_DECL_temporary(c->ureg);
ureg_MOV(c->ureg, temp, coord);
ureg_MOV(c->ureg, ureg_writemask(temp, 1 << (is_array ? 3 : 2)),
ntt_MOV(c, temp, coord);
ntt_MOV(c, ureg_writemask(temp, 1 << (is_array ? 3 : 2)),
ureg_scalar(ntt_get_src(c, instr->src[2]), TGSI_SWIZZLE_X));
coord = ureg_src(temp);
}
@ -1667,10 +1784,11 @@ ntt_emit_image_load_store(struct ntt_compile *c, nir_intrinsic_instr *instr)
unreachable("bad op");
}
ureg_memory_insn(c->ureg, op, &dst, 1, srcs, num_src,
ntt_get_access_qualifier(instr),
target,
nir_intrinsic_format(instr));
struct ntt_insn *insn = ntt_insn(c, op, dst, srcs[0], srcs[1], srcs[2], srcs[3]);
insn->tex_target = target;
insn->mem_qualifier = ntt_get_access_qualifier(instr);
insn->mem_format = nir_intrinsic_format(instr);
insn->is_mem = true;
if (!ureg_dst_is_undef(temp))
ureg_release_temporary(c->ureg, temp);
@ -1751,20 +1869,19 @@ ntt_emit_load_input(struct ntt_compile *c, nir_intrinsic_instr *instr)
if (c->centroid_inputs & (1ull << nir_intrinsic_base(instr))) {
ntt_store(c, &instr->dest, input);
} else {
ureg_INTERP_CENTROID(c->ureg, ntt_get_dest(c, &instr->dest),
input);
ntt_INTERP_CENTROID(c, ntt_get_dest(c, &instr->dest), input);
}
break;
case nir_intrinsic_load_barycentric_at_sample:
/* We stored the sample in the fake "bary" dest. */
ureg_INTERP_SAMPLE(c->ureg, ntt_get_dest(c, &instr->dest), input,
ntt_INTERP_SAMPLE(c, ntt_get_dest(c, &instr->dest), input,
ntt_get_src(c, instr->src[0]));
break;
case nir_intrinsic_load_barycentric_at_offset:
/* We stored the offset in the fake "bary" dest. */
ureg_INTERP_OFFSET(c->ureg, ntt_get_dest(c, &instr->dest), input,
ntt_INTERP_OFFSET(c, ntt_get_dest(c, &instr->dest), input,
ntt_get_src(c, instr->src[0]));
break;
@ -1810,7 +1927,7 @@ ntt_emit_store_output(struct ntt_compile *c, nir_intrinsic_instr *instr)
src = ureg_swizzle(src, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
ureg_MOV(c->ureg, out, src);
ntt_MOV(c, out, src);
}
static void
@ -1832,7 +1949,7 @@ ntt_emit_load_output(struct ntt_compile *c, nir_intrinsic_instr *instr)
out = ntt_ureg_dst_indirect(c, out, instr->src[0]);
}
ureg_MOV(c->ureg, ntt_get_dest(c, &instr->dest), ureg_src(out));
ntt_MOV(c, ntt_get_dest(c, &instr->dest), ureg_src(out));
}
static void
@ -1858,7 +1975,7 @@ ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:
ureg_U2F(c->ureg, ntt_get_dest(c, &instr->dest), sv);
ntt_U2F(c, ntt_get_dest(c, &instr->dest), sv);
return;
default:
@ -1928,7 +2045,7 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_discard:
ureg_KILL(c->ureg);
ntt_KILL(c);
break;
case nir_intrinsic_discard_if: {
@ -1936,12 +2053,12 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
if (c->native_integers) {
struct ureg_dst temp = ureg_writemask(ureg_DECL_temporary(c->ureg), 1);
ureg_AND(c->ureg, temp, cond, ureg_imm1f(c->ureg, 1.0));
ureg_KILL_IF(c->ureg, ureg_scalar(ureg_negate(ureg_src(temp)), 0));
ntt_AND(c, temp, cond, ureg_imm1f(c->ureg, 1.0));
ntt_KILL_IF(c, ureg_scalar(ureg_negate(ureg_src(temp)), 0));
ureg_release_temporary(c->ureg, temp);
} else {
/* For !native_integers, the bool got lowered to 1.0 or 0.0. */
ureg_KILL_IF(c->ureg, ureg_negate(cond));
ntt_KILL_IF(c, ureg_negate(cond));
}
break;
}
@ -2015,48 +2132,48 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_control_barrier:
case nir_intrinsic_memory_barrier_tcs_patch:
ureg_BARRIER(c->ureg);
ntt_BARRIER(c);
break;
case nir_intrinsic_memory_barrier:
ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg,
TGSI_MEMBAR_SHADER_BUFFER |
TGSI_MEMBAR_ATOMIC_BUFFER |
TGSI_MEMBAR_SHADER_IMAGE |
TGSI_MEMBAR_SHARED));
ntt_MEMBAR(c, ureg_imm1u(c->ureg,
TGSI_MEMBAR_SHADER_BUFFER |
TGSI_MEMBAR_ATOMIC_BUFFER |
TGSI_MEMBAR_SHADER_IMAGE |
TGSI_MEMBAR_SHARED));
break;
case nir_intrinsic_memory_barrier_atomic_counter:
ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
break;
case nir_intrinsic_memory_barrier_buffer:
ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
break;
case nir_intrinsic_memory_barrier_image:
ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
break;
case nir_intrinsic_memory_barrier_shared:
ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
break;
case nir_intrinsic_group_memory_barrier:
ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg,
TGSI_MEMBAR_SHADER_BUFFER |
TGSI_MEMBAR_ATOMIC_BUFFER |
TGSI_MEMBAR_SHADER_IMAGE |
TGSI_MEMBAR_SHARED |
TGSI_MEMBAR_THREAD_GROUP));
ntt_MEMBAR(c, ureg_imm1u(c->ureg,
TGSI_MEMBAR_SHADER_BUFFER |
TGSI_MEMBAR_ATOMIC_BUFFER |
TGSI_MEMBAR_SHADER_IMAGE |
TGSI_MEMBAR_SHARED |
TGSI_MEMBAR_THREAD_GROUP));
break;
case nir_intrinsic_end_primitive:
ureg_ENDPRIM(c->ureg, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
ntt_ENDPRIM(c, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
break;
case nir_intrinsic_emit_vertex:
ureg_EMIT(c->ureg, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
ntt_EMIT(c, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
break;
/* In TGSI we don't actually generate the barycentric coords, and emit
@ -2218,20 +2335,19 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
unreachable("unknown texture type");
}
struct tgsi_texture_offset tex_offsets[4];
unsigned num_tex_offsets = 0;
struct tgsi_texture_offset tex_offset = {
.File = TGSI_FILE_NULL
};
int tex_offset_src = nir_tex_instr_src_index(instr, nir_tex_src_offset);
if (tex_offset_src >= 0) {
struct ureg_src offset = ntt_get_src(c, instr->src[tex_offset_src].src);
tex_offsets[0].File = offset.File;
tex_offsets[0].Index = offset.Index;
tex_offsets[0].SwizzleX = offset.SwizzleX;
tex_offsets[0].SwizzleY = offset.SwizzleY;
tex_offsets[0].SwizzleZ = offset.SwizzleZ;
tex_offsets[0].Padding = 0;
num_tex_offsets = 1;
tex_offset.File = offset.File;
tex_offset.Index = offset.Index;
tex_offset.SwizzleX = offset.SwizzleX;
tex_offset.SwizzleY = offset.SwizzleY;
tex_offset.SwizzleZ = offset.SwizzleZ;
tex_offset.Padding = 0;
}
struct ureg_dst tex_dst;
@ -2240,15 +2356,17 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
else
tex_dst = dst;
ureg_tex_insn(c->ureg, tex_opcode,
&tex_dst, 1,
target,
tex_type,
tex_offsets, num_tex_offsets,
s.srcs, s.i);
while (s.i < 4)
s.srcs[s.i++] = ureg_src_undef();
struct ntt_insn *insn = ntt_insn(c, tex_opcode, tex_dst, s.srcs[0], s.srcs[1], s.srcs[2], s.srcs[3]);
insn->tex_target = target;
insn->tex_return_type = tex_type;
insn->tex_offset = tex_offset;
insn->is_tex = true;
if (instr->op == nir_texop_query_levels) {
ureg_MOV(c->ureg, dst, ureg_scalar(ureg_src(tex_dst), 3));
ntt_MOV(c, dst, ureg_scalar(ureg_src(tex_dst), 3));
ureg_release_temporary(c->ureg, tex_dst);
}
}
@ -2258,11 +2376,11 @@ ntt_emit_jump(struct ntt_compile *c, nir_jump_instr *jump)
{
switch (jump->type) {
case nir_jump_break:
ureg_BRK(c->ureg);
ntt_BRK(c);
break;
case nir_jump_continue:
ureg_CONT(c->ureg);
ntt_CONT(c);
break;
default:
@ -2326,38 +2444,27 @@ ntt_emit_instr(struct ntt_compile *c, nir_instr *instr)
static void
ntt_emit_if(struct ntt_compile *c, nir_if *if_stmt)
{
unsigned label;
if (c->native_integers)
ureg_UIF(c->ureg, c->if_cond, &label);
ntt_UIF(c, c->if_cond);
else
ureg_IF(c->ureg, c->if_cond, &label);
ntt_IF(c, c->if_cond);
ntt_emit_cf_list(c, &if_stmt->then_list);
if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
ureg_fixup_label(c->ureg, label, ureg_get_instruction_number(c->ureg));
ureg_ELSE(c->ureg, &label);
ntt_ELSE(c);
ntt_emit_cf_list(c, &if_stmt->else_list);
}
ureg_fixup_label(c->ureg, label, ureg_get_instruction_number(c->ureg));
ureg_ENDIF(c->ureg);
ntt_ENDIF(c);
}
static void
ntt_emit_loop(struct ntt_compile *c, nir_loop *loop)
{
/* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
* does reference BGNLOOP's. Follow the former behavior unless something comes up
* with a need.
*/
unsigned begin_label;
ureg_BGNLOOP(c->ureg, &begin_label);
ntt_BGNLOOP(c);
ntt_emit_cf_list(c, &loop->body);
unsigned end_label;
ureg_ENDLOOP(c->ureg, &end_label);
ntt_ENDLOOP(c);
}
static void
@ -2392,9 +2499,20 @@ ntt_src_live_interval_end_cb(nir_src *src, void *state)
static void
ntt_emit_block(struct ntt_compile *c, nir_block *block)
{
struct ntt_block *ntt_block = ntt_block_from_nir(c, block);
c->cur_block = ntt_block;
nir_foreach_instr(instr, block) {
ntt_emit_instr(c, instr);
/* Sanity check that we didn't accidentally ureg_OPCODE() instead of ntt_OPCODE(). */
if (ureg_get_instruction_number(c->ureg) != 0) {
fprintf(stderr, "Emitted ureg insn during: ");
nir_print_instr(instr, stderr);
fprintf(stderr, "\n");
unreachable("emitted ureg insn");
}
nir_foreach_src(instr, ntt_src_live_interval_end_cb, c);
}
@ -2441,6 +2559,117 @@ ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list)
}
}
static void
ntt_emit_block_ureg(struct ntt_compile *c, struct nir_block *block)
{
struct ntt_block *ntt_block = ntt_block_from_nir(c, block);
/* Emit the ntt insns to tgsi_ureg. */
util_dynarray_foreach(&ntt_block->insns, struct ntt_insn, insn) {
const struct tgsi_opcode_info *opcode_info =
tgsi_get_opcode_info(insn->opcode);
switch (insn->opcode) {
case TGSI_OPCODE_UIF:
ureg_UIF(c->ureg, insn->src[0], &c->cf_label);
break;
case TGSI_OPCODE_IF:
ureg_IF(c->ureg, insn->src[0], &c->cf_label);
break;
case TGSI_OPCODE_ELSE:
ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
ureg_ELSE(c->ureg, &c->cf_label);
c->current_if_else = c->cf_label;
break;
case TGSI_OPCODE_ENDIF:
ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
ureg_ENDIF(c->ureg);
break;
case TGSI_OPCODE_BGNLOOP:
/* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
* does reference BGNLOOP's. Follow the former behavior unless something comes up
* with a need.
*/
ureg_BGNLOOP(c->ureg, &c->cf_label);
break;
case TGSI_OPCODE_ENDLOOP:
ureg_ENDLOOP(c->ureg, &c->cf_label);
break;
default:
if (insn->is_tex) {
ureg_tex_insn(c->ureg, insn->opcode,
insn->dst, opcode_info->num_dst,
insn->tex_target, insn->tex_return_type,
&insn->tex_offset,
insn->tex_offset.File != TGSI_FILE_NULL ? 1 : 0,
insn->src, opcode_info->num_src);
} else if (insn->is_mem) {
ureg_memory_insn(c->ureg, insn->opcode,
insn->dst, opcode_info->num_dst,
insn->src, opcode_info->num_src,
insn->mem_qualifier,
insn->tex_target,
insn->mem_format);
} else {
ureg_insn(c->ureg, insn->opcode,
insn->dst, opcode_info->num_dst,
insn->src, opcode_info->num_src,
insn->precise);
}
}
}
}
static void
ntt_emit_if_ureg(struct ntt_compile *c, nir_if *if_stmt)
{
/* Note: the last block emitted our IF opcode. */
int if_stack = c->current_if_else;
c->current_if_else = c->cf_label;
/* Either the then or else block includes the ENDIF, which will fix up the
* IF(/ELSE)'s label for jumping
*/
ntt_emit_cf_list_ureg(c, &if_stmt->then_list);
ntt_emit_cf_list_ureg(c, &if_stmt->else_list);
c->current_if_else = if_stack;
}
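For a simple if/else, the second pass would emit tokens shaped roughly like this (labels are illustrative; L1 is patched when the TGSI_OPCODE_ELSE case runs ureg_fixup_label(), L2 when the TGSI_OPCODE_ENDIF case does):

   UIF TEMP[0].xxxx :L1
      ...then block...
   ELSE :L2
      ...else block...
   ENDIF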
static void
ntt_emit_cf_list_ureg(struct ntt_compile *c, struct exec_list *list)
{
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
case nir_cf_node_block:
ntt_emit_block_ureg(c, nir_cf_node_as_block(node));
break;
case nir_cf_node_if:
ntt_emit_if_ureg(c, nir_cf_node_as_if(node));
break;
case nir_cf_node_loop:
/* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
* does reference BGNLOOP's. Follow the former behavior unless something comes up
* with a need.
*/
ntt_emit_cf_list_ureg(c, &nir_cf_node_as_loop(node)->body);
break;
default:
unreachable("unknown CF type");
}
}
}
static void
ntt_emit_impl(struct ntt_compile *c, nir_function_impl *impl)
{
@ -2450,11 +2679,30 @@ ntt_emit_impl(struct ntt_compile *c, nir_function_impl *impl)
c->ssa_temp = rzalloc_array(c, struct ureg_src, impl->ssa_alloc);
c->reg_temp = rzalloc_array(c, struct ureg_dst, impl->reg_alloc);
/* Set up the struct ntt_blocks to put insns in */
c->blocks = _mesa_pointer_hash_table_create(c);
nir_foreach_block(block, impl) {
struct ntt_block *ntt_block = rzalloc(c->blocks, struct ntt_block);
util_dynarray_init(&ntt_block->insns, ntt_block);
_mesa_hash_table_insert(c->blocks, block, ntt_block);
}
c->cur_block = ntt_block_from_nir(c, nir_start_block(impl));
ntt_setup_inputs(c);
ntt_setup_outputs(c);
ntt_setup_uniforms(c);
ntt_setup_registers(c, &impl->registers);
/* Emit the ntt insns */
ntt_emit_cf_list(c, &impl->body);
/* Turn the ntt insns into actual TGSI tokens */
ntt_emit_cf_list_ureg(c, &impl->body);
ralloc_free(c->liveness);
c->liveness = NULL;
}
static int
@ -3200,10 +3448,6 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
c->ureg = ureg_create(pipe_shader_type_from_mesa(s->info.stage));
ureg_setup_shader_info(c->ureg, &s->info);
ntt_setup_inputs(c);
ntt_setup_outputs(c);
ntt_setup_uniforms(c);
if (s->info.stage == MESA_SHADER_FRAGMENT) {
/* The draw module's polygon stipple layer doesn't respect the chosen
* coordinate mode, so leave it as unspecified unless we're actually


@ -210,6 +210,7 @@ OP12(INTERP_OFFSET)
#undef OP11
#undef OP12
#undef OP13
#undef OP14
#undef OP00_LBL
#undef OP01_LBL