mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-29 13:50:25 +01:00
freedreno/ir3: move inputs/outputs to shader
The inputs and outputs belong in the shader rather than in the block. This change is mostly churn with nothing particularly interesting in it, but it is split out from the rest of the ir3_block reshuffling to cut down on the noise in the later patch. Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
d52fb2f5ad
commit
c8fb5f8a01
12 changed files with 160 additions and 176 deletions
|
|
@ -62,8 +62,8 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
|
|||
#define FD_DBG_NOBYPASS 0x0040
|
||||
#define FD_DBG_FRAGHALF 0x0080
|
||||
#define FD_DBG_NOBIN 0x0100
|
||||
#define FD_DBG_OPTMSGS 0x0400
|
||||
#define FD_DBG_GLSL120 0x1000
|
||||
#define FD_DBG_OPTMSGS 0x0200
|
||||
#define FD_DBG_GLSL120 0x0400
|
||||
|
||||
extern int fd_mesa_debug;
|
||||
extern bool fd_binning_enabled;
|
||||
|
|
|
|||
|
|
@ -66,12 +66,20 @@ void * ir3_alloc(struct ir3 *shader, int sz)
|
|||
return ptr;
|
||||
}
|
||||
|
||||
struct ir3 * ir3_create(struct ir3_compiler *compiler)
|
||||
struct ir3 * ir3_create(struct ir3_compiler *compiler,
|
||||
unsigned nin, unsigned nout)
|
||||
{
|
||||
struct ir3 *shader =
|
||||
calloc(1, sizeof(struct ir3));
|
||||
struct ir3 *shader = calloc(1, sizeof(struct ir3));
|
||||
|
||||
grow_heap(shader);
|
||||
|
||||
shader->compiler = compiler;
|
||||
shader->ninputs = nin;
|
||||
shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
|
||||
|
||||
shader->noutputs = nout;
|
||||
shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
|
|
@ -601,39 +609,11 @@ static void insert_instr(struct ir3_block *block,
|
|||
array_insert(shader->baryfs, instr);
|
||||
}
|
||||
|
||||
struct ir3_block * ir3_block_create(struct ir3 *shader,
|
||||
unsigned ntmp, unsigned nin, unsigned nout)
|
||||
struct ir3_block * ir3_block_create(struct ir3 *shader)
|
||||
{
|
||||
struct ir3_block *block;
|
||||
unsigned size;
|
||||
char *ptr;
|
||||
|
||||
size = sizeof(*block);
|
||||
size += sizeof(block->temporaries[0]) * ntmp;
|
||||
size += sizeof(block->inputs[0]) * nin;
|
||||
size += sizeof(block->outputs[0]) * nout;
|
||||
|
||||
ptr = ir3_alloc(shader, size);
|
||||
|
||||
block = (void *)ptr;
|
||||
ptr += sizeof(*block);
|
||||
|
||||
block->temporaries = (void *)ptr;
|
||||
block->ntemporaries = ntmp;
|
||||
ptr += sizeof(block->temporaries[0]) * ntmp;
|
||||
|
||||
block->inputs = (void *)ptr;
|
||||
block->ninputs = nin;
|
||||
ptr += sizeof(block->inputs[0]) * nin;
|
||||
|
||||
block->outputs = (void *)ptr;
|
||||
block->noutputs = nout;
|
||||
ptr += sizeof(block->outputs[0]) * nout;
|
||||
|
||||
struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
|
||||
block->shader = shader;
|
||||
|
||||
list_inithead(&block->instr_list);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -333,6 +333,10 @@ struct ir3_heap_chunk;
|
|||
struct ir3 {
|
||||
struct ir3_compiler *compiler;
|
||||
|
||||
unsigned ninputs, noutputs;
|
||||
struct ir3_instruction **inputs;
|
||||
struct ir3_instruction **outputs;
|
||||
|
||||
/* Track bary.f (and ldlv) instructions.. this is needed in
|
||||
* scheduling to ensure that all varying fetches happen before
|
||||
* any potential kill instructions. The hw gets grumpy if all
|
||||
|
|
@ -365,24 +369,19 @@ struct ir3 {
|
|||
|
||||
struct ir3_block {
|
||||
struct ir3 *shader;
|
||||
unsigned ntemporaries, ninputs, noutputs;
|
||||
/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
|
||||
struct ir3_instruction **temporaries;
|
||||
struct ir3_instruction **inputs;
|
||||
struct ir3_instruction **outputs;
|
||||
/* only a single address register: */
|
||||
struct ir3_instruction *address;
|
||||
struct list_head instr_list;
|
||||
};
|
||||
|
||||
struct ir3 * ir3_create(struct ir3_compiler *compiler);
|
||||
struct ir3 * ir3_create(struct ir3_compiler *compiler,
|
||||
unsigned nin, unsigned nout);
|
||||
void ir3_destroy(struct ir3 *shader);
|
||||
void * ir3_assemble(struct ir3 *shader,
|
||||
struct ir3_info *info, uint32_t gpu_id);
|
||||
void * ir3_alloc(struct ir3 *shader, int sz);
|
||||
|
||||
struct ir3_block * ir3_block_create(struct ir3 *shader,
|
||||
unsigned ntmp, unsigned nin, unsigned nout);
|
||||
struct ir3_block * ir3_block_create(struct ir3 *shader);
|
||||
|
||||
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
|
||||
int category, opc_t opc);
|
||||
|
|
@ -780,32 +779,28 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
|
|||
void ir3_print(struct ir3 *ir);
|
||||
void ir3_print_instr(struct ir3_instruction *instr);
|
||||
|
||||
/* flatten if/else: */
|
||||
int ir3_block_flatten(struct ir3_block *block);
|
||||
|
||||
/* depth calculation: */
|
||||
int ir3_delayslots(struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned n);
|
||||
void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
|
||||
void ir3_block_depth(struct ir3_block *block);
|
||||
void ir3_depth(struct ir3 *ir);
|
||||
|
||||
/* copy-propagate: */
|
||||
void ir3_block_cp(struct ir3_block *block);
|
||||
void ir3_cp(struct ir3 *ir);
|
||||
|
||||
/* group neighbors and insert mov's to resolve conflicts: */
|
||||
void ir3_block_group(struct ir3_block *block);
|
||||
void ir3_group(struct ir3 *ir);
|
||||
|
||||
/* scheduling: */
|
||||
int ir3_block_sched(struct ir3_block *block);
|
||||
int ir3_sched(struct ir3 *ir);
|
||||
|
||||
/* register assignment: */
|
||||
struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx);
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
||||
int ir3_ra(struct ir3 *ir3, enum shader_t type,
|
||||
bool frag_coord, bool frag_face);
|
||||
|
||||
/* legalize: */
|
||||
void ir3_block_legalize(struct ir3_block *block,
|
||||
bool *has_samp, int *max_bary);
|
||||
void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
|
||||
|
||||
/* ************************************************************************* */
|
||||
/* instruction helpers */
|
||||
|
|
|
|||
|
|
@ -66,34 +66,34 @@ static void dump_info(struct ir3_shader_variant *so, const char *str)
|
|||
// TODO make gpu_id configurable on cmdline
|
||||
bin = ir3_shader_assemble(so, 320);
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
struct ir3_block *block = so->ir->block;
|
||||
struct ir3 *ir = so->ir;
|
||||
struct ir3_register *reg;
|
||||
uint8_t regid;
|
||||
unsigned i;
|
||||
|
||||
debug_printf("; %s: %s\n", type, str);
|
||||
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
if (!block->inputs[i]) {
|
||||
for (i = 0; i < ir->ninputs; i++) {
|
||||
if (!ir->inputs[i]) {
|
||||
debug_printf("; in%d unused\n", i);
|
||||
continue;
|
||||
}
|
||||
reg = block->inputs[i]->regs[0];
|
||||
reg = ir->inputs[i]->regs[0];
|
||||
regid = reg->num;
|
||||
debug_printf("@in(%sr%d.%c)\tin%d\n",
|
||||
(reg->flags & IR3_REG_HALF) ? "h" : "",
|
||||
(regid >> 2), "xyzw"[regid & 0x3], i);
|
||||
}
|
||||
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
if (!block->outputs[i]) {
|
||||
for (i = 0; i < ir->noutputs; i++) {
|
||||
if (!ir->outputs[i]) {
|
||||
debug_printf("; out%d unused\n", i);
|
||||
continue;
|
||||
}
|
||||
/* kill shows up as a virtual output.. skip it! */
|
||||
if (is_kill(block->outputs[i]))
|
||||
if (is_kill(ir->outputs[i]))
|
||||
continue;
|
||||
reg = block->outputs[i]->regs[0];
|
||||
reg = ir->outputs[i]->regs[0];
|
||||
regid = reg->num;
|
||||
debug_printf("@out(%sr%d.%c)\tout%d\n",
|
||||
(reg->flags & IR3_REG_HALF) ? "h" : "",
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@
|
|||
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
|
||||
|
||||
struct ir3_compile {
|
||||
struct ir3_compiler *compiler;
|
||||
|
||||
const struct tgsi_token *tokens;
|
||||
struct nir_shader *s;
|
||||
|
||||
|
|
@ -170,7 +172,8 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
|
|||
|
||||
/* TODO nir doesn't lower everything for us yet, but ideally it would: */
|
||||
static const struct tgsi_token *
|
||||
lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
|
||||
lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
|
||||
struct ir3_shader_variant *so)
|
||||
{
|
||||
struct tgsi_shader_info info;
|
||||
struct tgsi_lowering_config lconfig = {
|
||||
|
|
@ -192,7 +195,7 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
|
|||
break;
|
||||
}
|
||||
|
||||
if (so->ir->compiler->gpu_id >= 400) {
|
||||
if (ctx->compiler->gpu_id >= 400) {
|
||||
/* a4xx seems to have *no* sam.p */
|
||||
lconfig.lower_TXP = ~0; /* lower all txp */
|
||||
} else {
|
||||
|
|
@ -204,13 +207,14 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
|
|||
}
|
||||
|
||||
static struct ir3_compile *
|
||||
compile_init(struct ir3_shader_variant *so,
|
||||
compile_init(struct ir3_compiler *compiler,
|
||||
struct ir3_shader_variant *so,
|
||||
const struct tgsi_token *tokens)
|
||||
{
|
||||
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
|
||||
const struct tgsi_token *lowered_tokens;
|
||||
|
||||
if (so->ir->compiler->gpu_id >= 400) {
|
||||
if (compiler->gpu_id >= 400) {
|
||||
/* need special handling for "flat" */
|
||||
ctx->flat_bypass = true;
|
||||
ctx->levels_add_one = false;
|
||||
|
|
@ -230,6 +234,7 @@ compile_init(struct ir3_shader_variant *so,
|
|||
break;
|
||||
}
|
||||
|
||||
ctx->compiler = compiler;
|
||||
ctx->ir = so->ir;
|
||||
ctx->so = so;
|
||||
ctx->next_inloc = 8;
|
||||
|
|
@ -240,7 +245,7 @@ compile_init(struct ir3_shader_variant *so,
|
|||
ctx->addr_ht = _mesa_hash_table_create(ctx,
|
||||
_mesa_hash_pointer, _mesa_key_pointer_equal);
|
||||
|
||||
lowered_tokens = lower_tgsi(tokens, so);
|
||||
lowered_tokens = lower_tgsi(ctx, tokens, so);
|
||||
if (!lowered_tokens)
|
||||
lowered_tokens = tokens;
|
||||
ctx->s = to_nir(lowered_tokens);
|
||||
|
|
@ -454,7 +459,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr,
|
|||
return NULL;
|
||||
|
||||
collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz);
|
||||
ir3_reg_create(collect, 0, 0);
|
||||
ir3_reg_create(collect, 0, 0); /* dst */
|
||||
for (unsigned i = 0; i < arrsz; i++)
|
||||
ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i];
|
||||
|
||||
|
|
@ -1134,8 +1139,8 @@ static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
|
|||
so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
|
||||
so->total_in++;
|
||||
|
||||
ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1);
|
||||
ctx->block->inputs[r] = instr;
|
||||
ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
|
||||
ctx->ir->inputs[r] = instr;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1174,17 +1179,18 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_load_input:
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = b->inputs[n];
|
||||
dst[i] = ctx->ir->inputs[n];
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_input_indirect:
|
||||
src = get_src(ctx, &intr->src[0]);
|
||||
struct ir3_instruction *collect =
|
||||
create_collect(b, b->inputs, b->ninputs);
|
||||
create_collect(b, ctx->ir->inputs, ctx->ir->ninputs);
|
||||
struct ir3_instruction *addr = get_addr(ctx, src[0]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect);
|
||||
dst[i] = create_indirect_load(ctx, ctx->ir->ninputs,
|
||||
n, addr, collect);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_var:
|
||||
|
|
@ -1197,7 +1203,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
src = get_src(ctx, &intr->src[0]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
b->outputs[n] = src[i];
|
||||
ctx->ir->outputs[n] = src[i];
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
|
|
@ -1707,7 +1713,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
|
|||
instr = create_input(ctx->block, NULL, idx);
|
||||
}
|
||||
|
||||
ctx->block->inputs[idx] = instr;
|
||||
ctx->ir->inputs[idx] = instr;
|
||||
}
|
||||
|
||||
if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
|
||||
|
|
@ -1774,7 +1780,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
|
|||
for (int i = 0; i < ncomp; i++) {
|
||||
unsigned idx = (n * 4) + i;
|
||||
|
||||
ctx->block->outputs[idx] = create_immed(ctx->block, fui(0.0));
|
||||
ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1794,12 +1800,14 @@ emit_instructions(struct ir3_compile *ctx)
|
|||
ninputs += 8;
|
||||
}
|
||||
|
||||
ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs);
|
||||
ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs);
|
||||
ctx->block = ir3_block_create(ctx->ir);
|
||||
ctx->ir->block = ctx->block;
|
||||
|
||||
if (ctx->so->type == SHADER_FRAGMENT) {
|
||||
ctx->block->noutputs -= ARRAY_SIZE(ctx->kill);
|
||||
ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill);
|
||||
} else if (ctx->so->type == SHADER_VERTEX) {
|
||||
ctx->block->ninputs -= 8;
|
||||
ctx->ir->ninputs -= 8;
|
||||
}
|
||||
|
||||
/* for fragment shader, we have a single input register (usually
|
||||
|
|
@ -1849,12 +1857,12 @@ static void
|
|||
fixup_frag_inputs(struct ir3_compile *ctx)
|
||||
{
|
||||
struct ir3_shader_variant *so = ctx->so;
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3 *ir = ctx->ir;
|
||||
struct ir3_instruction **inputs;
|
||||
struct ir3_instruction *instr;
|
||||
int n, regid = 0;
|
||||
|
||||
block->ninputs = 0;
|
||||
ir->ninputs = 0;
|
||||
|
||||
n = 4; /* always have frag_pos */
|
||||
n += COND(so->frag_face, 4);
|
||||
|
|
@ -1866,15 +1874,15 @@ fixup_frag_inputs(struct ir3_compile *ctx)
|
|||
/* this ultimately gets assigned to hr0.x so doesn't conflict
|
||||
* with frag_coord/frag_pos..
|
||||
*/
|
||||
inputs[block->ninputs++] = ctx->frag_face;
|
||||
inputs[ir->ninputs++] = ctx->frag_face;
|
||||
ctx->frag_face->regs[0]->num = 0;
|
||||
|
||||
/* remaining channels not used, but let's avoid confusing
|
||||
* other parts that expect inputs to come in groups of vec4
|
||||
*/
|
||||
inputs[block->ninputs++] = NULL;
|
||||
inputs[block->ninputs++] = NULL;
|
||||
inputs[block->ninputs++] = NULL;
|
||||
inputs[ir->ninputs++] = NULL;
|
||||
inputs[ir->ninputs++] = NULL;
|
||||
inputs[ir->ninputs++] = NULL;
|
||||
}
|
||||
|
||||
/* since we don't know where to set the regid for frag_coord,
|
||||
|
|
@ -1888,28 +1896,28 @@ fixup_frag_inputs(struct ir3_compile *ctx)
|
|||
ctx->frag_coord[2]->regs[0]->num = regid++;
|
||||
ctx->frag_coord[3]->regs[0]->num = regid++;
|
||||
|
||||
inputs[block->ninputs++] = ctx->frag_coord[0];
|
||||
inputs[block->ninputs++] = ctx->frag_coord[1];
|
||||
inputs[block->ninputs++] = ctx->frag_coord[2];
|
||||
inputs[block->ninputs++] = ctx->frag_coord[3];
|
||||
inputs[ir->ninputs++] = ctx->frag_coord[0];
|
||||
inputs[ir->ninputs++] = ctx->frag_coord[1];
|
||||
inputs[ir->ninputs++] = ctx->frag_coord[2];
|
||||
inputs[ir->ninputs++] = ctx->frag_coord[3];
|
||||
}
|
||||
|
||||
/* we always have frag_pos: */
|
||||
so->pos_regid = regid;
|
||||
|
||||
/* r0.x */
|
||||
instr = create_input(block, NULL, block->ninputs);
|
||||
instr = create_input(ctx->block, NULL, ir->ninputs);
|
||||
instr->regs[0]->num = regid++;
|
||||
inputs[block->ninputs++] = instr;
|
||||
inputs[ir->ninputs++] = instr;
|
||||
ctx->frag_pos->regs[1]->instr = instr;
|
||||
|
||||
/* r0.y */
|
||||
instr = create_input(block, NULL, block->ninputs);
|
||||
instr = create_input(ctx->block, NULL, ir->ninputs);
|
||||
instr->regs[0]->num = regid++;
|
||||
inputs[block->ninputs++] = instr;
|
||||
inputs[ir->ninputs++] = instr;
|
||||
ctx->frag_pos->regs[2]->instr = instr;
|
||||
|
||||
block->inputs = inputs;
|
||||
ir->inputs = inputs;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
@ -1919,18 +1927,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
struct ir3_shader_key key)
|
||||
{
|
||||
struct ir3_compile *ctx;
|
||||
struct ir3_block *block;
|
||||
struct ir3 *ir;
|
||||
struct ir3_instruction **inputs;
|
||||
unsigned i, j, actual_in;
|
||||
int ret = 0, max_bary;
|
||||
|
||||
assert(!so->ir);
|
||||
|
||||
so->ir = ir3_create(compiler);
|
||||
|
||||
assert(so->ir);
|
||||
|
||||
ctx = compile_init(so, tokens);
|
||||
ctx = compile_init(compiler, so, tokens);
|
||||
if (!ctx) {
|
||||
DBG("INIT failed!");
|
||||
ret = -1;
|
||||
|
|
@ -1945,11 +1949,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
goto out;
|
||||
}
|
||||
|
||||
block = ctx->block;
|
||||
so->ir->block = block;
|
||||
ir = so->ir = ctx->ir;
|
||||
|
||||
/* keep track of the inputs from TGSI perspective.. */
|
||||
inputs = block->inputs;
|
||||
inputs = ir->inputs;
|
||||
|
||||
/* but fixup actual inputs for frag shader: */
|
||||
if (so->type == SHADER_FRAGMENT)
|
||||
|
|
@ -1966,24 +1969,24 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
(name == TGSI_SEMANTIC_PSIZE))) {
|
||||
if (i != j) {
|
||||
so->outputs[j] = so->outputs[i];
|
||||
block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
|
||||
block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
|
||||
block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
|
||||
block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
|
||||
ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
|
||||
ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
|
||||
ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
|
||||
ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
so->outputs_count = j;
|
||||
block->noutputs = j * 4;
|
||||
ir->noutputs = j * 4;
|
||||
}
|
||||
|
||||
/* if we want half-precision outputs, mark the output registers
|
||||
* as half:
|
||||
*/
|
||||
if (key.half_precision) {
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
struct ir3_instruction *out = block->outputs[i];
|
||||
for (i = 0; i < ir->noutputs; i++) {
|
||||
struct ir3_instruction *out = ir->outputs[i];
|
||||
if (!out)
|
||||
continue;
|
||||
out->regs[0]->flags |= IR3_REG_HALF;
|
||||
|
|
@ -2004,36 +2007,34 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
*/
|
||||
if (so->type == SHADER_FRAGMENT) {
|
||||
for (i = 0; i < ctx->kill_count; i++)
|
||||
block->outputs[block->noutputs++] = ctx->kill[i];
|
||||
ir->outputs[ir->noutputs++] = ctx->kill[i];
|
||||
}
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||
printf("BEFORE CP:\n");
|
||||
ir3_print(so->ir);
|
||||
ir3_print(ir);
|
||||
}
|
||||
|
||||
ir3_block_depth(block);
|
||||
|
||||
ir3_block_cp(block);
|
||||
ir3_cp(ir);
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||
printf("BEFORE GROUPING:\n");
|
||||
ir3_print(so->ir);
|
||||
ir3_print(ir);
|
||||
}
|
||||
|
||||
/* Group left/right neighbors, inserting mov's where needed to
|
||||
* solve conflicts:
|
||||
*/
|
||||
ir3_block_group(block);
|
||||
ir3_group(ir);
|
||||
|
||||
ir3_block_depth(block);
|
||||
ir3_depth(ir);
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||
printf("AFTER DEPTH:\n");
|
||||
ir3_print(so->ir);
|
||||
ir3_print(ir);
|
||||
}
|
||||
|
||||
ret = ir3_block_sched(block);
|
||||
ret = ir3_sched(ir);
|
||||
if (ret) {
|
||||
DBG("SCHED failed!");
|
||||
goto out;
|
||||
|
|
@ -2041,10 +2042,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||
printf("AFTER SCHED:\n");
|
||||
ir3_print(so->ir);
|
||||
ir3_print(ir);
|
||||
}
|
||||
|
||||
ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
|
||||
ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face);
|
||||
if (ret) {
|
||||
DBG("RA failed!");
|
||||
goto out;
|
||||
|
|
@ -2052,14 +2053,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||
printf("AFTER RA:\n");
|
||||
ir3_print(so->ir);
|
||||
ir3_print(ir);
|
||||
}
|
||||
|
||||
ir3_block_legalize(block, &so->has_samp, &max_bary);
|
||||
ir3_legalize(ir, &so->has_samp, &max_bary);
|
||||
|
||||
/* fixup input/outputs: */
|
||||
for (i = 0; i < so->outputs_count; i++) {
|
||||
so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
|
||||
so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num;
|
||||
/* preserve hack for depth output.. tgsi writes depth to .z,
|
||||
* but what we give the hw is the scalar register:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -387,16 +387,17 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
|
|||
return instr;
|
||||
}
|
||||
|
||||
void ir3_block_cp(struct ir3_block *block)
|
||||
void
|
||||
ir3_cp(struct ir3 *ir)
|
||||
{
|
||||
ir3_clear_mark(block->shader);
|
||||
ir3_clear_mark(ir->block->shader);
|
||||
|
||||
for (unsigned i = 0; i < block->noutputs; i++) {
|
||||
if (block->outputs[i]) {
|
||||
for (unsigned i = 0; i < ir->noutputs; i++) {
|
||||
if (ir->outputs[i]) {
|
||||
struct ir3_instruction *out =
|
||||
instr_cp(block->outputs[i], NULL);
|
||||
instr_cp(ir->outputs[i], NULL);
|
||||
|
||||
block->outputs[i] = out;
|
||||
ir->outputs[i] = out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -144,22 +144,23 @@ remove_unused_by_block(struct ir3_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
void ir3_block_depth(struct ir3_block *block)
|
||||
void
|
||||
ir3_depth(struct ir3 *ir)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
ir3_clear_mark(block->shader);
|
||||
for (i = 0; i < block->noutputs; i++)
|
||||
if (block->outputs[i])
|
||||
ir3_instr_depth(block->outputs[i]);
|
||||
ir3_clear_mark(ir->block->shader);
|
||||
for (i = 0; i < ir->noutputs; i++)
|
||||
if (ir->outputs[i])
|
||||
ir3_instr_depth(ir->outputs[i]);
|
||||
|
||||
/* mark un-used instructions: */
|
||||
remove_unused_by_block(block);
|
||||
remove_unused_by_block(ir->block);
|
||||
|
||||
/* cleanup unused inputs: */
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
struct ir3_instruction *in = block->inputs[i];
|
||||
for (i = 0; i < ir->ninputs; i++) {
|
||||
struct ir3_instruction *in = ir->inputs[i];
|
||||
if (in && (in->depth == DEPTH_UNUSED))
|
||||
block->inputs[i] = NULL;
|
||||
ir->inputs[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,7 +99,8 @@ static struct ir3_instruction *instr_get(void *arr, int idx)
|
|||
{
|
||||
return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
|
||||
}
|
||||
static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
|
||||
static void
|
||||
instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
|
||||
{
|
||||
((struct ir3_instruction *)arr)->regs[idx+1]->instr =
|
||||
ir3_MOV(instr->block, instr, TYPE_F32);
|
||||
|
|
@ -107,7 +108,8 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
|
|||
static struct group_ops instr_ops = { instr_get, instr_insert_mov };
|
||||
|
||||
|
||||
static void group_n(struct group_ops *ops, void *arr, unsigned n)
|
||||
static void
|
||||
group_n(struct group_ops *ops, void *arr, unsigned n)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
||||
|
|
@ -170,7 +172,8 @@ restart:
|
|||
}
|
||||
}
|
||||
|
||||
static void instr_find_neighbors(struct ir3_instruction *instr)
|
||||
static void
|
||||
instr_find_neighbors(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_instruction *src;
|
||||
|
||||
|
|
@ -189,7 +192,8 @@ static void instr_find_neighbors(struct ir3_instruction *instr)
|
|||
* we need to insert dummy/padding instruction for grouping, and
|
||||
* then take it back out again before anyone notices.
|
||||
*/
|
||||
static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
|
||||
static void
|
||||
pad_and_group_input(struct ir3_instruction **input, unsigned n)
|
||||
{
|
||||
int i, mask = 0;
|
||||
struct ir3_block *block = NULL;
|
||||
|
|
@ -214,7 +218,8 @@ static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
|
|||
}
|
||||
}
|
||||
|
||||
static void block_find_neighbors(struct ir3_block *block)
|
||||
static void
|
||||
find_neighbors(struct ir3 *ir)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
|
@ -232,22 +237,23 @@ static void block_find_neighbors(struct ir3_block *block)
|
|||
* This logic won't quite cut it if we don't align smaller
|
||||
* on vec4 boundaries
|
||||
*/
|
||||
for (i = 0; i < block->ninputs; i += 4)
|
||||
pad_and_group_input(&block->inputs[i], 4);
|
||||
for (i = 0; i < block->noutputs; i += 4)
|
||||
group_n(&arr_ops_out, &block->outputs[i], 4);
|
||||
for (i = 0; i < ir->ninputs; i += 4)
|
||||
pad_and_group_input(&ir->inputs[i], 4);
|
||||
for (i = 0; i < ir->noutputs; i += 4)
|
||||
group_n(&arr_ops_out, &ir->outputs[i], 4);
|
||||
|
||||
|
||||
for (i = 0; i < block->noutputs; i++) {
|
||||
if (block->outputs[i]) {
|
||||
struct ir3_instruction *instr = block->outputs[i];
|
||||
for (i = 0; i < ir->noutputs; i++) {
|
||||
if (ir->outputs[i]) {
|
||||
struct ir3_instruction *instr = ir->outputs[i];
|
||||
instr_find_neighbors(instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ir3_block_group(struct ir3_block *block)
|
||||
void
|
||||
ir3_group(struct ir3 *ir)
|
||||
{
|
||||
ir3_clear_mark(block->shader);
|
||||
block_find_neighbors(block);
|
||||
ir3_clear_mark(ir->block->shader);
|
||||
find_neighbors(ir);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "freedreno_util.h"
|
||||
|
|
@ -48,7 +47,8 @@ struct ir3_legalize_ctx {
|
|||
int max_bary;
|
||||
};
|
||||
|
||||
static void legalize(struct ir3_legalize_ctx *ctx)
|
||||
static void
|
||||
legalize(struct ir3_legalize_ctx *ctx)
|
||||
{
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3_instruction *last_input = NULL;
|
||||
|
|
@ -220,11 +220,11 @@ static void legalize(struct ir3_legalize_ctx *ctx)
|
|||
->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
|
||||
}
|
||||
|
||||
void ir3_block_legalize(struct ir3_block *block,
|
||||
bool *has_samp, int *max_bary)
|
||||
void
|
||||
ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary)
|
||||
{
|
||||
struct ir3_legalize_ctx ctx = {
|
||||
.block = block,
|
||||
.block = ir->block,
|
||||
.max_bary = -1,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -198,10 +198,10 @@ ir3_print(struct ir3 *ir)
|
|||
|
||||
print_block(block, 0);
|
||||
|
||||
for (unsigned i = 0; i < block->noutputs; i++) {
|
||||
if (!block->outputs[i])
|
||||
for (unsigned i = 0; i < ir->noutputs; i++) {
|
||||
if (!ir->outputs[i])
|
||||
continue;
|
||||
printf("out%d: ", i);
|
||||
print_instr(block->outputs[i], 0);
|
||||
print_instr(ir->outputs[i], 0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -527,13 +527,13 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
static void
|
||||
ra_add_interference(struct ir3_ra_ctx *ctx)
|
||||
{
|
||||
struct ir3_block *block = ctx->ir->block;
|
||||
struct ir3 *ir = ctx->ir;
|
||||
|
||||
ra_block_compute_live_ranges(ctx, ctx->ir->block);
|
||||
|
||||
/* need to fix things up to keep outputs live: */
|
||||
for (unsigned i = 0; i < block->noutputs; i++) {
|
||||
struct ir3_instruction *instr = block->outputs[i];
|
||||
for (unsigned i = 0; i < ir->noutputs; i++) {
|
||||
struct ir3_instruction *instr = ir->outputs[i];
|
||||
struct ir3_instruction *defn;
|
||||
int cls, sz, off;
|
||||
|
||||
|
|
@ -682,10 +682,10 @@ ra_alloc(struct ir3_ra_ctx *ctx)
|
|||
* constraints/unknowns about setup for some of these regs:
|
||||
*/
|
||||
if (ctx->type == SHADER_FRAGMENT) {
|
||||
struct ir3_block *block = ctx->ir->block;
|
||||
struct ir3 *ir = ctx->ir;
|
||||
unsigned i = 0, j;
|
||||
if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
|
||||
struct ir3_instruction *instr = block->inputs[i];
|
||||
if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) {
|
||||
struct ir3_instruction *instr = ir->inputs[i];
|
||||
unsigned cls = size_to_class(1, true);
|
||||
unsigned name = ctx->class_base[cls] + instr->name;
|
||||
unsigned reg = ctx->set->gpr_to_ra_reg[cls][0];
|
||||
|
|
@ -695,8 +695,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
|
|||
i += 4;
|
||||
}
|
||||
|
||||
for (j = 0; i < block->ninputs; i++) {
|
||||
struct ir3_instruction *instr = block->inputs[i];
|
||||
for (j = 0; i < ir->ninputs; i++) {
|
||||
struct ir3_instruction *instr = ir->inputs[i];
|
||||
if (instr) {
|
||||
struct ir3_instruction *defn;
|
||||
int cls, sz, off;
|
||||
|
|
@ -725,14 +725,14 @@ ra_alloc(struct ir3_ra_ctx *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
||||
int ir3_ra(struct ir3 *ir, enum shader_t type,
|
||||
bool frag_coord, bool frag_face)
|
||||
{
|
||||
struct ir3_ra_ctx ctx = {
|
||||
.ir = block->shader,
|
||||
.ir = ir,
|
||||
.type = type,
|
||||
.frag_face = frag_face,
|
||||
.set = block->shader->compiler->set,
|
||||
.set = ir->compiler->set,
|
||||
};
|
||||
int ret;
|
||||
|
||||
|
|
|
|||
|
|
@ -424,11 +424,11 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
int ir3_block_sched(struct ir3_block *block)
|
||||
int ir3_sched(struct ir3 *ir)
|
||||
{
|
||||
struct ir3_sched_ctx ctx = {0};
|
||||
ir3_clear_mark(block->shader);
|
||||
sched_block(&ctx, block);
|
||||
ir3_clear_mark(ir->block->shader);
|
||||
sched_block(&ctx, ir->block);
|
||||
if (ctx.error)
|
||||
return -1;
|
||||
return 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue