mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 05:20:09 +01:00
freedreno/ir3: rework varying packing
Originally we kept track of a table of inputs, but with new-style frag inputs this becomes awkward. Rework it so that we initially assign un-packed varying locations, and then, after the shader is compiled, scan to find the inputs actually used and re-pack. Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
91a1354cd6
commit
831f1a05c0
1 changed files with 98 additions and 30 deletions
|
|
@ -83,12 +83,12 @@ create_input(struct ir3_context *ctx, unsigned n)
|
|||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
create_frag_input(struct ir3_context *ctx, bool use_ldlv)
|
||||
create_frag_input(struct ir3_context *ctx, bool use_ldlv, unsigned n)
|
||||
{
|
||||
struct ir3_block *block = ctx->block;
|
||||
struct ir3_instruction *instr;
|
||||
/* actual inloc is assigned and fixed up later: */
|
||||
struct ir3_instruction *inloc = create_immed(block, 0);
|
||||
/* packed inloc is fixed up later: */
|
||||
struct ir3_instruction *inloc = create_immed(block, n);
|
||||
|
||||
if (use_ldlv) {
|
||||
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
|
||||
|
|
@ -2275,7 +2275,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
|||
*/
|
||||
so->inputs[n].slot = VARYING_SLOT_VAR8;
|
||||
so->inputs[n].bary = true;
|
||||
instr = create_frag_input(ctx, false);
|
||||
instr = create_frag_input(ctx, false, idx);
|
||||
} else {
|
||||
bool use_ldlv = false;
|
||||
|
||||
|
|
@ -2304,7 +2304,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
|||
|
||||
so->inputs[n].bary = true;
|
||||
|
||||
instr = create_frag_input(ctx, use_ldlv);
|
||||
instr = create_frag_input(ctx, use_ldlv, idx);
|
||||
}
|
||||
|
||||
compile_assert(ctx, idx < ctx->ir->ninputs);
|
||||
|
|
@ -2326,6 +2326,92 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
|||
}
|
||||
}
|
||||
|
||||
/* Initially we assign non-packed inloc's for varyings, as we don't really
|
||||
* know up-front which components will be unused. After all the compilation
|
||||
* stages we scan the shader to see which components are actually used, and
|
||||
* re-pack the inlocs to eliminate unneeded varyings.
|
||||
*/
|
||||
static void
|
||||
pack_inlocs(struct ir3_context *ctx)
|
||||
{
|
||||
struct ir3_shader_variant *so = ctx->so;
|
||||
uint8_t used_components[so->inputs_count];
|
||||
|
||||
memset(used_components, 0, sizeof(used_components));
|
||||
|
||||
/*
|
||||
* First Step: scan shader to find which bary.f/ldlv remain:
|
||||
*/
|
||||
|
||||
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
if (is_input(instr)) {
|
||||
unsigned inloc = instr->regs[1]->iim_val;
|
||||
unsigned i = inloc / 4;
|
||||
unsigned j = inloc % 4;
|
||||
|
||||
compile_assert(ctx, instr->regs[1]->flags & IR3_REG_IMMED);
|
||||
compile_assert(ctx, i < so->inputs_count);
|
||||
|
||||
used_components[i] |= 1 << j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Second Step: reassign varying inloc/slots:
|
||||
*/
|
||||
|
||||
unsigned actual_in = 0;
|
||||
unsigned inloc = 0;
|
||||
|
||||
for (unsigned i = 0; i < so->inputs_count; i++) {
|
||||
unsigned compmask = 0, maxcomp = 0;
|
||||
|
||||
so->inputs[i].ncomp = 0;
|
||||
so->inputs[i].inloc = inloc;
|
||||
so->inputs[i].bary = false;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
if (!(used_components[i] & (1 << j)))
|
||||
continue;
|
||||
|
||||
compmask |= (1 << j);
|
||||
actual_in++;
|
||||
so->inputs[i].ncomp++;
|
||||
maxcomp = j + 1;
|
||||
|
||||
/* at this point, since used_components[i] mask is only
|
||||
* considering varyings (ie. not sysvals) we know this
|
||||
* is a varying:
|
||||
*/
|
||||
so->inputs[i].bary = true;
|
||||
}
|
||||
|
||||
if (so->inputs[i].bary) {
|
||||
so->varying_in++;
|
||||
so->inputs[i].compmask = (1 << maxcomp) - 1;
|
||||
inloc += maxcomp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Third Step: reassign packed inloc's:
|
||||
*/
|
||||
|
||||
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
if (is_input(instr)) {
|
||||
unsigned inloc = instr->regs[1]->iim_val;
|
||||
unsigned i = inloc / 4;
|
||||
unsigned j = inloc % 4;
|
||||
|
||||
instr->regs[1]->iim_val = so->inputs[i].inloc + j;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
setup_output(struct ir3_context *ctx, nir_variable *out)
|
||||
{
|
||||
|
|
@ -2596,7 +2682,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
struct ir3_context *ctx;
|
||||
struct ir3 *ir;
|
||||
struct ir3_instruction **inputs;
|
||||
unsigned i, actual_in, inloc;
|
||||
unsigned i;
|
||||
int ret = 0, max_bary;
|
||||
|
||||
assert(!so->ir);
|
||||
|
|
@ -2741,6 +2827,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
ir3_print(ir);
|
||||
}
|
||||
|
||||
if (so->type == MESA_SHADER_FRAGMENT)
|
||||
pack_inlocs(ctx);
|
||||
|
||||
/* fixup input/outputs: */
|
||||
for (i = 0; i < so->outputs_count; i++) {
|
||||
/* sometimes we get outputs that don't write the .x coord, like:
|
||||
|
|
@ -2761,34 +2850,15 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
}
|
||||
|
||||
/* Note that some or all channels of an input may be unused: */
|
||||
actual_in = 0;
|
||||
inloc = 0;
|
||||
for (i = 0; i < so->inputs_count; i++) {
|
||||
unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0;
|
||||
so->inputs[i].ncomp = 0;
|
||||
so->inputs[i].inloc = inloc;
|
||||
unsigned j, reg = regid(63,0);
|
||||
for (j = 0; j < 4; j++) {
|
||||
struct ir3_instruction *in = inputs[(i*4) + j];
|
||||
|
||||
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
|
||||
compmask |= (1 << j);
|
||||
reg = in->regs[0]->num - j;
|
||||
actual_in++;
|
||||
so->inputs[i].ncomp++;
|
||||
if ((so->type == MESA_SHADER_FRAGMENT) && so->inputs[i].bary) {
|
||||
/* assign inloc: */
|
||||
assert(in->regs[1]->flags & IR3_REG_IMMED);
|
||||
in->regs[1]->iim_val = inloc + j;
|
||||
maxcomp = j + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((so->type == MESA_SHADER_FRAGMENT) && compmask && so->inputs[i].bary) {
|
||||
so->varying_in++;
|
||||
so->inputs[i].compmask = (1 << maxcomp) - 1;
|
||||
inloc += maxcomp;
|
||||
} else if (!so->inputs[i].sysval) {
|
||||
so->inputs[i].compmask = compmask;
|
||||
}
|
||||
so->inputs[i].regid = reg;
|
||||
}
|
||||
|
||||
|
|
@ -2808,9 +2878,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
so->branchstack = ctx->max_stack;
|
||||
|
||||
/* Note that actual_in counts inputs that are not bary.f'd for FS: */
|
||||
if (so->type == MESA_SHADER_VERTEX)
|
||||
so->total_in = actual_in;
|
||||
else
|
||||
if (so->type == MESA_SHADER_FRAGMENT)
|
||||
so->total_in = max_bary + 1;
|
||||
|
||||
so->max_sun = ir->max_sun;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue