mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 17:58:26 +02:00
freedreno/ir3: rework varying packing
Originally we kept track of a table of inputs, but with new-style frag inputs this becomes awkward. Re-work it so that initially we assign un-packed varying locations, and then, after the shader is compiled, scan to find the inputs that are actually used and re-pack them. Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
91a1354cd6
commit
831f1a05c0
1 changed files with 98 additions and 30 deletions
|
|
@ -83,12 +83,12 @@ create_input(struct ir3_context *ctx, unsigned n)
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ir3_instruction *
|
static struct ir3_instruction *
|
||||||
create_frag_input(struct ir3_context *ctx, bool use_ldlv)
|
create_frag_input(struct ir3_context *ctx, bool use_ldlv, unsigned n)
|
||||||
{
|
{
|
||||||
struct ir3_block *block = ctx->block;
|
struct ir3_block *block = ctx->block;
|
||||||
struct ir3_instruction *instr;
|
struct ir3_instruction *instr;
|
||||||
/* actual inloc is assigned and fixed up later: */
|
/* packed inloc is fixed up later: */
|
||||||
struct ir3_instruction *inloc = create_immed(block, 0);
|
struct ir3_instruction *inloc = create_immed(block, n);
|
||||||
|
|
||||||
if (use_ldlv) {
|
if (use_ldlv) {
|
||||||
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
|
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
|
||||||
|
|
@ -2275,7 +2275,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
||||||
*/
|
*/
|
||||||
so->inputs[n].slot = VARYING_SLOT_VAR8;
|
so->inputs[n].slot = VARYING_SLOT_VAR8;
|
||||||
so->inputs[n].bary = true;
|
so->inputs[n].bary = true;
|
||||||
instr = create_frag_input(ctx, false);
|
instr = create_frag_input(ctx, false, idx);
|
||||||
} else {
|
} else {
|
||||||
bool use_ldlv = false;
|
bool use_ldlv = false;
|
||||||
|
|
||||||
|
|
@ -2304,7 +2304,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
||||||
|
|
||||||
so->inputs[n].bary = true;
|
so->inputs[n].bary = true;
|
||||||
|
|
||||||
instr = create_frag_input(ctx, use_ldlv);
|
instr = create_frag_input(ctx, use_ldlv, idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
compile_assert(ctx, idx < ctx->ir->ninputs);
|
compile_assert(ctx, idx < ctx->ir->ninputs);
|
||||||
|
|
@ -2326,6 +2326,92 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Initially we assign non-packed inloc's for varyings, as we don't really
|
||||||
|
* know up-front which components will be unused. After all the compilation
|
||||||
|
* stages we scan the shader to see which components are actually used, and
|
||||||
|
* re-pack the inlocs to eliminate unneeded varyings.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
pack_inlocs(struct ir3_context *ctx)
|
||||||
|
{
|
||||||
|
struct ir3_shader_variant *so = ctx->so;
|
||||||
|
uint8_t used_components[so->inputs_count];
|
||||||
|
|
||||||
|
memset(used_components, 0, sizeof(used_components));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* First Step: scan shader to find which bary.f/ldlv remain:
|
||||||
|
*/
|
||||||
|
|
||||||
|
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
|
||||||
|
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||||
|
if (is_input(instr)) {
|
||||||
|
unsigned inloc = instr->regs[1]->iim_val;
|
||||||
|
unsigned i = inloc / 4;
|
||||||
|
unsigned j = inloc % 4;
|
||||||
|
|
||||||
|
compile_assert(ctx, instr->regs[1]->flags & IR3_REG_IMMED);
|
||||||
|
compile_assert(ctx, i < so->inputs_count);
|
||||||
|
|
||||||
|
used_components[i] |= 1 << j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Second Step: reassign varying inloc/slots:
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned actual_in = 0;
|
||||||
|
unsigned inloc = 0;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < so->inputs_count; i++) {
|
||||||
|
unsigned compmask = 0, maxcomp = 0;
|
||||||
|
|
||||||
|
so->inputs[i].ncomp = 0;
|
||||||
|
so->inputs[i].inloc = inloc;
|
||||||
|
so->inputs[i].bary = false;
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < 4; j++) {
|
||||||
|
if (!(used_components[i] & (1 << j)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
compmask |= (1 << j);
|
||||||
|
actual_in++;
|
||||||
|
so->inputs[i].ncomp++;
|
||||||
|
maxcomp = j + 1;
|
||||||
|
|
||||||
|
/* at this point, since used_components[i] mask is only
|
||||||
|
* considering varyings (ie. not sysvals) we know this
|
||||||
|
* is a varying:
|
||||||
|
*/
|
||||||
|
so->inputs[i].bary = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (so->inputs[i].bary) {
|
||||||
|
so->varying_in++;
|
||||||
|
so->inputs[i].compmask = (1 << maxcomp) - 1;
|
||||||
|
inloc += maxcomp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Third Step: reassign packed inloc's:
|
||||||
|
*/
|
||||||
|
|
||||||
|
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
|
||||||
|
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||||
|
if (is_input(instr)) {
|
||||||
|
unsigned inloc = instr->regs[1]->iim_val;
|
||||||
|
unsigned i = inloc / 4;
|
||||||
|
unsigned j = inloc % 4;
|
||||||
|
|
||||||
|
instr->regs[1]->iim_val = so->inputs[i].inloc + j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_output(struct ir3_context *ctx, nir_variable *out)
|
setup_output(struct ir3_context *ctx, nir_variable *out)
|
||||||
{
|
{
|
||||||
|
|
@ -2596,7 +2682,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||||
struct ir3_context *ctx;
|
struct ir3_context *ctx;
|
||||||
struct ir3 *ir;
|
struct ir3 *ir;
|
||||||
struct ir3_instruction **inputs;
|
struct ir3_instruction **inputs;
|
||||||
unsigned i, actual_in, inloc;
|
unsigned i;
|
||||||
int ret = 0, max_bary;
|
int ret = 0, max_bary;
|
||||||
|
|
||||||
assert(!so->ir);
|
assert(!so->ir);
|
||||||
|
|
@ -2741,6 +2827,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||||
ir3_print(ir);
|
ir3_print(ir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (so->type == MESA_SHADER_FRAGMENT)
|
||||||
|
pack_inlocs(ctx);
|
||||||
|
|
||||||
/* fixup input/outputs: */
|
/* fixup input/outputs: */
|
||||||
for (i = 0; i < so->outputs_count; i++) {
|
for (i = 0; i < so->outputs_count; i++) {
|
||||||
/* sometimes we get outputs that don't write the .x coord, like:
|
/* sometimes we get outputs that don't write the .x coord, like:
|
||||||
|
|
@ -2761,34 +2850,15 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note that some or all channels of an input may be unused: */
|
/* Note that some or all channels of an input may be unused: */
|
||||||
actual_in = 0;
|
|
||||||
inloc = 0;
|
|
||||||
for (i = 0; i < so->inputs_count; i++) {
|
for (i = 0; i < so->inputs_count; i++) {
|
||||||
unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0;
|
unsigned j, reg = regid(63,0);
|
||||||
so->inputs[i].ncomp = 0;
|
|
||||||
so->inputs[i].inloc = inloc;
|
|
||||||
for (j = 0; j < 4; j++) {
|
for (j = 0; j < 4; j++) {
|
||||||
struct ir3_instruction *in = inputs[(i*4) + j];
|
struct ir3_instruction *in = inputs[(i*4) + j];
|
||||||
|
|
||||||
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
|
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
|
||||||
compmask |= (1 << j);
|
|
||||||
reg = in->regs[0]->num - j;
|
reg = in->regs[0]->num - j;
|
||||||
actual_in++;
|
|
||||||
so->inputs[i].ncomp++;
|
|
||||||
if ((so->type == MESA_SHADER_FRAGMENT) && so->inputs[i].bary) {
|
|
||||||
/* assign inloc: */
|
|
||||||
assert(in->regs[1]->flags & IR3_REG_IMMED);
|
|
||||||
in->regs[1]->iim_val = inloc + j;
|
|
||||||
maxcomp = j + 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((so->type == MESA_SHADER_FRAGMENT) && compmask && so->inputs[i].bary) {
|
|
||||||
so->varying_in++;
|
|
||||||
so->inputs[i].compmask = (1 << maxcomp) - 1;
|
|
||||||
inloc += maxcomp;
|
|
||||||
} else if (!so->inputs[i].sysval) {
|
|
||||||
so->inputs[i].compmask = compmask;
|
|
||||||
}
|
|
||||||
so->inputs[i].regid = reg;
|
so->inputs[i].regid = reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2808,9 +2878,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||||
so->branchstack = ctx->max_stack;
|
so->branchstack = ctx->max_stack;
|
||||||
|
|
||||||
/* Note that actual_in counts inputs that are not bary.f'd for FS: */
|
/* Note that actual_in counts inputs that are not bary.f'd for FS: */
|
||||||
if (so->type == MESA_SHADER_VERTEX)
|
if (so->type == MESA_SHADER_FRAGMENT)
|
||||||
so->total_in = actual_in;
|
|
||||||
else
|
|
||||||
so->total_in = max_bary + 1;
|
so->total_in = max_bary + 1;
|
||||||
|
|
||||||
so->max_sun = ir->max_sun;
|
so->max_sun = ir->max_sun;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue