diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 9283696a108..459491ae517 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -5290,14 +5290,6 @@ fixup_tg4(struct ir3_context *ctx) } } -static bool -output_slot_used_for_binning(gl_varying_slot slot) -{ - return slot == VARYING_SLOT_POS || slot == VARYING_SLOT_PSIZ || - slot == VARYING_SLOT_CLIP_DIST0 || slot == VARYING_SLOT_CLIP_DIST1 || - slot == VARYING_SLOT_VIEWPORT; -} - static struct ir3_instruction * find_end(struct ir3 *ir) { @@ -5310,48 +5302,6 @@ find_end(struct ir3 *ir) unreachable("couldn't find end instruction"); } -static void -fixup_binning_pass(struct ir3_context *ctx, struct ir3_instruction *end) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned i, j; - - /* first pass, remove unused outputs from the IR level outputs: */ - for (i = 0, j = 0; i < end->srcs_count; i++) { - unsigned outidx = end->end.outidxs[i]; - unsigned slot = so->outputs[outidx].slot; - - if (output_slot_used_for_binning(slot)) { - end->srcs[j] = end->srcs[i]; - end->end.outidxs[j] = end->end.outidxs[i]; - j++; - } - } - end->srcs_count = j; - - /* second pass, cleanup the unused slots in ir3_shader_variant::outputs - * table: - */ - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned slot = so->outputs[i].slot; - - if (output_slot_used_for_binning(slot)) { - so->outputs[j] = so->outputs[i]; - - /* fixup outidx to point to new output table entry: */ - for (unsigned k = 0; k < end->srcs_count; k++) { - if (end->end.outidxs[k] == i) { - end->end.outidxs[k] = j; - break; - } - } - - j++; - } - } - so->outputs_count = j; -} - static void collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir) { @@ -5601,10 +5551,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, memcpy(end->end.outidxs, outidxs, sizeof(unsigned) * outputs_count); array_insert(ctx->block, ctx->block->keeps, end); - - /* at this point, for binning pass, throw away unneeded outputs: */ - if (so->binning_pass && (ctx->compiler->gen < 6)) - fixup_binning_pass(ctx, end); } if (so->type == MESA_SHADER_FRAGMENT && @@ -5637,18 +5583,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, progress |= IR3_PASS(ir, ir3_shared_fold); } while (progress); - /* at this point, for binning pass, throw away unneeded outputs: - * Note that for a6xx and later, we do this after ir3_cp to ensure - * that the uniform/constant layout for BS and VS matches, so that - * we can re-use same VS_CONST state group. - */ - if (so->binning_pass && (ctx->compiler->gen >= 6)) { - fixup_binning_pass(ctx, find_end(ctx->so->ir)); - /* cleanup the result of removing unneeded outputs: */ - while (IR3_PASS(ir, ir3_dce, so)) { - } - } - IR3_PASS(ir, ir3_sched_add_deps); /* At this point, all the dead code should be long gone: */ diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 3407b4fe5f6..fb6a41cbd3b 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -810,6 +810,36 @@ lower_ucp_vs(struct ir3_shader_variant *so) return so->type == last_geom_stage; } +static bool +output_slot_used_for_binning(gl_varying_slot slot) +{ + return slot == VARYING_SLOT_POS || slot == VARYING_SLOT_PSIZ || + slot == VARYING_SLOT_CLIP_DIST0 || slot == VARYING_SLOT_CLIP_DIST1 || + slot == VARYING_SLOT_VIEWPORT; +} + +static bool +remove_nonbinning_output(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + + nir_io_semantics io = nir_intrinsic_io_semantics(intr); + + if (output_slot_used_for_binning(io.location)) + return false; + + nir_instr_remove(&intr->instr); + return true; +} + +static bool +lower_binning(nir_shader *s) +{ + return nir_shader_intrinsics_pass(s, remove_nonbinning_output, + nir_metadata_control_flow, NULL); +} + void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) { @@ -856,6 +886,15 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) } } + if (so->binning_pass) { + if (OPT(s, lower_binning)) { + progress = true; + + /* outputs_written has changed. */ + nir_shader_gather_info(s, nir_shader_get_entrypoint(s)); + } + } + /* Note that it is intentional to use the VS lowering pass for GS, since we * lower GS into something that looks more like a VS in ir3_nir_lower_gs(): */