pan/midgard: Generate MRT writeout loops

They need a very particular form; the naive way we did it before does not
appear to be sufficient in practice. So let's follow the rough structure of
the blob's writeout, since this is fixed code anyway.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Alyssa Rosenzweig 2020-01-02 12:27:59 -05:00
parent db879b034a
commit 5bc62af2a0
5 changed files with 84 additions and 31 deletions
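
To make the shape of the generated code concrete, here is a standalone sketch of the block wiring performed by mir_add_writeout_loops below. Everything in it (struct wb, MAX_RT, the block indices) is invented for illustration and is not the compiler's real MIR; it only models how each writeout branch is retargeted at a self-looping epilogue block, with a branch back to the resume point whenever another render target follows.

#include <stdbool.h>
#include <stdio.h>

#define MAX_RT 4

/* Invented stand-in for the writeout branch instruction */
struct wb {
        int target;          /* block index the branch jumps to */
        bool last_writeout;  /* set on the final RT's loop */
};

int
main(void)
{
        /* Two RTs with writeouts; each initially targets the block emitted
         * right after its own writeout (indices here are made up). */
        struct wb rt0 = { .target = 3 }, rt1 = { .target = 7 };
        struct wb *writeout_branch[MAX_RT] = { &rt0, &rt1 };

        int block_count = 8; /* next free block index */

        for (unsigned rt = 0; rt < MAX_RT; ++rt) {
                struct wb *br = writeout_branch[rt];
                if (!br)
                        continue;

                int popped = br->target;

                /* Retarget the writeout at a fresh epilogue block, which
                 * branches to itself while the writeout completes. */
                br->target = block_count++;
                printf("rt%u: writeout -> epilogue block %d (self-loop)\n",
                       rt, br->target);

                if (rt + 1 < MAX_RT && writeout_branch[rt + 1]) {
                        /* More RTs follow: branch back to the resume point
                         * so the next RT's writeout runs. */
                        printf("rt%u: epilogue -> block %d (resume)\n",
                               rt, popped);
                } else {
                        /* Last RT: the loop terminates the shader. */
                        br->last_writeout = true;
                        printf("rt%u: last writeout, terminate\n", rt);
                }
        }

        return 0;
}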

@@ -110,6 +110,7 @@ typedef struct midgard_instruction {
bool compact_branch;
bool writeout;
bool last_writeout;
/* Kind of a hack, but hint against aggressive DCE */
bool dont_eliminate;
@@ -218,6 +219,7 @@ typedef struct midgard_bundle {
bool has_embedded_constants;
float constants[4];
bool has_blend_constant;
bool last_writeout;
} midgard_bundle;
typedef struct compiler_context {
@@ -303,6 +305,9 @@ typedef struct compiler_context {
/* Model-specific quirk set */
uint32_t quirks;
/* Writeout instructions for each render target */
midgard_instruction *writeout_branch[4];
} compiler_context;
/* Per-block live_in/live_out */

@@ -1331,11 +1331,6 @@ compute_builtin_arg(nir_op op)
}
}
/* Emit store for a fragment shader, which is encoded via a fancy branch. TODO:
* Handle MRT here */
static void
emit_fragment_epilogue(compiler_context *ctx, unsigned rt);
static void
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
{
@@ -1353,9 +1348,15 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
/* Emit the branch */
midgard_instruction *br = emit_mir_instruction(ctx, ins);
schedule_barrier(ctx);
br->branch.target_block = ctx->block_count - 1;
emit_fragment_epilogue(ctx, rt);
assert(rt < ARRAY_SIZE(ctx->writeout_branch));
assert(!ctx->writeout_branch[rt]);
ctx->writeout_branch[rt] = br;
/* Push our current location = current block count - 1 = where we'll
* jump to. Maybe a bit too clever for my own good */
br->branch.target_block = ctx->block_count - 1;
}
static void
@@ -2284,28 +2285,20 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
return progress;
}
static void
static unsigned
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
{
/* Include a move to specify the render target */
if (rt > 0) {
midgard_instruction rt_move = v_mov(SSA_FIXED_REGISTER(1),
SSA_FIXED_REGISTER(1));
rt_move.mask = 1 << COMPONENT_Z;
rt_move.unit = UNIT_SADD;
emit_mir_instruction(ctx, rt_move);
}
/* Loop to ourselves */
struct midgard_instruction ins = v_branch(false, false);
ins.writeout = true;
ins.branch.target_block = ctx->block_count - 1;
ins.constants[0] = rt * 0x100;
emit_mir_instruction(ctx, ins);
ctx->current_block->epilogue = true;
schedule_barrier(ctx);
return ins.branch.target_block;
}
static midgard_block *
@@ -2557,6 +2550,36 @@ pan_format_from_glsl(const struct glsl_type *type)
MALI_NR_CHANNELS(4);
}
/* For each fragment writeout instruction, generate a writeout loop to
* associate with it */
static void
mir_add_writeout_loops(compiler_context *ctx)
{
for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) {
midgard_instruction *br = ctx->writeout_branch[rt];
if (!br) continue;
unsigned popped = br->branch.target_block;
midgard_block_add_successor(mir_get_block(ctx, popped - 1), ctx->current_block);
br->branch.target_block = emit_fragment_epilogue(ctx, rt);
/* If we have more RTs, we'll need to restore back after our
* loop terminates */
if ((rt + 1) < ARRAY_SIZE(ctx->writeout_branch) && ctx->writeout_branch[rt + 1]) {
midgard_instruction uncond = v_branch(false, false);
uncond.branch.target_block = popped;
emit_mir_instruction(ctx, uncond);
midgard_block_add_successor(ctx->current_block, mir_get_block(ctx, popped));
schedule_barrier(ctx);
} else {
/* We're last, so we can terminate here */
br->last_writeout = true;
}
}
}
int
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb)
{
@@ -2700,6 +2723,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
assert(!ins->invert);
}
if (ctx->stage == MESA_SHADER_FRAGMENT)
mir_add_writeout_loops(ctx);
/* Schedule! */
schedule_program(ctx);
mir_ra(ctx);
@@ -2836,22 +2862,14 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
/* Midgard prefetches instruction types, so during emission we
* need to lookahead. Unless this is the last instruction, in
* which we return 1. Or if this is the second to last and the
* last is an ALU, then it's also 1... */
* which we return 1. */
mir_foreach_block(ctx, block) {
mir_foreach_bundle_in_block(block, bundle) {
int lookahead = 1;
if (current_bundle + 1 < bundle_count) {
uint8_t next = source_order_bundles[current_bundle + 1]->tag;
if (!(current_bundle + 2 < bundle_count) && IS_ALU(next)) {
lookahead = 1;
} else {
lookahead = next;
}
}
if (!bundle->last_writeout && (current_bundle + 1 < bundle_count))
lookahead = source_order_bundles[current_bundle + 1]->tag;
emit_binary_bundle(ctx, bundle, compiled, lookahead);
++current_bundle;
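
The simplified prefetch rule above reduces to a small pure function. This is a sketch with invented types and names (bundle_info, lookahead_tag); the real loop reads tags out of source_order_bundles:

#include <stdbool.h>

struct bundle_info {
        unsigned tag;        /* instruction type tag the HW prefetches */
        bool last_writeout;  /* set on the final writeout loop's bundle */
};

/* Prefetch the next bundle's tag; 1 means "nothing meaningful follows",
 * used both at the end of the program and after the last writeout. */
static unsigned
lookahead_tag(const struct bundle_info *bundles, unsigned count, unsigned i)
{
        if (bundles[i].last_writeout || i + 1 >= count)
                return 1;

        return bundles[i + 1].tag;
}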

@@ -153,14 +153,20 @@ mir_compute_liveness(compiler_context *ctx)
/* If we made progress, we need to process the predecessors */
if (progress || (blk == exit) || blk->epilogue) {
if (progress || !blk->visited) {
mir_foreach_predecessor(blk, pred)
_mesa_set_add(work_list, pred);
}
blk->visited = true;
} while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);
/* Liveness is now valid */
ctx->metadata |= MIDGARD_METADATA_LIVENESS;
mir_foreach_block(ctx, block) {
block->visited = false;
}
}
/* Once liveness data is no longer valid, call this */
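
The reworked worklist condition is easier to see in isolation. Below is a minimal sketch with invented types and stubbed helpers, assuming the intent is that every block seeds its predecessors at least once, rather than only the exit and epilogue blocks as before:

#include <stdbool.h>

struct block {
        bool visited;
        unsigned num_preds;
        struct block **preds;
};

/* Stubbed stand-ins for the real transfer function and work list */
static bool update_liveness(struct block *b) { (void)b; return false; }
static void worklist_push(struct block *b)   { (void)b; }

static void
process_block(struct block *b)
{
        bool progress = update_liveness(b);

        /* Re-queue predecessors on progress, or unconditionally the first
         * time a block is seen, so every block is processed at least once. */
        if (progress || !b->visited) {
                for (unsigned i = 0; i < b->num_preds; ++i)
                        worklist_push(b->preds[i]);
        }

        b->visited = true;

        /* After the fixed point, the caller clears visited on every block
         * so the next analysis starts fresh. */
}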

@@ -380,6 +380,27 @@ mir_compute_interference(
/* First, we need liveness information to be computed per block */
mir_compute_liveness(ctx);
/* We need to force r1.w live throughout a blend shader */
if (ctx->is_blend) {
unsigned r1w = ~0;
mir_foreach_block(ctx, block) {
mir_foreach_instr_in_block_rev(block, ins) {
if (ins->writeout)
r1w = ins->src[2];
}
if (r1w != ~0)
break;
}
mir_foreach_instr_global(ctx, ins) {
if (ins->dest < ctx->temp_count)
lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w, 0xF);
}
}
/* Now that every block has live_in/live_out computed, we can determine
* interference by walking each block linearly. Take live_out at the
* end of each block and walk the block backwards. */
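
The blend-shader constraint above boils down to pairwise interference: find the node feeding the writeout (the value that must survive in r1.w) and forbid every written node from overlapping its bytes. A toy model, with a stub standing in for the real lcra_add_node_interference call:

struct wr_node {
        unsigned dest;       /* node index an instruction writes */
        unsigned bytemask;   /* which bytes of the destination it writes */
};

/* Stub: record that nodes a and b may not share the given bytes */
static void
add_interference(unsigned a, unsigned amask, unsigned b, unsigned bmask)
{
        (void)a; (void)amask; (void)b; (void)bmask;
}

/* Make every written node interfere with the writeout source so r1.w
 * stays allocated; 0xF covers the bytes of one 32-bit component. */
static void
force_writeout_src_live(const struct wr_node *nodes, unsigned count,
                        unsigned r1w)
{
        for (unsigned i = 0; i < count; ++i)
                add_interference(nodes[i].dest, nodes[i].bytemask, r1w, 0xF);
}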

@@ -890,6 +890,9 @@ mir_schedule_alu(
mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
if (writeout) {
/* Propagate up */
bundle.last_writeout = branch->last_writeout;
midgard_instruction add = v_mov(~0, make_compiler_temp(ctx));
if (!ctx->is_blend) {
@@ -938,7 +941,7 @@ mir_schedule_alu(
/* If we have a render target reference, schedule a move for it */
if (branch && branch->writeout && branch->constants[0]) {
if (branch && branch->writeout && (branch->constants[0] || ctx->is_blend)) {
midgard_instruction mov = v_mov(~0, make_compiler_temp(ctx));
sadd = mem_dup(&mov, sizeof(midgard_instruction));
sadd->unit = UNIT_SADD;