broadcom/compiler: rework scratch lowering

Let's rely on nir_lower_mem_access_bit_sizes doing all the heavy work, so
v3d_nir_lower_scratch can be cleaned up quite a lot.

Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29711>
Authored by Karol Herbst on 2024-06-13 11:27:47 +02:00; committed by Marge Bot.
parent 75196e86f1
commit 05b9705ae0
3 changed files with 26 additions and 74 deletions

View file

@ -133,6 +133,16 @@ v3d_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
uint32_t align_offset, bool offset_is_const,
const void *cb_data)
{
/* we only support single component 32 bit load/stores on scratch */
if (intrin == nir_intrinsic_load_scratch ||
intrin == nir_intrinsic_store_scratch) {
return (nir_mem_access_size_align){
.num_components = 1,
.bit_size = 32,
.align = 4,
};
}
align = nir_combined_align(align, align_offset);
assert(util_is_power_of_two_nonzero(align));
@ -210,7 +220,7 @@ v3d_nir_lower_load_store_bitsize(nir_shader *s)
nir_lower_mem_access_bit_sizes_options lower_options = {
.modes = nir_var_mem_global | nir_var_mem_ssbo |
nir_var_mem_ubo | nir_var_mem_constant |
nir_var_mem_shared,
nir_var_mem_shared | nir_var_function_temp,
.callback = v3d_size_align_cb,
};

View file

@ -30,18 +30,17 @@
*
* Swizzles around the addresses of
* nir_intrinsic_load_scratch/nir_intrinsic_store_scratch so that a QPU stores
* a cacheline at a time per dword of scratch access, scalarizing and removing
* writemasks in the process.
* a cacheline at a time per dword of scratch access.
*/
static nir_def *
v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
{
bool is_store = instr->intrinsic == nir_intrinsic_store_scratch;
nir_def *offset = instr->src[is_store ? 1 : 0].ssa;
b->cursor = nir_before_instr(&instr->instr);
nir_def *offset = nir_get_io_offset_src(instr)->ssa;
assert(nir_intrinsic_align_mul(instr) >= 4);
assert(nir_intrinsic_align_offset(instr) == 0);
assert(nir_intrinsic_align_offset(instr) % 4 == 0);
/* The spill_offset register will already have the subgroup ID (EIDX)
* shifted and ORed in at bit 2, so all we need to do is to move the
@ -51,67 +50,13 @@ v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
}
static void
v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
v3d_nir_lower_scratch_instr(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_before_instr(&instr->instr);
nir_def *offset = v3d_nir_scratch_offset(b,instr);
nir_def *chans[NIR_MAX_VEC_COMPONENTS];
for (int i = 0; i < instr->num_components; i++) {
nir_def *chan_offset =
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
nir_intrinsic_instr *chan_instr =
nir_intrinsic_instr_create(b->shader, instr->intrinsic);
chan_instr->num_components = 1;
nir_def_init(&chan_instr->instr, &chan_instr->def, 1,
instr->def.bit_size);
chan_instr->src[0] = nir_src_for_ssa(chan_offset);
nir_intrinsic_set_align(chan_instr, 4, 0);
nir_builder_instr_insert(b, &chan_instr->instr);
chans[i] = &chan_instr->def;
}
nir_def *result = nir_vec(b, chans, instr->num_components);
nir_def_rewrite_uses(&instr->def, result);
nir_instr_remove(&instr->instr);
}
static void
v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_before_instr(&instr->instr);
/* scalarized through nir_lower_mem_access_bit_sizes */
assert(instr->num_components == 1);
nir_def *offset = v3d_nir_scratch_offset(b, instr);
nir_def *value = instr->src[0].ssa;
for (int i = 0; i < instr->num_components; i++) {
if (!(nir_intrinsic_write_mask(instr) & (1 << i)))
continue;
nir_def *chan_offset =
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
nir_intrinsic_instr *chan_instr =
nir_intrinsic_instr_create(b->shader, instr->intrinsic);
chan_instr->num_components = 1;
chan_instr->src[0] = nir_src_for_ssa(nir_channel(b,
value,
i));
chan_instr->src[1] = nir_src_for_ssa(chan_offset);
nir_intrinsic_set_write_mask(chan_instr, 0x1);
nir_intrinsic_set_align(chan_instr, 4, 0);
nir_builder_instr_insert(b, &chan_instr->instr);
}
nir_instr_remove(&instr->instr);
nir_src_rewrite(nir_get_io_offset_src(instr), offset);
}
static bool
@ -121,10 +66,8 @@ v3d_nir_lower_scratch_cb(nir_builder *b,
{
switch (intr->intrinsic) {
case nir_intrinsic_load_scratch:
v3d_nir_lower_load_scratch(b, intr);
return true;
case nir_intrinsic_store_scratch:
v3d_nir_lower_store_scratch(b, intr);
v3d_nir_lower_scratch_instr(b, intr);
return true;
default:
return false;

View file

@ -725,13 +725,7 @@ v3d_lower_nir(struct v3d_compile *c)
}
NIR_PASS(_, c->s, nir_lower_compute_system_values, NULL);
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
nir_var_function_temp,
0,
glsl_get_natural_size_align_bytes);
NIR_PASS(_, c->s, nir_lower_is_helper_invocation);
NIR_PASS(_, c->s, v3d_nir_lower_scratch);
NIR_PASS(_, c->s, v3d_nir_lower_null_pointers);
}
@ -1708,10 +1702,15 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS(_, c->s, nir_lower_robust_access, &opts);
}
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
nir_var_function_temp,
0,
glsl_get_natural_size_align_bytes);
NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
NIR_PASS(_, c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
NIR_PASS(_, c->s, v3d_nir_lower_load_store_bitsize);
NIR_PASS(_, c->s, v3d_nir_lower_scratch);
NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);