From 6a12ea02feeebac4be8986616327db31926b3ce7 Mon Sep 17 00:00:00 2001 From: Italo Nicola Date: Fri, 16 Apr 2021 10:23:48 +0000 Subject: [PATCH] pan/mdg: properly encode/decode ldst instructions Signed-off-by: Italo Nicola Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 13 +- src/panfrost/midgard/disassemble.c | 295 ++++++++++++------ src/panfrost/midgard/helpers.h | 43 ++- src/panfrost/midgard/midgard.h | 84 +++-- src/panfrost/midgard/midgard_address.c | 39 ++- src/panfrost/midgard/midgard_compile.c | 113 ++++--- src/panfrost/midgard/midgard_emit.c | 87 ++++-- .../midgard/midgard_opt_perspective.c | 11 +- src/panfrost/midgard/midgard_ra.c | 6 +- src/panfrost/midgard/midgard_schedule.c | 17 +- src/panfrost/midgard/mir_promote_uniforms.c | 4 +- 11 files changed, 465 insertions(+), 247 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 5a5623c2cd3..828464a26a5 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -516,9 +516,9 @@ void mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block void mir_flip(midgard_instruction *ins); void mir_compute_temp_count(compiler_context *ctx); -#define LDST_GLOBAL 0x3E -#define LDST_SHARED 0x2E -#define LDST_SCRATCH 0x2A +#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2) +#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z) +#define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z) void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg); void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias); @@ -573,8 +573,11 @@ v_load_store_scratch( .op = is_store ? 
midgard_op_st_128 : midgard_op_ld_128, .load_store = { /* For register spilling - to thread local storage */ - .arg_1 = 0xEA, - .arg_2 = 0x1E, + .arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR, + .arg_comp = COMPONENT_Z, + .bitsize_toggle = true, + .index_format = midgard_index_address_u32, + .index_reg = REGISTER_LDST_ZERO, }, /* If we spill an unspill, RA goes into an infinite loop */ diff --git a/src/panfrost/midgard/disassemble.c b/src/panfrost/midgard/disassemble.c index b784f99f014..1e8e8a48e42 100644 --- a/src/panfrost/midgard/disassemble.c +++ b/src/panfrost/midgard/disassemble.c @@ -304,6 +304,13 @@ static char *argmod_names[3] = { ".x2", }; +static char *index_format_names[4] = { + "", + ".u64", + ".u32", + ".s32" +}; + static void print_outmod(FILE *fp, unsigned outmod, bool is_int) { @@ -760,6 +767,21 @@ print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink fprintf(fp, " /* %X */", mask); } +/* TODO: 16-bit mode */ +static void +print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) { + fprintf(fp, "."); + + for (unsigned i = 0; i < 4; ++i) { + bool write = (mask & (1 << i)) != 0; + unsigned c = (swizzle >> (i * 2)) & 3; + /* We can't omit the swizzle here since many ldst ops have a + * combined swizzle/writemask, and it would be ambiguous to not + * print the masked-out components. */ + fprintf(fp, "%c", write ? components[c] : '~'); + } +} + /* Prints the 4-bit masks found in texture and load/store ops, as opposed to * the 8-bit masks found in (vector) ALU ops. Supports texture-style 16-bit * mode as well, but not load/store-style 16-bit mode. */ @@ -1235,41 +1257,40 @@ print_alu_word(FILE *fp, uint32_t *words, unsigned num_quad_words, return branch_forward; } -static void +/* TODO: how can we use this now that we know that these params can't be known + * before run time in every single case? Maybe just use it in the cases we can? 
*/ +UNUSED static void print_varying_parameters(FILE *fp, midgard_load_store_word *word) { - midgard_varying_parameter param; - unsigned v = word->varying_parameters; - memcpy(¶m, &v, sizeof(param)); + midgard_varying_params p = midgard_unpack_varying_params(*word); - if (param.is_varying) { - /* If a varying, there are qualifiers */ - if (param.flat) - fprintf(fp, ".flat"); + /* If a varying, there are qualifiers */ + if (p.flat_shading) + fprintf(fp, ".flat"); - if (param.interpolation != midgard_interp_default) { - if (param.interpolation == midgard_interp_centroid) - fprintf(fp, ".centroid"); - else if (param.interpolation == midgard_interp_sample) - fprintf(fp, ".sample"); - else - fprintf(fp, ".interp%d", param.interpolation); - } + if (p.perspective_correction) + fprintf(fp, ".correction"); - if (param.modifier != midgard_varying_mod_none) { - if (param.modifier == midgard_varying_mod_perspective_w) - fprintf(fp, ".perspectivew"); - else if (param.modifier == midgard_varying_mod_perspective_z) - fprintf(fp, ".perspectivez"); - else - fprintf(fp, ".mod%d", param.modifier); - } - } else if (param.flat || param.interpolation || param.modifier) { - fprintf(fp, " /* is_varying not set but varying metadata attached */"); + if (p.centroid_mapping) + fprintf(fp, ".centroid"); + + if (p.interpolate_sample) + fprintf(fp, ".sample"); + + switch (p.modifier) { + case midgard_varying_mod_perspective_y: + fprintf(fp, ".perspectivey"); + break; + case midgard_varying_mod_perspective_z: + fprintf(fp, ".perspectivez"); + break; + case midgard_varying_mod_perspective_w: + fprintf(fp, ".perspectivew"); + break; + default: + unreachable("invalid varying modifier"); + break; } - - if (param.zero0 || param.zero1 || param.zero2) - fprintf(fp, " /* zero tripped, %u %u %u */ ", param.zero0, param.zero1, param.zero2); } static bool @@ -1304,33 +1325,14 @@ is_op_attribute(unsigned op) return false; } +/* Helper to print integer well-formatted, but only when non-zero. 
*/ static void -print_load_store_arg(FILE *fp, uint8_t arg, unsigned index) +midgard_print_sint(FILE *fp, int n) { - /* Try to interpret as a register */ - midgard_ldst_register_select sel; - memcpy(&sel, &arg, sizeof(arg)); - - /* If unknown is set, we're not sure what this is or how to - * interpret it. But if it's zero, we get it. */ - - if (sel.unknown) { - fprintf(fp, "0x%02X", arg); - return; - } - - print_ldst_read_reg(fp, sel.select); - fprintf(fp, ".%c", components[sel.component]); - - /* Only print a shift if it's non-zero. Shifts only make sense for the - * second index. For the first, we're not sure what it means yet */ - - if (index == 1) { - if (sel.shift) - fprintf(fp, " << %u", sel.shift); - } else { - fprintf(fp, " /* %X */", sel.shift); - } + if (n > 0) + fprintf(fp, " + 0x%X", n); + else if (n < 0) + fprintf(fp, " - 0x%X", -n); } static void @@ -1347,63 +1349,160 @@ print_load_store_instr(FILE *fp, uint64_t data) print_ld_st_opcode(fp, word->op); - unsigned address = word->address; + if (word->op == midgard_op_trap) { + fprintf(fp, " 0x%X\n", word->signed_offset); + return; + } + + /* Print opcode modifiers */ + + if (OP_USES_ATTRIB(word->op)) /* which attrib table? */ + fprintf(fp, ".%s", (word->index_format >> 1) ? "secondary" : "primary"); + else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op)) + fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16"); + + fprintf(fp, " "); + + /* src/dest register */ + + if (!OP_IS_STORE(word->op)) { + print_ldst_write_reg(fp, word->reg); + + /* Some opcodes don't have a swizzable src register, and + * instead the swizzle is applied before the result is written + * to the dest reg. For these ops, we combine the writemask + * with the swizzle to display them in the disasm compactly. 
*/ + unsigned swizzle = word->swizzle; + if ((OP_IS_REG2REG_LDST(word->op) && + word->op != midgard_op_lea && + word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op)) + swizzle = 0xE4; + print_ldst_mask(fp, word->mask, swizzle); + } else { + print_ldst_read_reg(fp, word->reg); + print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, 0xFF); + } + + /* ld_ubo args */ + if (OP_IS_UBO_READ(word->op)) { + if (word->signed_offset & 1) { /* buffer index imm */ + unsigned imm = midgard_unpack_ubo_index_imm(*word); + fprintf(fp, ", %u", imm); + } else { /* buffer index from reg */ + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + fprintf(fp, ".%c", components[word->arg_comp]); + } + + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift); + midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset)); + } + + /* mem addr expression */ + if (OP_HAS_ADDRESS(word->op)) { + fprintf(fp, ", [ "); + print_ldst_read_reg(fp, word->arg_reg); + fprintf(fp, ".u%d.%c", + word->bitsize_toggle ? 64 : 32, components[word->arg_comp]); + + if ((word->op < midgard_op_atomic_cmpxchg || + word->op > midgard_op_atomic_cmpxchg64_be) && + word->index_reg != 0x7) { + fprintf(fp, " + ("); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, "%s.%c << %u)", + index_format_names[word->index_format], + components[word->index_comp], word->index_shift); + } + + midgard_print_sint(fp, word->signed_offset); + + fprintf(fp, " ]"); + } + + /* src reg for reg2reg ldst opcodes */ + if (OP_IS_REG2REG_LDST(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, 0xFF); + } + + /* atomic ops encode the source arg where the ldst swizzle would be. 
*/ + if (OP_IS_ATOMIC(word->op)) { + unsigned src = (word->swizzle >> 2) & 0x7; + unsigned src_comp = word->swizzle & 0x3; + fprintf(fp, ", "); + print_ldst_read_reg(fp, src); + fprintf(fp, ".%c", components[src_comp]); + } + + /* CMPXCHG encodes the extra comparison arg where the index reg would be. */ + if (word->op >= midgard_op_atomic_cmpxchg && + word->op <= midgard_op_atomic_cmpxchg64_be) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c", components[word->index_comp]); + } + + /* index reg for attr/vary/images, selector for ld/st_special */ + if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift); + midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); + } + + /* vertex reg for attrib/varying ops, coord reg for image ops */ + if (OP_USES_ATTRIB(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + + if (OP_IS_IMAGE(word->op)) + fprintf(fp, ".u%d", word->bitsize_toggle ? 
64 : 32); + + fprintf(fp, ".%c", components[word->arg_comp]); + + if (word->bitsize_toggle && !OP_IS_IMAGE(word->op)) + midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset)); + } + + /* TODO: properly decode format specifier for PACK/UNPACK ops */ + if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) { + fprintf(fp, ", "); + unsigned format_specifier = (word->signed_offset << 4) | word->index_shift; + fprintf(fp, "0x%X", format_specifier); + } + + fprintf(fp, "\n"); + + /* Debugging stuff */ if (is_op_varying(word->op)) { - print_varying_parameters(fp, word); + /* Do some analysis: check if direct access */ - /* Do some analysis: check if direct cacess */ - - if ((word->arg_2 == 0x1E) && midg_stats.varying_count >= 0) - update_stats(&midg_stats.varying_count, address); + if (word->index_reg == 0x7 && midg_stats.varying_count >= 0) + update_stats(&midg_stats.varying_count, + UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); else midg_stats.varying_count = -16; } else if (is_op_attribute(word->op)) { - if ((word->arg_2 == 0x1E) && midg_stats.attribute_count >= 0) - update_stats(&midg_stats.attribute_count, address); + if (word->index_reg == 0x7 && midg_stats.attribute_count >= 0) + update_stats(&midg_stats.attribute_count, + UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); else midg_stats.attribute_count = -16; } - fprintf(fp, " "); - - if (!OP_IS_STORE(word->op)) - print_ldst_write_reg(fp, word->reg); - else - print_ldst_read_reg(fp, word->reg); - - print_mask_4(fp, word->mask, false); - if (!OP_IS_STORE(word->op)) update_dest(word->reg); - bool is_ubo = OP_IS_UBO_READ(word->op); - - if (is_ubo) { - /* UBOs use their own addressing scheme */ - - int lo = word->varying_parameters >> 7; - int hi = word->address; - - /* TODO: Combine fields logically */ - address = (hi << 3) | lo; - } - - fprintf(fp, ", %u", address); - - print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF); - - fprintf(fp, ", "); - - if (is_ubo) 
{ - fprintf(fp, "ubo%u", word->arg_1); - update_stats(&midg_stats.uniform_buffer_count, word->arg_1); - } else - print_load_store_arg(fp, word->arg_1, 0); - - fprintf(fp, ", "); - print_load_store_arg(fp, word->arg_2, 1); - fprintf(fp, " /* %X */\n", word->varying_parameters); + if (OP_IS_UBO_READ(word->op)) + update_stats(&midg_stats.uniform_buffer_count, + UNPACK_LDST_UBO_OFS(word->signed_offset)); midg_stats.instruction_count++; } diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index fffe0d21142..c8efbf60ca9 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -355,9 +355,9 @@ mir_is_simple_swizzle(unsigned *swizzle, unsigned mask) /* Packs a load/store argument */ static inline uint8_t -midgard_ldst_reg(unsigned reg, unsigned component, unsigned size) +midgard_ldst_comp(unsigned reg, unsigned component, unsigned size) { - assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1)); + assert((reg & ~1) == 0); assert(size == 16 || size == 32 || size == 64); /* Shift so everything is in terms of 32-bit units */ @@ -369,17 +369,38 @@ midgard_ldst_reg(unsigned reg, unsigned component, unsigned size) component >>= 1; } - midgard_ldst_register_select sel = { - .component = component, - .select = reg - 26 - }; - - uint8_t packed; - memcpy(&packed, &sel, sizeof(packed)); - - return packed; + return component; } +/* Packs/unpacks a ubo index immediate */ + +void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index); +unsigned midgard_unpack_ubo_index_imm(midgard_load_store_word word); + +/* Packs/unpacks varying parameters. + * FIXME: IMPORTANT: We currently handle varying mode weirdly, by passing all + * parameters via an offset and using REGISTER_LDST_ZERO as base. This works + * for most parameters, but does not allow us to encode/decode direct sample + * position. 
*/ +void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p); +midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word); + +/* Load/store ops' displacement helpers. + * This is useful because different types of load/store ops have different + * displacement bitsize. */ + +#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9) +#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a) & 0x1FF, 9) +#define UNPACK_LDST_SELECTOR_OFS(a) ((a) >> 9) +#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2) +#define UNPACK_LDST_MEM_OFS(a) ((a)) + +#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9) +#define PACK_LDST_VERTEX_OFS(a) ((a) & 0x1FF) +#define PACK_LDST_SELECTOR_OFS(a) ((a) << 9) +#define PACK_LDST_UBO_OFS(a) ((a) << 2) +#define PACK_LDST_MEM_OFS(a) ((a)) + static inline bool midgard_is_branch_unit(unsigned unit) { diff --git a/src/panfrost/midgard/midgard.h b/src/panfrost/midgard/midgard.h index e9559659c1c..1d873de5325 100644 --- a/src/panfrost/midgard/midgard.h +++ b/src/panfrost/midgard/midgard.h @@ -664,31 +664,37 @@ typedef enum { typedef enum { midgard_varying_mod_none = 0, - /* Other values unknown */ - - /* Take the would-be result and divide all components by its z/w + /* Take the would-be result and divide all components by its y/z/w * (perspective division baked in with the load) */ + midgard_varying_mod_perspective_y = 1, midgard_varying_mod_perspective_z = 2, midgard_varying_mod_perspective_w = 3, + + /* The result is a 64-bit cubemap descriptor to use with + * midgard_tex_op_normal or midgard_tex_op_gradient */ + midgard_varying_mod_cubemap = 4, } midgard_varying_modifier; typedef struct __attribute__((__packed__)) { - unsigned zero0 : 1; /* Always zero */ + midgard_varying_modifier modifier : 3; - midgard_varying_modifier modifier : 2; + bool flat_shading : 1; - unsigned zero1: 1; /* Always zero */ + /* These are ignored if flat_shading is enabled. 
*/ + bool perspective_correction : 1; + bool centroid_mapping : 1; - /* Varying qualifiers, zero if not a varying */ - unsigned flat : 1; - unsigned is_varying : 1; /* Always one for varying, but maybe something else? */ - midgard_interpolation interpolation : 2; + /* This is ignored if the shader only runs once per pixel. */ + bool interpolate_sample : 1; - unsigned zero2 : 2; /* Always zero */ + bool zero0 : 1; /* Always zero */ + + unsigned direct_sample_pos_x : 4; + unsigned direct_sample_pos_y : 4; } -midgard_varying_parameter; +midgard_varying_params; /* 8-bit register/etc selector for load/store ops */ typedef struct @@ -711,26 +717,56 @@ __attribute__((__packed__)) } midgard_ldst_register_select; +typedef enum { + /* 0 is reserved */ + midgard_index_address_u64 = 1, + midgard_index_address_u32 = 2, + midgard_index_address_s32 = 3, +} midgard_index_address_format; + typedef struct __attribute__((__packed__)) { midgard_load_store_op op : 8; - unsigned reg : 5; - unsigned mask : 4; + + /* Source/dest reg */ + unsigned reg : 5; + + /* Generally is a writemask. + * For ST_ATTR and ST_TEX, unused. + * For other stores, each bit masks 1/4th of the output. */ + unsigned mask : 4; + + /* Swizzle for stores, but for atomics it encodes also the source + * register. This fits because atomics dont need a swizzle since they + * are not vectorized instructions. */ unsigned swizzle : 8; - /* Load/store ops can take two additional registers as arguments, but - * these are limited to load/store registers with only a few supported - * mask/swizzle combinations. The tradeoff is these are much more - * compact, requiring 8-bits each rather than 17-bits for a full - * reg/mask/swizzle. Usually (?) encoded as - * midgard_ldst_register_select. 
*/ - unsigned arg_1 : 8; - unsigned arg_2 : 8; + /* Arg reg, meaning changes according to each opcode */ + unsigned arg_comp : 2; + unsigned arg_reg : 3; - unsigned varying_parameters : 10; + /* 64-bit address enable + * 32-bit data type enable for CUBEMAP and perspective div. + * Explicit indexing enable for LD_ATTR. + * 64-bit coordinate enable for LD_IMAGE. */ + bool bitsize_toggle : 1; - unsigned address : 9; + /* These are mainly used for opcodes that have addresses. + * For cmpxchg, index_reg is used for the comparison value. + * For ops that access the attrib table, bit 1 encodes which table. + * For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */ + midgard_index_address_format index_format : 2; + unsigned index_comp : 2; + unsigned index_reg : 3; + unsigned index_shift : 4; + + /* Generaly is a signed offset, but has different bitsize and starts at + * different bits depending on the opcode, LDST_*_DISPLACEMENT helpers + * are recommended when packing/unpacking this attribute. + * For LD_UBO, bit 0 enables ubo index immediate. + * For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */ + int signed_offset : 18; } midgard_load_store_word; diff --git a/src/panfrost/midgard/midgard_address.c b/src/panfrost/midgard/midgard_address.c index c55e0fbe2c5..beb661ec50c 100644 --- a/src/panfrost/midgard/midgard_address.c +++ b/src/panfrost/midgard/midgard_address.c @@ -36,17 +36,11 @@ * This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe. 
*/ -enum index_type { - ITYPE_U64 = 1 << 6, - ITYPE_U32 = 2 << 6, // zero-extend - ITYPE_I32 = 3 << 6, // sign-extend -}; - struct mir_address { nir_ssa_scalar A; nir_ssa_scalar B; - enum index_type type; + midgard_index_address_format type; unsigned shift; unsigned bias; }; @@ -136,7 +130,7 @@ mir_match_u2u64(struct mir_address *address) nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); address->B = arg; - address->type = ITYPE_U32; + address->type = midgard_index_address_u32; } /* Matches i2i64 and sets type */ @@ -155,7 +149,7 @@ mir_match_i2i64(struct mir_address *address) nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); address->B = arg; - address->type = ITYPE_I32; + address->type = midgard_index_address_s32; } /* Matches ishl to shift */ @@ -210,7 +204,7 @@ mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend) { struct mir_address address = { .B = { .def = offset }, - .type = extend ? ITYPE_U64 : ITYPE_U32, + .type = extend ? midgard_index_address_u64 : midgard_index_address_u32, }; mir_match_mov(&address); @@ -243,14 +237,16 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, bool force_sext = (nir_src_bit_size(*offset) < 64); if (!offset->is_ssa) { - ins->load_store.arg_1 |= seg; + ins->load_store.bitsize_toggle = true; + ins->load_store.arg_comp = seg & 0x3; + ins->load_store.arg_reg = (seg >> 2) & 0x7; ins->src[2] = nir_src_index(ctx, offset); ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset); if (force_sext) - ins->load_store.arg_1 |= ITYPE_I32; + ins->load_store.index_format = midgard_index_address_s32; else - ins->load_store.arg_1 |= ITYPE_U64; + ins->load_store.index_format = midgard_index_address_u64; return; } @@ -263,23 +259,26 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, ins->src[1] = nir_ssa_index(match.A.def); ins->swizzle[1][0] = match.A.comp; ins->src_types[1] = nir_type_uint | match.A.def->bit_size; - } else - 
ins->load_store.arg_1 |= seg; + } else { + ins->load_store.bitsize_toggle = true; + ins->load_store.arg_comp = seg & 0x3; + ins->load_store.arg_reg = (seg >> 2) & 0x7; + } if (match.B.def) { ins->src[2] = nir_ssa_index(match.B.def); ins->swizzle[2][0] = match.B.comp; ins->src_types[2] = nir_type_uint | match.B.def->bit_size; } else - ins->load_store.arg_2 = 0x1E; + ins->load_store.index_reg = REGISTER_LDST_ZERO; if (force_sext) - match.type = ITYPE_I32; + match.type = midgard_index_address_s32; - ins->load_store.arg_1 |= match.type; + ins->load_store.index_format = match.type; assert(match.shift <= 7); - ins->load_store.arg_2 |= (match.shift) << 5; + ins->load_store.index_shift = match.shift; ins->constants.u32[0] = match.bias; } @@ -298,6 +297,6 @@ mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias) ins->swizzle[2][i] = match.B.comp; } - ins->load_store.arg_2 |= (match.shift) << 5; + ins->load_store.index_shift = match.shift; ins->constants.u32[0] = match.bias + bias; } diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 9f945d0dc93..1e17f6ceef5 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -112,7 +112,7 @@ schedule_barrier(compiler_context *ctx) .swizzle = SWIZZLE_IDENTITY_4, \ .op = midgard_op_##name, \ .load_store = { \ - .address = address \ + .signed_offset = address \ } \ }; \ \ @@ -1164,20 +1164,20 @@ emit_ubo_read( if (indirect_offset) { ins.src[2] = nir_src_index(ctx, indirect_offset); ins.src_types[2] = nir_type_uint32; - ins.load_store.arg_2 = (indirect_shift << 5); + ins.load_store.index_shift = indirect_shift; /* X component for the whole swizzle to prevent register * pressure from ballooning from the extra components */ for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i) ins.swizzle[2][i] = 0; } else { - ins.load_store.arg_2 = 0x1E; + ins.load_store.index_reg = REGISTER_LDST_ZERO; } if (indirect_offset && 
indirect_offset->is_ssa && !indirect_shift) mir_set_ubo_offset(&ins, indirect_offset, offset); - ins.load_store.arg_1 = index; + midgard_pack_ubo_index_imm(&ins.load_store, index); return emit_mir_instruction(ctx, ins); } @@ -1274,12 +1274,6 @@ emit_atomic( nir_src *src_offset = nir_get_io_offset_src(instr); if (op == midgard_op_atomic_cmpxchg) { - for(unsigned i = 0; i < 2; ++i) - ins.swizzle[1][i] = i; - - ins.src[1] = is_image ? image_direct_address : nir_src_index(ctx, src_offset); - ins.src_types[1] = nir_type_uint64; - unsigned xchg_val_src = is_image ? 4 : 2; unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]); emit_explicit_constant(ctx, xchg_val, xchg_val); @@ -1288,8 +1282,18 @@ emit_atomic( ins.src_types[2] = type | bitsize; ins.src[3] = xchg_val; - if (is_shared) - ins.load_store.arg_1 |= 0x6E; + if (is_shared) { + ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR; + ins.load_store.arg_comp = COMPONENT_Z; + ins.load_store.bitsize_toggle = true; + } else { + for(unsigned i = 0; i < 2; ++i) + ins.swizzle[1][i] = i; + + ins.src[1] = is_image ? image_direct_address : + nir_src_index(ctx, src_offset); + ins.src_types[1] = nir_type_uint64; + } } else if (is_image) { for(unsigned i = 0; i < 2; ++i) ins.swizzle[2][i] = i; @@ -1297,7 +1301,9 @@ emit_atomic( ins.src[2] = image_direct_address; ins.src_types[2] = nir_type_uint64; - ins.load_store.arg_1 |= 0x7E; + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.bitsize_toggle = true; + ins.load_store.index_format = midgard_index_address_u64; } else mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL); @@ -1316,7 +1322,7 @@ emit_varying_read( /* XXX: Half-floats? 
*/ /* TODO: swizzle, mask */ - midgard_instruction ins = m_ld_vary_32(dest, offset); + midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset)); ins.mask = mask_of(nr_comp); ins.dest_type = type; @@ -1328,23 +1334,22 @@ emit_varying_read( for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W); - midgard_varying_parameter p = { - .is_varying = 1, - .interpolation = midgard_interp_default, - .flat = flat, - }; - unsigned u; - memcpy(&u, &p, sizeof(p)); - ins.load_store.varying_parameters = u; + midgard_varying_params p = { + .flat_shading = flat, + .perspective_correction = 1, + .interpolate_sample = true, + }; + midgard_pack_varying_params(&ins.load_store, p); if (indirect_offset) { ins.src[2] = nir_src_index(ctx, indirect_offset); ins.src_types[2] = nir_type_uint32; } else - ins.load_store.arg_2 = 0x1E; + ins.load_store.index_reg = REGISTER_LDST_ZERO; - ins.load_store.arg_1 = 0x9E; + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.index_format = midgard_index_address_u32; /* Use the type appropriate load */ switch (type) { @@ -1402,16 +1407,16 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) emit_explicit_constant(ctx, val, val); nir_alu_type type = nir_intrinsic_src_type(instr); - ins = st_image(type, val, address); + ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address)); nir_alu_type base_type = nir_alu_type_get_base_type(type); ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]); } else if (is_atomic) { /* emit lea_image */ unsigned dest = make_compiler_temp_reg(ctx); - ins = m_lea_image(dest, address); + ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address)); ins.mask = mask_of(2); /* 64-bit memory address */ } else { /* emit ld_image_* */ nir_alu_type type = nir_intrinsic_dest_type(instr); - ins = ld_image(type, nir_dest_index(&instr->dest), address); + ins = ld_image(type, nir_dest_index(&instr->dest), 
PACK_LDST_ATTRIB_OFS(address)); ins.mask = mask_of(nir_intrinsic_dest_components(instr)); ins.dest_type = type; } @@ -1420,7 +1425,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) ins.src[1] = coord_reg; ins.src_types[1] = nir_type_uint16; if (nr_dim == 3 || is_array) { - ins.load_store.arg_1 |= 0x20; + ins.load_store.bitsize_toggle = true; } /* Image index reg */ @@ -1428,7 +1433,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) ins.src[2] = nir_src_index(ctx, index); ins.src_types[2] = nir_type_uint32; } else - ins.load_store.arg_2 = 0x1E; + ins.load_store.index_reg = REGISTER_LDST_ZERO; emit_mir_instruction(ctx, ins); @@ -1441,9 +1446,9 @@ emit_attr_read( unsigned dest, unsigned offset, unsigned nr_comp, nir_alu_type t) { - midgard_instruction ins = m_ld_attr_32(dest, offset); - ins.load_store.arg_1 = 0x1E; - ins.load_store.arg_2 = 0x1E; + midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset)); + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.index_reg = REGISTER_LDST_ZERO; ins.mask = mask_of(nr_comp); /* Use the type appropriate load */ @@ -1493,12 +1498,12 @@ compute_builtin_arg(nir_op op) { switch (op) { case nir_intrinsic_load_work_group_id: - return 0x14; + return REGISTER_LDST_GROUP_ID; case nir_intrinsic_load_local_invocation_id: - return 0x10; + return REGISTER_LDST_LOCAL_THREAD_ID; case nir_intrinsic_load_global_invocation_id: case nir_intrinsic_load_global_invocation_id_zero_base: - return 0x18; + return REGISTER_LDST_GLOBAL_THREAD_ID; default: unreachable("Invalid compute paramater loaded"); } @@ -1567,7 +1572,7 @@ emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) midgard_instruction ins = m_ldst_mov(reg, 0); ins.mask = mask_of(3); ins.swizzle[0][3] = COMPONENT_X; /* xyzx */ - ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic); + ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic); 
emit_mir_instruction(ctx, ins); } @@ -1598,8 +1603,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx) midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); ld.op = midgard_op_ld_special_32u; - ld.load_store.address = idx; - ld.load_store.arg_2 = 0x1E; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx); + ld.load_store.index_reg = REGISTER_LDST_ZERO; for (int i = 0; i < 4; ++i) ld.swizzle[0][i] = COMPONENT_X; @@ -1790,20 +1795,25 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); - ld.load_store.arg_2 = output_load_rt_addr(ctx, instr); + unsigned target = output_load_rt_addr(ctx, instr); + ld.load_store.index_comp = target & 0x3; + ld.load_store.index_reg = target >> 2; if (nir_src_is_const(instr->src[0])) { - ld.load_store.arg_1 = nir_src_as_uint(instr->src[0]); + unsigned sample = nir_src_as_uint(instr->src[0]); + ld.load_store.arg_comp = sample & 0x3; + ld.load_store.arg_reg = sample >> 2; } else { - ld.load_store.varying_parameters = 2; + /* Enable sample index via register. 
*/ + ld.load_store.signed_offset |= 1; ld.src[1] = nir_src_index(ctx, &instr->src[0]); ld.src_types[1] = nir_type_int32; } if (ctx->quirks & MIDGARD_OLD_BLEND) { ld.op = midgard_op_ld_special_32u; - ld.load_store.address = 16; - ld.load_store.arg_2 = 0x1E; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16); + ld.load_store.index_reg = REGISTER_LDST_ZERO; } emit_mir_instruction(ctx, ld); @@ -1821,7 +1831,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) else ld = m_ld_tilebuffer_32f(reg, 0); - ld.load_store.arg_2 = output_load_rt_addr(ctx, instr); + unsigned index = output_load_rt_addr(ctx, instr); + ld.load_store.index_comp = index & 0x3; + ld.load_store.index_reg = index >> 2; for (unsigned c = 4; c < 16; ++c) ld.swizzle[0][c] = 0; @@ -1831,8 +1843,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) ld.op = midgard_op_ld_special_16f; else ld.op = midgard_op_ld_special_32f; - ld.load_store.address = 1; - ld.load_store.arg_2 = 0x1E; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1); + ld.load_store.index_reg = REGISTER_LDST_ZERO; } emit_mir_instruction(ctx, ld); @@ -1924,9 +1936,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) unsigned dst_component = nir_intrinsic_component(instr); unsigned nr_comp = nir_src_num_components(instr->src[0]); - midgard_instruction st = m_st_vary_32(reg, offset); - st.load_store.arg_1 = 0x9E; - st.load_store.arg_2 = 0x1E; + midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset)); + st.load_store.arg_reg = REGISTER_LDST_ZERO; + st.load_store.index_format = midgard_index_address_u32; + st.load_store.index_reg = REGISTER_LDST_ZERO; switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) { case nir_type_uint: @@ -2211,7 +2224,7 @@ set_tex_coord(compiler_context *ctx, nir_tex_instr *instr, ld.src[1] = coords; ld.src_types[1] = ins->src_types[1]; ld.mask = 0x3; /* xy */ - ld.load_store.arg_1 = 0x20; + ld.load_store.bitsize_toggle = true; 
ld.swizzle[1][3] = COMPONENT_X; emit_mir_instruction(ctx, ld); diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 47bd57e1018..057a1f49dca 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -44,6 +44,47 @@ mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar) return midgard_int_zero_extend; } +void +midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index) +{ + word->arg_comp = index & 0x3; + word->arg_reg = (index >> 2) & 0x7; + word->bitsize_toggle = (index >> 5) & 0x1; + word->index_format = (index >> 6) & 0x3; +} + +unsigned +midgard_unpack_ubo_index_imm(midgard_load_store_word word) +{ + unsigned ubo = word.arg_comp | + (word.arg_reg << 2) | + (word.bitsize_toggle << 5) | + (word.index_format << 6); + + return ubo; +} + +void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p) +{ + /* Currently these parameters are not supported. */ + assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0); + + unsigned u; + memcpy(&u, &p, sizeof(p)); + + word->signed_offset |= u & 0x1FF; +} + +midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word) +{ + unsigned params = word.signed_offset & 0x1FF; + + midgard_varying_params p; + memcpy(&p, &params, sizeof(p)); + + return p; +} + unsigned mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar) { @@ -578,15 +619,15 @@ load_store_from_instr(midgard_instruction *ins) } if (ins->src[1] != ~0) { - unsigned src = SSA_REG_FROM_FIXED(ins->src[1]); + ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE; unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]); - ldst.arg_1 |= midgard_ldst_reg(src, ins->swizzle[1][0], sz); + ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz); } if (ins->src[2] != ~0) { - unsigned src = SSA_REG_FROM_FIXED(ins->src[2]); + ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) -
REGISTER_LDST_BASE; unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]); - ldst.arg_2 |= midgard_ldst_reg(src, ins->swizzle[2][0], sz); + ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz); } return ldst; @@ -876,13 +917,22 @@ emit_alu_bundle(compiler_context *ctx, * over some other semantic distinction else well, but it unifies things in the * compiler so I don't mind. */ -static unsigned -mir_ldst_imm_shift(midgard_load_store_op op) +static void +mir_ldst_pack_offset(midgard_instruction *ins, int offset) { - if (OP_IS_UBO_READ(op)) - return 3; + /* These opcodes don't support offsets */ + assert(!OP_IS_REG2REG_LDST(ins->op) || + ins->op == midgard_op_lea || + ins->op == midgard_op_lea_image); + + if (OP_IS_UBO_READ(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset); + else if (OP_IS_IMAGE(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset); + else if (OP_IS_SPECIAL(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset); else - return 1; + ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset); } static enum mali_sampler_type @@ -931,22 +981,17 @@ emit_binary_bundle(compiler_context *ctx, /* Copy masks */ for (unsigned i = 0; i < bundle->instruction_count; ++i) { - mir_pack_ldst_mask(bundle->instructions[i]); + midgard_instruction *ins = bundle->instructions[i]; + mir_pack_ldst_mask(ins); /* Atomic ops don't use this swizzle the same way as other ops */ - if (!OP_IS_ATOMIC(bundle->instructions[i]->op)) - mir_pack_swizzle_ldst(bundle->instructions[i]); + if (!OP_IS_ATOMIC(ins->op)) + mir_pack_swizzle_ldst(ins); /* Apply a constant offset */ - unsigned offset = bundle->instructions[i]->constants.u32[0]; - - if (offset) { - unsigned shift = mir_ldst_imm_shift(bundle->instructions[i]->op); - unsigned upper_shift = 10 - shift; - - bundle->instructions[i]->load_store.varying_parameters |= (offset & ((1 << upper_shift) - 1)) << shift; - 
bundle->instructions[i]->load_store.address |= (offset >> upper_shift); - } + unsigned offset = ins->constants.u32[0]; + if (offset) + mir_ldst_pack_offset(ins, offset); } midgard_load_store_word ldst0 = diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c index 97e9d338519..9a034ac7fbd 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -125,7 +125,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) midgard_op_ldst_perspective_div_w : midgard_op_ldst_perspective_div_z, .load_store = { - .arg_1 = 0x20 + .bitsize_toggle = true, } }; @@ -167,9 +167,8 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) /* We found it, so rewrite it to project. Grab the * modifier */ - unsigned param = v->load_store.varying_parameters; - midgard_varying_parameter p; - memcpy(&p, &param, sizeof(p)); + midgard_varying_params p = + midgard_unpack_varying_params(v->load_store); if (p.modifier != midgard_varying_mod_none) break; @@ -181,9 +180,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) midgard_varying_mod_perspective_w : midgard_varying_mod_perspective_z; - /* Aliasing rules are annoying */ - memcpy(&param, &p, sizeof(p)); - v->load_store.varying_parameters = param; + midgard_pack_varying_params(&v->load_store, p); /* Use the new destination */ v->dest = to; diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index f28e2a6919d..9a537270bbe 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -988,12 +988,14 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) .swizzle = SWIZZLE_IDENTITY_4, .op = midgard_op_ld_ubo_128, .load_store = { - .arg_1 = ctx->info->push.words[idx].ubo, - .arg_2 = 0x1E, + .index_reg = REGISTER_LDST_ZERO, }, .constants.u32[0] = ctx->info->push.words[idx].offset }; +
midgard_pack_ubo_index_imm(&ld.load_store, + ctx->info->push.words[idx].ubo); + mir_insert_instruction_before_scheduled(ctx, block, before, ld); mir_rewrite_index_src_single(ins, ins->src[i], temp); diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index fb7c4283637..17ad243d68c 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -140,14 +140,17 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, if (instructions[i]->type == TAG_LOAD_STORE_4 && load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) { - unsigned type; - switch (instructions[i]->load_store.arg_1 & 0x3E) { - case LDST_SHARED: type = 0; break; - case LDST_SCRATCH: type = 1; break; - default: type = 2; break; + unsigned type = instructions[i]->load_store.arg_reg | + instructions[i]->load_store.arg_comp; + + unsigned idx; + switch (type) { + case LDST_SHARED: idx = 0; break; + case LDST_SCRATCH: idx = 1; break; + default: idx = 2; break; } - unsigned prev = prev_ldst[type]; + unsigned prev = prev_ldst[idx]; if (prev != ~0) { BITSET_WORD *dependents = instructions[prev]->dependents; @@ -160,7 +163,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, instructions[i]->nr_dependencies++; } - prev_ldst[type] = i; + prev_ldst[idx] = i; } if (dest < node_count) { diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 6a02eaadd15..5a8dbafbd8f 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -73,7 +73,7 @@ mir_analyze_ranges(compiler_context *ctx) mir_foreach_instr_global(ctx, ins) { if (!mir_is_direct_aligned_ubo(ins)) continue; - unsigned ubo = ins->load_store.arg_1; + unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); unsigned offset = ins->constants.u32[0] / 16; assert(ubo < res.nr_blocks); @@ -276,7 +276,7 @@ 
midgard_promote_uniforms(compiler_context *ctx) mir_foreach_instr_global_safe(ctx, ins) { if (!mir_is_direct_aligned_ubo(ins)) continue; - unsigned ubo = ins->load_store.arg_1; + unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); unsigned qword = ins->constants.u32[0] / 16; /* Check if we decided to push this */