pan/mdg: properly encode/decode ldst instructions

Signed-off-by: Italo Nicola <italonicola@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9461>
Italo Nicola 2021-04-16 10:23:48 +00:00 committed by Marge Bot
parent 4b933260d9
commit 6a12ea02fe
11 changed files with 465 additions and 247 deletions


@ -516,9 +516,9 @@ void mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block
void mir_flip(midgard_instruction *ins);
void mir_compute_temp_count(compiler_context *ctx);
#define LDST_GLOBAL 0x3E
#define LDST_SHARED 0x2E
#define LDST_SCRATCH 0x2A
#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2)
#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z)
#define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z)
void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg);
void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias);
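A minimal sketch, not part of the patch, of how these segment selectors decompose into the new arg_reg/arg_comp fields, the same split mir_set_offset performs further down in this commit:

unsigned seg = LDST_SHARED;            /* (REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z */
unsigned arg_comp = seg & 0x3;         /* COMPONENT_Z */
unsigned arg_reg  = (seg >> 2) & 0x7;  /* REGISTER_LDST_LOCAL_STORAGE_PTR */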
@ -573,8 +573,11 @@ v_load_store_scratch(
.op = is_store ? midgard_op_st_128 : midgard_op_ld_128,
.load_store = {
/* For register spilling - to thread local storage */
.arg_1 = 0xEA,
.arg_2 = 0x1E,
.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR,
.arg_comp = COMPONENT_Z,
.bitsize_toggle = true,
.index_format = midgard_index_address_u32,
.index_reg = REGISTER_LDST_ZERO,
},
/* If we spill an unspill, RA goes into an infinite loop */


@ -304,6 +304,13 @@ static char *argmod_names[3] = {
".x2",
};
static char *index_format_names[4] = {
"",
".u64",
".u32",
".s32"
};
static void
print_outmod(FILE *fp, unsigned outmod, bool is_int)
{
@ -760,6 +767,21 @@ print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink
fprintf(fp, " /* %X */", mask);
}
/* TODO: 16-bit mode */
static void
print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) {
fprintf(fp, ".");
for (unsigned i = 0; i < 4; ++i) {
bool write = (mask & (1 << i)) != 0;
unsigned c = (swizzle >> (i * 2)) & 3;
/* We can't omit the swizzle here since many ldst ops have a
* combined swizzle/writemask, and it would be ambiguous to not
* print the masked-out components. */
fprintf(fp, "%c", write ? components[c] : '~');
}
}
/* Prints the 4-bit masks found in texture and load/store ops, as opposed to
* the 8-bit masks found in (vector) ALU ops. Supports texture-style 16-bit
* mode as well, but not load/store-style 16-bit mode. */
@ -1235,41 +1257,40 @@ print_alu_word(FILE *fp, uint32_t *words, unsigned num_quad_words,
return branch_forward;
}
static void
/* TODO: how can we use this now that we know that these params can't be known
* before run time in every single case? Maybe just use it in the cases we can? */
UNUSED static void
print_varying_parameters(FILE *fp, midgard_load_store_word *word)
{
midgard_varying_parameter param;
unsigned v = word->varying_parameters;
memcpy(&param, &v, sizeof(param));
midgard_varying_params p = midgard_unpack_varying_params(*word);
if (param.is_varying) {
/* If a varying, there are qualifiers */
if (param.flat)
fprintf(fp, ".flat");
/* If a varying, there are qualifiers */
if (p.flat_shading)
fprintf(fp, ".flat");
if (param.interpolation != midgard_interp_default) {
if (param.interpolation == midgard_interp_centroid)
fprintf(fp, ".centroid");
else if (param.interpolation == midgard_interp_sample)
fprintf(fp, ".sample");
else
fprintf(fp, ".interp%d", param.interpolation);
}
if (p.perspective_correction)
fprintf(fp, ".correction");
if (param.modifier != midgard_varying_mod_none) {
if (param.modifier == midgard_varying_mod_perspective_w)
fprintf(fp, ".perspectivew");
else if (param.modifier == midgard_varying_mod_perspective_z)
fprintf(fp, ".perspectivez");
else
fprintf(fp, ".mod%d", param.modifier);
}
} else if (param.flat || param.interpolation || param.modifier) {
fprintf(fp, " /* is_varying not set but varying metadata attached */");
if (p.centroid_mapping)
fprintf(fp, ".centroid");
if (p.interpolate_sample)
fprintf(fp, ".sample");
switch (p.modifier) {
case midgard_varying_mod_perspective_y:
fprintf(fp, ".perspectivey");
break;
case midgard_varying_mod_perspective_z:
fprintf(fp, ".perspectivez");
break;
case midgard_varying_mod_perspective_w:
fprintf(fp, ".perspectivew");
break;
default:
unreachable("invalid varying modifier");
break;
}
if (param.zero0 || param.zero1 || param.zero2)
fprintf(fp, " /* zero tripped, %u %u %u */ ", param.zero0, param.zero1, param.zero2);
}
static bool
@ -1304,33 +1325,14 @@ is_op_attribute(unsigned op)
return false;
}
/* Helper to print integer well-formatted, but only when non-zero. */
static void
print_load_store_arg(FILE *fp, uint8_t arg, unsigned index)
midgard_print_sint(FILE *fp, int n)
{
/* Try to interpret as a register */
midgard_ldst_register_select sel;
memcpy(&sel, &arg, sizeof(arg));
/* If unknown is set, we're not sure what this is or how to
* interpret it. But if it's zero, we get it. */
if (sel.unknown) {
fprintf(fp, "0x%02X", arg);
return;
}
print_ldst_read_reg(fp, sel.select);
fprintf(fp, ".%c", components[sel.component]);
/* Only print a shift if it's non-zero. Shifts only make sense for the
* second index. For the first, we're not sure what it means yet */
if (index == 1) {
if (sel.shift)
fprintf(fp, " << %u", sel.shift);
} else {
fprintf(fp, " /* %X */", sel.shift);
}
if (n > 0)
fprintf(fp, " + 0x%X", n);
else if (n < 0)
fprintf(fp, " - 0x%X", -n);
}
static void
@ -1347,63 +1349,160 @@ print_load_store_instr(FILE *fp, uint64_t data)
print_ld_st_opcode(fp, word->op);
unsigned address = word->address;
if (word->op == midgard_op_trap) {
fprintf(fp, " 0x%X\n", word->signed_offset);
return;
}
/* Print opcode modifiers */
if (OP_USES_ATTRIB(word->op)) /* which attrib table? */
fprintf(fp, ".%s", (word->index_format >> 1) ? "secondary" : "primary");
else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op))
fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16");
fprintf(fp, " ");
/* src/dest register */
if (!OP_IS_STORE(word->op)) {
print_ldst_write_reg(fp, word->reg);
/* Some opcodes don't have a swizzable src register, and
* instead the swizzle is applied before the result is written
* to the dest reg. For these ops, we combine the writemask
* with the swizzle to display them in the disasm compactly. */
unsigned swizzle = word->swizzle;
if ((OP_IS_REG2REG_LDST(word->op) &&
word->op != midgard_op_lea &&
word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op))
swizzle = 0xE4;
print_ldst_mask(fp, word->mask, swizzle);
} else {
print_ldst_read_reg(fp, word->reg);
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,
midgard_reg_mode_32, 0xFF);
}
/* ld_ubo args */
if (OP_IS_UBO_READ(word->op)) {
if (word->signed_offset & 1) { /* buffer index imm */
unsigned imm = midgard_unpack_ubo_index_imm(*word);
fprintf(fp, ", %u", imm);
} else { /* buffer index from reg */
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->arg_reg);
fprintf(fp, ".%c", components[word->arg_comp]);
}
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift);
midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset));
}
/* mem addr expression */
if (OP_HAS_ADDRESS(word->op)) {
fprintf(fp, ", [ ");
print_ldst_read_reg(fp, word->arg_reg);
fprintf(fp, ".u%d.%c",
word->bitsize_toggle ? 64 : 32, components[word->arg_comp]);
if ((word->op < midgard_op_atomic_cmpxchg ||
word->op > midgard_op_atomic_cmpxchg64_be) &&
word->index_reg != 0x7) {
fprintf(fp, " + (");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, "%s.%c << %u)",
index_format_names[word->index_format],
components[word->index_comp], word->index_shift);
}
midgard_print_sint(fp, word->signed_offset);
fprintf(fp, " ]");
}
/* src reg for reg2reg ldst opcodes */
if (OP_IS_REG2REG_LDST(word->op)) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->arg_reg);
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,
midgard_reg_mode_32, 0xFF);
}
/* atomic ops encode the source arg where the ldst swizzle would be. */
if (OP_IS_ATOMIC(word->op)) {
unsigned src = (word->swizzle >> 2) & 0x7;
unsigned src_comp = word->swizzle & 0x3;
fprintf(fp, ", ");
print_ldst_read_reg(fp, src);
fprintf(fp, ".%c", components[src_comp]);
}
/* CMPXCHG encodes the extra comparison arg where the index reg would be. */
if (word->op >= midgard_op_atomic_cmpxchg &&
word->op <= midgard_op_atomic_cmpxchg64_be) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, ".%c", components[word->index_comp]);
}
/* index reg for attr/vary/images, selector for ld/st_special */
if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->index_reg);
fprintf(fp, ".%c << %u", components[word->index_comp], word->index_shift);
midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
}
/* vertex reg for attrib/varying ops, coord reg for image ops */
if (OP_USES_ATTRIB(word->op)) {
fprintf(fp, ", ");
print_ldst_read_reg(fp, word->arg_reg);
if (OP_IS_IMAGE(word->op))
fprintf(fp, ".u%d", word->bitsize_toggle ? 64 : 32);
fprintf(fp, ".%c", components[word->arg_comp]);
if (word->bitsize_toggle && !OP_IS_IMAGE(word->op))
midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset));
}
/* TODO: properly decode format specifier for PACK/UNPACK ops */
if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) {
fprintf(fp, ", ");
unsigned format_specifier = (word->signed_offset << 4) | word->index_shift;
fprintf(fp, "0x%X", format_specifier);
}
fprintf(fp, "\n");
/* Debugging stuff */
if (is_op_varying(word->op)) {
print_varying_parameters(fp, word);
/* Do some analysis: check if direct access */
/* Do some analysis: check if direct access */
if ((word->arg_2 == 0x1E) && midg_stats.varying_count >= 0)
update_stats(&midg_stats.varying_count, address);
if (word->index_reg == 0x7 && midg_stats.varying_count >= 0)
update_stats(&midg_stats.varying_count,
UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
else
midg_stats.varying_count = -16;
} else if (is_op_attribute(word->op)) {
if ((word->arg_2 == 0x1E) && midg_stats.attribute_count >= 0)
update_stats(&midg_stats.attribute_count, address);
if (word->index_reg == 0x7 && midg_stats.attribute_count >= 0)
update_stats(&midg_stats.attribute_count,
UNPACK_LDST_ATTRIB_OFS(word->signed_offset));
else
midg_stats.attribute_count = -16;
}
fprintf(fp, " ");
if (!OP_IS_STORE(word->op))
print_ldst_write_reg(fp, word->reg);
else
print_ldst_read_reg(fp, word->reg);
print_mask_4(fp, word->mask, false);
if (!OP_IS_STORE(word->op))
update_dest(word->reg);
bool is_ubo = OP_IS_UBO_READ(word->op);
if (is_ubo) {
/* UBOs use their own addressing scheme */
int lo = word->varying_parameters >> 7;
int hi = word->address;
/* TODO: Combine fields logically */
address = (hi << 3) | lo;
}
fprintf(fp, ", %u", address);
print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF);
fprintf(fp, ", ");
if (is_ubo) {
fprintf(fp, "ubo%u", word->arg_1);
update_stats(&midg_stats.uniform_buffer_count, word->arg_1);
} else
print_load_store_arg(fp, word->arg_1, 0);
fprintf(fp, ", ");
print_load_store_arg(fp, word->arg_2, 1);
fprintf(fp, " /* %X */\n", word->varying_parameters);
if (OP_IS_UBO_READ(word->op))
update_stats(&midg_stats.uniform_buffer_count,
UNPACK_LDST_UBO_OFS(word->signed_offset));
midg_stats.instruction_count++;
}


@ -355,9 +355,9 @@ mir_is_simple_swizzle(unsigned *swizzle, unsigned mask)
/* Packs a load/store argument */
static inline uint8_t
midgard_ldst_reg(unsigned reg, unsigned component, unsigned size)
midgard_ldst_comp(unsigned reg, unsigned component, unsigned size)
{
assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1));
assert((reg & ~1) == 0);
assert(size == 16 || size == 32 || size == 64);
/* Shift so everything is in terms of 32-bit units */
@ -369,17 +369,38 @@ midgard_ldst_reg(unsigned reg, unsigned component, unsigned size)
component >>= 1;
}
midgard_ldst_register_select sel = {
.component = component,
.select = reg - 26
};
uint8_t packed;
memcpy(&packed, &sel, sizeof(packed));
return packed;
return component;
}
/* Packs/unpacks a ubo index immediate */
void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index);
unsigned midgard_unpack_ubo_index_imm(midgard_load_store_word word);
/* Packs/unpacks varying parameters.
* FIXME: IMPORTANT: We currently handle varying mode weirdly, by passing all
* parameters via an offset and using REGISTER_LDST_ZERO as base. This works
* for most parameters, but does not allow us to encode/decode direct sample
* position. */
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p);
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word);
/* Load/store ops' displacement helpers.
* This is useful because different types of load/store ops have different
* displacement bitsizes. */
#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9)
#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a) & 0x1FF, 9)
#define UNPACK_LDST_SELECTOR_OFS(a) ((a) >> 9)
#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2)
#define UNPACK_LDST_MEM_OFS(a) ((a))
#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9)
#define PACK_LDST_VERTEX_OFS(a) ((a) & 0x1FF)
#define PACK_LDST_SELECTOR_OFS(a) ((a) << 9)
#define PACK_LDST_UBO_OFS(a) ((a) << 2)
#define PACK_LDST_MEM_OFS(a) ((a))
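A short sketch of the helpers above, not part of the patch and assuming assert() from <assert.h>: attribute-style offsets live in the top bits of signed_offset, while UBO offsets are shifted by two so that bit 0 stays free for the immediate-index flag described in the load/store word comments below.

int ofs = PACK_LDST_ATTRIB_OFS(4);        /* 4 << 9 */
assert(UNPACK_LDST_ATTRIB_OFS(ofs) == 4);

ofs = PACK_LDST_UBO_OFS(8) | 1;           /* bit 0: ubo index is an immediate */
assert(UNPACK_LDST_UBO_OFS(ofs) == 8);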
static inline bool
midgard_is_branch_unit(unsigned unit)
{


@ -664,31 +664,37 @@ typedef enum {
typedef enum {
midgard_varying_mod_none = 0,
/* Other values unknown */
/* Take the would-be result and divide all components by its z/w
/* Take the would-be result and divide all components by its y/z/w
* (perspective division baked in with the load) */
midgard_varying_mod_perspective_y = 1,
midgard_varying_mod_perspective_z = 2,
midgard_varying_mod_perspective_w = 3,
/* The result is a 64-bit cubemap descriptor to use with
* midgard_tex_op_normal or midgard_tex_op_gradient */
midgard_varying_mod_cubemap = 4,
} midgard_varying_modifier;
typedef struct
__attribute__((__packed__))
{
unsigned zero0 : 1; /* Always zero */
midgard_varying_modifier modifier : 3;
midgard_varying_modifier modifier : 2;
bool flat_shading : 1;
unsigned zero1: 1; /* Always zero */
/* These are ignored if flat_shading is enabled. */
bool perspective_correction : 1;
bool centroid_mapping : 1;
/* Varying qualifiers, zero if not a varying */
unsigned flat : 1;
unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
midgard_interpolation interpolation : 2;
/* This is ignored if the shader only runs once per pixel. */
bool interpolate_sample : 1;
unsigned zero2 : 2; /* Always zero */
bool zero0 : 1; /* Always zero */
unsigned direct_sample_pos_x : 4;
unsigned direct_sample_pos_y : 4;
}
midgard_varying_parameter;
midgard_varying_params;
/* 8-bit register/etc selector for load/store ops */
typedef struct
@ -711,26 +717,56 @@ __attribute__((__packed__))
}
midgard_ldst_register_select;
typedef enum {
/* 0 is reserved */
midgard_index_address_u64 = 1,
midgard_index_address_u32 = 2,
midgard_index_address_s32 = 3,
} midgard_index_address_format;
typedef struct
__attribute__((__packed__))
{
midgard_load_store_op op : 8;
unsigned reg : 5;
unsigned mask : 4;
/* Source/dest reg */
unsigned reg : 5;
/* Generally a writemask.
* For ST_ATTR and ST_TEX, unused.
* For other stores, each bit masks 1/4th of the output. */
unsigned mask : 4;
/* Swizzle for stores, but for atomics it also encodes the source
* register. This fits because atomics don't need a swizzle, since they
* are not vectorized instructions. */
unsigned swizzle : 8;
/* Load/store ops can take two additional registers as arguments, but
* these are limited to load/store registers with only a few supported
* mask/swizzle combinations. The tradeoff is these are much more
* compact, requiring 8-bits each rather than 17-bits for a full
* reg/mask/swizzle. Usually (?) encoded as
* midgard_ldst_register_select. */
unsigned arg_1 : 8;
unsigned arg_2 : 8;
/* Arg reg, meaning changes according to each opcode */
unsigned arg_comp : 2;
unsigned arg_reg : 3;
unsigned varying_parameters : 10;
/* 64-bit address enable
* 32-bit data type enable for CUBEMAP and perspective div.
* Explicit indexing enable for LD_ATTR.
* 64-bit coordinate enable for LD_IMAGE. */
bool bitsize_toggle : 1;
unsigned address : 9;
/* These are mainly used for opcodes that have addresses.
* For cmpxchg, index_reg is used for the comparison value.
* For ops that access the attrib table, bit 1 encodes which table.
* For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inference. */
midgard_index_address_format index_format : 2;
unsigned index_comp : 2;
unsigned index_reg : 3;
unsigned index_shift : 4;
/* Generally a signed offset, but its bitsize and starting bit differ by
* opcode; the PACK_LDST_*_OFS/UNPACK_LDST_*_OFS helpers are recommended
* when packing/unpacking this field.
* For LD_UBO, bit 0 enables ubo index immediate.
* For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */
int signed_offset : 18;
}
midgard_load_store_word;
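Since atomics are not vectorized, the swizzle field doubles as their source operand, matching the disassembler change above. A sketch of that decode, with a hypothetical helper name, not part of the patch:

static void
decode_atomic_src(midgard_load_store_word word, unsigned *src, unsigned *src_comp)
{
        /* Register select in swizzle bits 4:2, component in bits 1:0 */
        *src = (word.swizzle >> 2) & 0x7;
        *src_comp = word.swizzle & 0x3;
}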


@ -36,17 +36,11 @@
* This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe.
*/
enum index_type {
ITYPE_U64 = 1 << 6,
ITYPE_U32 = 2 << 6, // zero-extend
ITYPE_I32 = 3 << 6, // sign-extend
};
struct mir_address {
nir_ssa_scalar A;
nir_ssa_scalar B;
enum index_type type;
midgard_index_address_format type;
unsigned shift;
unsigned bias;
};
@ -136,7 +130,7 @@ mir_match_u2u64(struct mir_address *address)
nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
address->B = arg;
address->type = ITYPE_U32;
address->type = midgard_index_address_u32;
}
/* Matches i2i64 and sets type */
@ -155,7 +149,7 @@ mir_match_i2i64(struct mir_address *address)
nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
address->B = arg;
address->type = ITYPE_I32;
address->type = midgard_index_address_s32;
}
/* Matches ishl to shift */
@ -210,7 +204,7 @@ mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend)
{
struct mir_address address = {
.B = { .def = offset },
.type = extend ? ITYPE_U64 : ITYPE_U32,
.type = extend ? midgard_index_address_u64 : midgard_index_address_u32,
};
mir_match_mov(&address);
@ -243,14 +237,16 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset,
bool force_sext = (nir_src_bit_size(*offset) < 64);
if (!offset->is_ssa) {
ins->load_store.arg_1 |= seg;
ins->load_store.bitsize_toggle = true;
ins->load_store.arg_comp = seg & 0x3;
ins->load_store.arg_reg = (seg >> 2) & 0x7;
ins->src[2] = nir_src_index(ctx, offset);
ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);
if (force_sext)
ins->load_store.arg_1 |= ITYPE_I32;
ins->load_store.index_format = midgard_index_address_s32;
else
ins->load_store.arg_1 |= ITYPE_U64;
ins->load_store.index_format = midgard_index_address_u64;
return;
}
@ -263,23 +259,26 @@ mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset,
ins->src[1] = nir_ssa_index(match.A.def);
ins->swizzle[1][0] = match.A.comp;
ins->src_types[1] = nir_type_uint | match.A.def->bit_size;
} else
ins->load_store.arg_1 |= seg;
} else {
ins->load_store.bitsize_toggle = true;
ins->load_store.arg_comp = seg & 0x3;
ins->load_store.arg_reg = (seg >> 2) & 0x7;
}
if (match.B.def) {
ins->src[2] = nir_ssa_index(match.B.def);
ins->swizzle[2][0] = match.B.comp;
ins->src_types[2] = nir_type_uint | match.B.def->bit_size;
} else
ins->load_store.arg_2 = 0x1E;
ins->load_store.index_reg = REGISTER_LDST_ZERO;
if (force_sext)
match.type = ITYPE_I32;
match.type = midgard_index_address_s32;
ins->load_store.arg_1 |= match.type;
ins->load_store.index_format = match.type;
assert(match.shift <= 7);
ins->load_store.arg_2 |= (match.shift) << 5;
ins->load_store.index_shift = match.shift;
ins->constants.u32[0] = match.bias;
}
@ -298,6 +297,6 @@ mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias)
ins->swizzle[2][i] = match.B.comp;
}
ins->load_store.arg_2 |= (match.shift) << 5;
ins->load_store.index_shift = match.shift;
ins->constants.u32[0] = match.bias + bias;
}


@ -112,7 +112,7 @@ schedule_barrier(compiler_context *ctx)
.swizzle = SWIZZLE_IDENTITY_4, \
.op = midgard_op_##name, \
.load_store = { \
.address = address \
.signed_offset = address \
} \
}; \
\
@ -1164,20 +1164,20 @@ emit_ubo_read(
if (indirect_offset) {
ins.src[2] = nir_src_index(ctx, indirect_offset);
ins.src_types[2] = nir_type_uint32;
ins.load_store.arg_2 = (indirect_shift << 5);
ins.load_store.index_shift = indirect_shift;
/* X component for the whole swizzle to prevent register
* pressure from ballooning from the extra components */
for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i)
ins.swizzle[2][i] = 0;
} else {
ins.load_store.arg_2 = 0x1E;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
}
if (indirect_offset && indirect_offset->is_ssa && !indirect_shift)
mir_set_ubo_offset(&ins, indirect_offset, offset);
ins.load_store.arg_1 = index;
midgard_pack_ubo_index_imm(&ins.load_store, index);
return emit_mir_instruction(ctx, ins);
}
@ -1274,12 +1274,6 @@ emit_atomic(
nir_src *src_offset = nir_get_io_offset_src(instr);
if (op == midgard_op_atomic_cmpxchg) {
for(unsigned i = 0; i < 2; ++i)
ins.swizzle[1][i] = i;
ins.src[1] = is_image ? image_direct_address : nir_src_index(ctx, src_offset);
ins.src_types[1] = nir_type_uint64;
unsigned xchg_val_src = is_image ? 4 : 2;
unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]);
emit_explicit_constant(ctx, xchg_val, xchg_val);
@ -1288,8 +1282,18 @@ emit_atomic(
ins.src_types[2] = type | bitsize;
ins.src[3] = xchg_val;
if (is_shared)
ins.load_store.arg_1 |= 0x6E;
if (is_shared) {
ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR;
ins.load_store.arg_comp = COMPONENT_Z;
ins.load_store.bitsize_toggle = true;
} else {
for(unsigned i = 0; i < 2; ++i)
ins.swizzle[1][i] = i;
ins.src[1] = is_image ? image_direct_address :
nir_src_index(ctx, src_offset);
ins.src_types[1] = nir_type_uint64;
}
} else if (is_image) {
for(unsigned i = 0; i < 2; ++i)
ins.swizzle[2][i] = i;
@ -1297,7 +1301,9 @@ emit_atomic(
ins.src[2] = image_direct_address;
ins.src_types[2] = nir_type_uint64;
ins.load_store.arg_1 |= 0x7E;
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.bitsize_toggle = true;
ins.load_store.index_format = midgard_index_address_u64;
} else
mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL);
@ -1316,7 +1322,7 @@ emit_varying_read(
/* XXX: Half-floats? */
/* TODO: swizzle, mask */
midgard_instruction ins = m_ld_vary_32(dest, offset);
midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset));
ins.mask = mask_of(nr_comp);
ins.dest_type = type;
@ -1328,23 +1334,22 @@ emit_varying_read(
for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i)
ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W);
midgard_varying_parameter p = {
.is_varying = 1,
.interpolation = midgard_interp_default,
.flat = flat,
};
unsigned u;
memcpy(&u, &p, sizeof(p));
ins.load_store.varying_parameters = u;
midgard_varying_params p = {
.flat_shading = flat,
.perspective_correction = 1,
.interpolate_sample = true,
};
midgard_pack_varying_params(&ins.load_store, p);
if (indirect_offset) {
ins.src[2] = nir_src_index(ctx, indirect_offset);
ins.src_types[2] = nir_type_uint32;
} else
ins.load_store.arg_2 = 0x1E;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
ins.load_store.arg_1 = 0x9E;
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.index_format = midgard_index_address_u32;
/* Use the type appropriate load */
switch (type) {
@ -1402,16 +1407,16 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
emit_explicit_constant(ctx, val, val);
nir_alu_type type = nir_intrinsic_src_type(instr);
ins = st_image(type, val, address);
ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address));
nir_alu_type base_type = nir_alu_type_get_base_type(type);
ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]);
} else if (is_atomic) { /* emit lea_image */
unsigned dest = make_compiler_temp_reg(ctx);
ins = m_lea_image(dest, address);
ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address));
ins.mask = mask_of(2); /* 64-bit memory address */
} else { /* emit ld_image_* */
nir_alu_type type = nir_intrinsic_dest_type(instr);
ins = ld_image(type, nir_dest_index(&instr->dest), address);
ins = ld_image(type, nir_dest_index(&instr->dest), PACK_LDST_ATTRIB_OFS(address));
ins.mask = mask_of(nir_intrinsic_dest_components(instr));
ins.dest_type = type;
}
@ -1420,7 +1425,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
ins.src[1] = coord_reg;
ins.src_types[1] = nir_type_uint16;
if (nr_dim == 3 || is_array) {
ins.load_store.arg_1 |= 0x20;
ins.load_store.bitsize_toggle = true;
}
/* Image index reg */
@ -1428,7 +1433,7 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
ins.src[2] = nir_src_index(ctx, index);
ins.src_types[2] = nir_type_uint32;
} else
ins.load_store.arg_2 = 0x1E;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
emit_mir_instruction(ctx, ins);
@ -1441,9 +1446,9 @@ emit_attr_read(
unsigned dest, unsigned offset,
unsigned nr_comp, nir_alu_type t)
{
midgard_instruction ins = m_ld_attr_32(dest, offset);
ins.load_store.arg_1 = 0x1E;
ins.load_store.arg_2 = 0x1E;
midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset));
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.index_reg = REGISTER_LDST_ZERO;
ins.mask = mask_of(nr_comp);
/* Use the type appropriate load */
@ -1493,12 +1498,12 @@ compute_builtin_arg(nir_op op)
{
switch (op) {
case nir_intrinsic_load_work_group_id:
return 0x14;
return REGISTER_LDST_GROUP_ID;
case nir_intrinsic_load_local_invocation_id:
return 0x10;
return REGISTER_LDST_LOCAL_THREAD_ID;
case nir_intrinsic_load_global_invocation_id:
case nir_intrinsic_load_global_invocation_id_zero_base:
return 0x18;
return REGISTER_LDST_GLOBAL_THREAD_ID;
default:
unreachable("Invalid compute paramater loaded");
}
@ -1567,7 +1572,7 @@ emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
midgard_instruction ins = m_ldst_mov(reg, 0);
ins.mask = mask_of(3);
ins.swizzle[0][3] = COMPONENT_X; /* xyzx */
ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic);
ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic);
emit_mir_instruction(ctx, ins);
}
@ -1598,8 +1603,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx)
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
ld.op = midgard_op_ld_special_32u;
ld.load_store.address = idx;
ld.load_store.arg_2 = 0x1E;
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx);
ld.load_store.index_reg = REGISTER_LDST_ZERO;
for (int i = 0; i < 4; ++i)
ld.swizzle[0][i] = COMPONENT_X;
@ -1790,20 +1795,25 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
unsigned target = output_load_rt_addr(ctx, instr);
ld.load_store.index_comp = target & 0x3;
ld.load_store.index_reg = target >> 2;
if (nir_src_is_const(instr->src[0])) {
ld.load_store.arg_1 = nir_src_as_uint(instr->src[0]);
unsigned sample = nir_src_as_uint(instr->src[0]);
ld.load_store.arg_comp = sample & 0x3;
ld.load_store.arg_reg = sample >> 2;
} else {
ld.load_store.varying_parameters = 2;
/* Enable sample index via register. */
ld.load_store.signed_offset |= 1;
ld.src[1] = nir_src_index(ctx, &instr->src[0]);
ld.src_types[1] = nir_type_int32;
}
if (ctx->quirks & MIDGARD_OLD_BLEND) {
ld.op = midgard_op_ld_special_32u;
ld.load_store.address = 16;
ld.load_store.arg_2 = 0x1E;
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16);
ld.load_store.index_reg = REGISTER_LDST_ZERO;
}
emit_mir_instruction(ctx, ld);
@ -1821,7 +1831,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
else
ld = m_ld_tilebuffer_32f(reg, 0);
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
unsigned index = output_load_rt_addr(ctx, instr);
ld.load_store.index_comp = index & 0x3;
ld.load_store.index_reg = index >> 2;
for (unsigned c = 4; c < 16; ++c)
ld.swizzle[0][c] = 0;
@ -1831,8 +1843,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
ld.op = midgard_op_ld_special_16f;
else
ld.op = midgard_op_ld_special_32f;
ld.load_store.address = 1;
ld.load_store.arg_2 = 0x1E;
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1);
ld.load_store.index_reg = REGISTER_LDST_ZERO;
}
emit_mir_instruction(ctx, ld);
@ -1924,9 +1936,10 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
unsigned dst_component = nir_intrinsic_component(instr);
unsigned nr_comp = nir_src_num_components(instr->src[0]);
midgard_instruction st = m_st_vary_32(reg, offset);
st.load_store.arg_1 = 0x9E;
st.load_store.arg_2 = 0x1E;
midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset));
st.load_store.arg_reg = REGISTER_LDST_ZERO;
st.load_store.index_format = midgard_index_address_u32;
st.load_store.index_reg = REGISTER_LDST_ZERO;
switch (nir_alu_type_get_base_type(nir_intrinsic_src_type(instr))) {
case nir_type_uint:
@ -2211,7 +2224,7 @@ set_tex_coord(compiler_context *ctx, nir_tex_instr *instr,
ld.src[1] = coords;
ld.src_types[1] = ins->src_types[1];
ld.mask = 0x3; /* xy */
ld.load_store.arg_1 = 0x20;
ld.load_store.bitsize_toggle = true;
ld.swizzle[1][3] = COMPONENT_X;
emit_mir_instruction(ctx, ld);


@ -44,6 +44,47 @@ mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
return midgard_int_zero_extend;
}
void
midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index)
{
word->arg_comp = index & 0x3;
word->arg_reg = (index >> 2) & 0x7;
word->bitsize_toggle = (index >> 5) & 0x1;
word->index_format = (index >> 6) & 0x3;
}
unsigned
midgard_unpack_ubo_index_imm(midgard_load_store_word word)
{
unsigned ubo = word.arg_comp |
(word.arg_reg << 2) |
(word.bitsize_toggle << 5) |
(word.index_format << 6);
return ubo;
}
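A round-trip of the helpers above, illustrative only and not part of the patch: the 8-bit UBO index is spread across arg_comp, arg_reg, bitsize_toggle and index_format, then reassembled on unpack.

midgard_load_store_word w = { 0 };
midgard_pack_ubo_index_imm(&w, 5);            /* arg_comp = 1, arg_reg = 1 */
assert(midgard_unpack_ubo_index_imm(w) == 5);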
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p)
{
/* Currently these parameters are not supported. */
assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0);
unsigned u;
memcpy(&u, &p, sizeof(p));
word->signed_offset |= u & 0x1FF;
}
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word)
{
unsigned params = word.signed_offset & 0x1FF;
midgard_varying_params p;
memcpy(&p, &params, sizeof(p));
return p;
}
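Likewise for the varying parameters, a sketch (not part of the patch) mirroring how emit_varying_read constructs them later in this commit:

midgard_load_store_word w = { 0 };
midgard_varying_params p = {
        .flat_shading = true,
        .perspective_correction = true,
        .interpolate_sample = true,
};
midgard_pack_varying_params(&w, p);
assert(midgard_unpack_varying_params(w).flat_shading);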
unsigned
mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar)
{
@ -578,15 +619,15 @@ load_store_from_instr(midgard_instruction *ins)
}
if (ins->src[1] != ~0) {
unsigned src = SSA_REG_FROM_FIXED(ins->src[1]);
ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE;
unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
ldst.arg_1 |= midgard_ldst_reg(src, ins->swizzle[1][0], sz);
ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz);
}
if (ins->src[2] != ~0) {
unsigned src = SSA_REG_FROM_FIXED(ins->src[2]);
ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE;
unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]);
ldst.arg_2 |= midgard_ldst_reg(src, ins->swizzle[2][0], sz);
ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz);
}
return ldst;
@ -876,13 +917,22 @@ emit_alu_bundle(compiler_context *ctx,
* over some other semantic distinction else well, but it unifies things in the
* compiler so I don't mind. */
static unsigned
mir_ldst_imm_shift(midgard_load_store_op op)
static void
mir_ldst_pack_offset(midgard_instruction *ins, int offset)
{
if (OP_IS_UBO_READ(op))
return 3;
/* These opcodes don't support offsets */
assert(!OP_IS_REG2REG_LDST(ins->op) ||
ins->op == midgard_op_lea ||
ins->op == midgard_op_lea_image);
if (OP_IS_UBO_READ(ins->op))
ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset);
else if (OP_IS_IMAGE(ins->op))
ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset);
else if (OP_IS_SPECIAL(ins->op))
ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset);
else
return 1;
ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset);
}
static enum mali_sampler_type
@ -931,22 +981,17 @@ emit_binary_bundle(compiler_context *ctx,
/* Copy masks */
for (unsigned i = 0; i < bundle->instruction_count; ++i) {
mir_pack_ldst_mask(bundle->instructions[i]);
midgard_instruction *ins = bundle->instructions[i];
mir_pack_ldst_mask(ins);
/* Atomic ops don't use this swizzle the same way as other ops */
if (!OP_IS_ATOMIC(bundle->instructions[i]->op))
mir_pack_swizzle_ldst(bundle->instructions[i]);
if (!OP_IS_ATOMIC(ins->op))
mir_pack_swizzle_ldst(ins);
/* Apply a constant offset */
unsigned offset = bundle->instructions[i]->constants.u32[0];
if (offset) {
unsigned shift = mir_ldst_imm_shift(bundle->instructions[i]->op);
unsigned upper_shift = 10 - shift;
bundle->instructions[i]->load_store.varying_parameters |= (offset & ((1 << upper_shift) - 1)) << shift;
bundle->instructions[i]->load_store.address |= (offset >> upper_shift);
}
unsigned offset = ins->constants.u32[0];
if (offset)
mir_ldst_pack_offset(ins, offset);
}
midgard_load_store_word ldst0 =


@ -125,7 +125,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
midgard_op_ldst_perspective_div_w :
midgard_op_ldst_perspective_div_z,
.load_store = {
.arg_1 = 0x20
.bitsize_toggle = true,
}
};
@ -167,9 +167,8 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
/* We found it, so rewrite it to project. Grab the
* modifier */
unsigned param = v->load_store.varying_parameters;
midgard_varying_parameter p;
memcpy(&p, &param, sizeof(p));
midgard_varying_params p =
midgard_unpack_varying_params(v->load_store);
if (p.modifier != midgard_varying_mod_none)
break;
@ -181,9 +180,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
midgard_varying_mod_perspective_w :
midgard_varying_mod_perspective_z;
/* Aliasing rules are annoying */
memcpy(&param, &p, sizeof(p));
v->load_store.varying_parameters = param;
midgard_pack_varying_params(&v->load_store, p);
/* Use the new destination */
v->dest = to;


@ -988,12 +988,14 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
.swizzle = SWIZZLE_IDENTITY_4,
.op = midgard_op_ld_ubo_128,
.load_store = {
.arg_1 = ctx->info->push.words[idx].ubo,
.arg_2 = 0x1E,
.index_reg = REGISTER_LDST_ZERO,
},
.constants.u32[0] = ctx->info->push.words[idx].offset
};
midgard_pack_ubo_index_imm(&ld.load_store,
ctx->info->push.words[idx].ubo);
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
mir_rewrite_index_src_single(ins, ins->src[i], temp);


@ -140,14 +140,17 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
if (instructions[i]->type == TAG_LOAD_STORE_4 &&
load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) {
unsigned type;
switch (instructions[i]->load_store.arg_1 & 0x3E) {
case LDST_SHARED: type = 0; break;
case LDST_SCRATCH: type = 1; break;
default: type = 2; break;
unsigned type = instructions[i]->load_store.arg_reg |
instructions[i]->load_store.arg_comp;
unsigned idx;
switch (type) {
case LDST_SHARED: idx = 0; break;
case LDST_SCRATCH: idx = 1; break;
default: idx = 2; break;
}
unsigned prev = prev_ldst[type];
unsigned prev = prev_ldst[idx];
if (prev != ~0) {
BITSET_WORD *dependents = instructions[prev]->dependents;
@ -160,7 +163,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
instructions[i]->nr_dependencies++;
}
prev_ldst[type] = i;
prev_ldst[idx] = i;
}
if (dest < node_count) {


@ -73,7 +73,7 @@ mir_analyze_ranges(compiler_context *ctx)
mir_foreach_instr_global(ctx, ins) {
if (!mir_is_direct_aligned_ubo(ins)) continue;
unsigned ubo = ins->load_store.arg_1;
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
unsigned offset = ins->constants.u32[0] / 16;
assert(ubo < res.nr_blocks);
@ -276,7 +276,7 @@ midgard_promote_uniforms(compiler_context *ctx)
mir_foreach_instr_global_safe(ctx, ins) {
if (!mir_is_direct_aligned_ubo(ins)) continue;
unsigned ubo = ins->load_store.arg_1;
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
unsigned qword = ins->constants.u32[0] / 16;
/* Check if we decided to push this */