diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 814a78919c3..2492aa6d9aa 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1354,6 +1354,11 @@ store("uniform_ir3", [], indices=[BASE])
 # vec4's.
 intrinsic("copy_ubo_to_uniform_ir3", [1, 1], indices=[BASE, RANGE])
 
+# IR3-specific intrinsic for ldg.k.
+# base is an offset to apply to the address in bytes, range_base is the
+# const file base in components, range is the amount to copy in vec4's.
+intrinsic("copy_global_to_uniform_ir3", [2], indices=[BASE, RANGE_BASE, RANGE])
+
 # IR3-specific intrinsic for stsc. Loads from push consts to constant file
 # Should be used in the shader preamble.
 intrinsic("copy_push_const_to_uniform_ir3", [1], indices=[BASE, RANGE])
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index d326de7c080..45d28d007bf 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2582,6 +2582,7 @@ INSTR4(ATOMIC_S_AND)
 INSTR4(ATOMIC_S_OR)
 INSTR4(ATOMIC_S_XOR)
 #endif
+INSTR4NODST(LDG_K)
 
 /* cat7 instructions: */
 INSTR0(BAR)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 8216e95c9f1..e97ec561f37 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -920,6 +920,40 @@ emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx,
    array_insert(b, b->keeps, ldc);
 }
 
+static void
+emit_intrinsic_copy_global_to_uniform(struct ir3_context *ctx,
+                                      nir_intrinsic_instr *intr)
+{
+   struct ir3_block *b = ctx->block;
+
+   unsigned size = nir_intrinsic_range(intr);
+   unsigned dst = nir_intrinsic_range_base(intr);
+   unsigned addr_offset = nir_intrinsic_base(intr);
+   unsigned dst_lo = dst & 0xff;
+   unsigned dst_hi = dst >> 8;
+
+   struct ir3_instruction *a1 = NULL;
+   if (dst_hi)
+      a1 = ir3_get_addr1(ctx, dst_hi << 8);
+
+   struct ir3_instruction *addr_lo = ir3_get_src(ctx, &intr->src[0])[0];
+   struct ir3_instruction *addr_hi = ir3_get_src(ctx, &intr->src[0])[1];
+   struct ir3_instruction *addr = ir3_collect(b, addr_lo, addr_hi);
+   struct ir3_instruction *ldg = ir3_LDG_K(b, create_immed(b, dst_lo), 0, addr, 0,
+                                           create_immed(b, addr_offset), 0,
+                                           create_immed(b, size), 0);
+   ldg->barrier_class = ldg->barrier_conflict = IR3_BARRIER_CONST_W;
+   ldg->cat6.type = TYPE_U32;
+
+   if (a1) {
+      ir3_instr_set_address(ldg, a1);
+      ldg->flags |= IR3_INSTR_A1EN;
+   }
+
+   array_insert(b, b->keeps, ldg);
+}
+
+
 /* handles direct/indirect UBO reads: */
 static void
 emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
@@ -2277,6 +2311,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    case nir_intrinsic_copy_ubo_to_uniform_ir3:
       emit_intrinsic_copy_ubo_to_uniform(ctx, intr);
       break;
+   case nir_intrinsic_copy_global_to_uniform_ir3:
+      emit_intrinsic_copy_global_to_uniform(ctx, intr);
+      break;
    case nir_intrinsic_load_frag_coord:
    case nir_intrinsic_load_frag_coord_unscaled_ir3:
       ir3_split_dest(b, dst, get_frag_coord(ctx, intr), 0, 4);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 46587509e0a..8dc68f10083 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -808,6 +808,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
        !(ir3_shader_debug & IR3_DBG_NOPREAMBLE))
       progress |= OPT(s, ir3_nir_opt_preamble, so);
 
+   if (so->compiler->load_shader_consts_via_preamble)
+      progress |= OPT(s, ir3_nir_lower_driver_params_to_ubo, so);
+
+   /* TODO: ldg.k might also work on a6xx */
+   if (so->compiler->gen >= 7)
+      progress |= OPT(s, ir3_nir_lower_const_global_loads, so);
+
    if (!so->binning_pass)
       OPT_V(s, ir3_nir_analyze_ubo_ranges, so);
 
@@ -1053,7 +1060,8 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
    assert((const_state->ubo_state.size % 16) == 0);
    unsigned constoff = v->shader_options.num_reserved_user_consts +
                        const_state->ubo_state.size / 16 +
-                       const_state->preamble_size;
+                       const_state->preamble_size +
+                       const_state->global_size;
    unsigned ptrsz = ir3_pointer_size(compiler);
 
    if (const_state->num_ubos > 0 && compiler->gen < 6) {
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index af1ad07d93d..5320340ea2c 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -80,6 +80,7 @@ void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v);
 void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
 bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
+bool ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v);
 bool ir3_nir_fixup_load_uniform(nir_shader *nir);
 bool ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v);
 bool ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v);
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index f364c7cdf9d..4982fdc8c24 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -34,11 +34,18 @@ get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr,
    uint32_t offset = nir_intrinsic_range_base(instr);
    uint32_t size = nir_intrinsic_range(instr);
 
+   if (instr->intrinsic == nir_intrinsic_load_global_ir3) {
+      offset *= 4;
+      size *= 4;
+   }
+
    /* If the offset is constant, the range is trivial (and NIR may not have
    * figured it out).
    */
   if (nir_src_is_const(instr->src[1])) {
      offset = nir_src_as_uint(instr->src[1]);
+      if (instr->intrinsic == nir_intrinsic_load_global_ir3)
+         offset *= 4;
      size = nir_intrinsic_dest_components(instr) * 4;
   }
 
@@ -55,17 +62,28 @@
 static bool
 get_ubo_info(nir_intrinsic_instr *instr, struct ir3_ubo_info *ubo)
 {
-   if (nir_src_is_const(instr->src[0])) {
+   if (instr->intrinsic == nir_intrinsic_load_global_ir3) {
+      ubo->global_base = instr->src[0].ssa;
+      ubo->block = 0;
+      ubo->bindless_base = 0;
+      ubo->bindless = false;
+      ubo->global = true;
+      return true;
+   } else if (nir_src_is_const(instr->src[0])) {
+      ubo->global_base = NULL;
       ubo->block = nir_src_as_uint(instr->src[0]);
       ubo->bindless_base = 0;
       ubo->bindless = false;
+      ubo->global = false;
       return true;
    } else {
       nir_intrinsic_instr *rsrc = ir3_bindless_resource(instr->src[0]);
       if (rsrc && nir_src_is_const(rsrc->src[0])) {
+         ubo->global_base = NULL;
         ubo->block = nir_src_as_uint(rsrc->src[0]);
         ubo->bindless_base = nir_intrinsic_desc_set(rsrc);
         ubo->bindless = true;
+         ubo->global = false;
         return true;
      }
   }
@@ -273,7 +291,8 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
    struct ir3_ubo_range r;
    if (!get_ubo_load_range(b->shader, instr, alignment, &r)) {
-      track_ubo_use(instr, b, num_ubos);
+      if (instr->intrinsic == nir_intrinsic_load_ubo)
+         track_ubo_use(instr, b, num_ubos);
       return false;
    }
 
@@ -283,7 +302,8 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
    */
   const struct ir3_ubo_range *range = get_existing_range(instr, state, &r);
   if (!range) {
-      track_ubo_use(instr, b, num_ubos);
+      if (instr->intrinsic == nir_intrinsic_load_ubo)
+         track_ubo_use(instr, b, num_ubos);
      return false;
   }
 
@@ -292,20 +312,23 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
 
    handle_partial_const(b, &ubo_offset, &const_offset);
 
-   /* UBO offset is in bytes, but uniform offset is in units of
-    * dwords, so we need to divide by 4 (right-shift by 2). For ldc the
-    * offset is in units of 16 bytes, so we need to multiply by 4. And
-    * also the same for the constant part of the offset:
-    */
-   const int shift = -2;
-   nir_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
-   nir_def *uniform_offset = NULL;
-   if (new_offset) {
-      uniform_offset = new_offset;
-   } else {
-      uniform_offset = shift > 0
-                          ? nir_ishl_imm(b, ubo_offset, shift)
-                          : nir_ushr_imm(b, ubo_offset, -shift);
+   nir_def *uniform_offset = ubo_offset;
+
+   if (instr->intrinsic == nir_intrinsic_load_ubo) {
+      /* UBO offset is in bytes, but uniform offset is in units of
+       * dwords, so we need to divide by 4 (right-shift by 2). For ldc the
+       * offset is in units of 16 bytes, so we need to multiply by 4. And
+       * also the same for the constant part of the offset:
+       */
+      const int shift = -2;
+      nir_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
+      if (new_offset) {
+         uniform_offset = new_offset;
+      } else {
+         uniform_offset = shift > 0
+                             ? nir_ishl_imm(b, ubo_offset, shift)
+                             : nir_ushr_imm(b, ubo_offset, -shift);
+      }
    }
 
    assert(!(const_offset & 0x3));
@@ -336,6 +359,174 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
    return true;
 }
 
+/* This isn't nearly as comprehensive as what's done in nir_opt_preamble, but we
+ * need to hoist the load_global base into the preamble. Currently the only user
+ * is turnip with inline uniforms, so we can be simple and only handle a few
+ * uncomplicated intrinsics.
+ *
+ * TODO: Fold what this pass does into opt_preamble, which will give us a better
+ * heuristic for what to push and we won't need this.
+ */
+static bool
+def_is_rematerializable(nir_def *def)
+{
+   switch (def->parent_instr->type) {
+   case nir_instr_type_load_const:
+      return true;
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(def->parent_instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_ubo:
+         return def_is_rematerializable(intrin->src[0].ssa) &&
+                def_is_rematerializable(intrin->src[1].ssa);
+      case nir_intrinsic_bindless_resource_ir3:
+         return def_is_rematerializable(intrin->src[0].ssa);
+      default:
+         return false;
+      }
+   }
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);
+      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+         if (!def_is_rematerializable(alu->src[i].src.ssa))
+            return false;
+      }
+      return true;
+   }
+   default:
+      return false;
+   }
+}
+
+static nir_def *
+_rematerialize_def(nir_builder *b, struct hash_table *remap_ht,
+                   nir_def *def)
+{
+   if (_mesa_hash_table_search(remap_ht, def->parent_instr))
+      return NULL;
+
+   switch (def->parent_instr->type) {
+   case nir_instr_type_load_const:
+      break;
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(def->parent_instr);
+      for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs;
+           i++)
+         _rematerialize_def(b, remap_ht, intrin->src[i].ssa);
+      break;
+   }
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);
+      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
+         _rematerialize_def(b, remap_ht, alu->src[i].src.ssa);
+      break;
+   }
+   default:
+      unreachable("should not get here");
+   }
+
+   nir_instr *instr = nir_instr_clone_deep(b->shader, def->parent_instr,
+                                           remap_ht);
+   nir_builder_instr_insert(b, instr);
+   return nir_instr_def(instr);
+}
+
+static nir_def *
+rematerialize_def(nir_builder *b, nir_def *def)
+{
+   struct hash_table *remap_ht = _mesa_pointer_hash_table_create(NULL);
+
+   nir_def *new_def = _rematerialize_def(b, remap_ht, def);
+
+   _mesa_hash_table_destroy(remap_ht, NULL);
+
+   return new_def;
+}
+
+static bool
+rematerialize_load_global_bases(nir_shader *nir,
+                                struct ir3_ubo_analysis_state *state)
+{
+   bool has_load_global = false;
+   for (unsigned i = 0; i < state->num_enabled; i++) {
+      if (state->range[i].ubo.global) {
+         has_load_global = true;
+         break;
+      }
+   }
+
+   if (!has_load_global)
+      return false;
+
+   nir_function_impl *preamble = nir_shader_get_preamble(nir);
+   nir_builder _b = nir_builder_at(nir_after_impl(preamble));
+   nir_builder *b = &_b;
+
+   for (unsigned i = 0; i < state->num_enabled; i++) {
+      struct ir3_ubo_range *range = &state->range[i];
+
+      if (!range->ubo.global)
+         continue;
+
+      range->ubo.global_base = rematerialize_def(b, range->ubo.global_base);
+   }
+
+   return true;
+}
+
+static bool
+copy_global_to_uniform(nir_shader *nir, struct ir3_ubo_analysis_state *state)
+{
+   if (state->num_enabled == 0)
+      return false;
+
+   nir_function_impl *preamble = nir_shader_get_preamble(nir);
+   nir_builder _b = nir_builder_at(nir_after_impl(preamble));
+   nir_builder *b = &_b;
+
+   for (unsigned i = 0; i < state->num_enabled; i++) {
+      const struct ir3_ubo_range *range = &state->range[i];
+      assert(range->ubo.global);
+
+      nir_def *base = rematerialize_def(b, range->ubo.global_base);
+      unsigned start = range->start;
+      if (start > (1 << 10)) {
+         /* This is happening pretty late, so we need to add the offset
+          * manually ourselves.
+          */
+         nir_def *start_val = nir_imm_int(b, start);
+         nir_def *base_lo = nir_channel(b, base, 0);
+         nir_def *base_hi = nir_channel(b, base, 1);
+         base_lo = nir_iadd(b, base_lo, start_val);
+         nir_def *carry = nir_b2i32(b, nir_ult(b, base_lo, start_val));
+         base_hi = nir_iadd(b, base_hi, carry);
+         base = nir_vec2(b, base_lo, base_hi);
+         start = 0;
+      }
+
+      unsigned size = (range->end - range->start);
+      for (unsigned offset = 0; offset < size; offset += 16) {
+         unsigned const_offset = range->offset / 4 + offset / 4;
+         if (const_offset < 256) {
+            nir_copy_global_to_uniform_ir3(b, base,
+                                           .base = start + offset,
+                                           .range_base = const_offset,
+                                           .range = 1);
+         } else {
+            /* It seems that the a1.x format doesn't work, so we need to
+             * decompose the ldg.k into ldg + stc.
+             */
+            nir_def *load =
+               nir_load_global_ir3(b, 4, 32, base,
+                                   nir_imm_int(b, (start + offset) / 4));
+            nir_store_uniform_ir3(b, load, .base = const_offset);
+         }
+      }
+   }
+
+   return true;
+}
+
 static bool
 copy_ubo_to_uniform(nir_shader *nir, const struct ir3_const_state *const_state,
                     bool const_data_via_cp)
@@ -402,6 +593,130 @@ instr_is_load_ubo(nir_instr *instr)
    return op == nir_intrinsic_load_ubo;
 }
 
+static bool
+instr_is_load_const(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   nir_intrinsic_op op = intrin->intrinsic;
+
+   if (op != nir_intrinsic_load_global_ir3)
+      return false;
+
+   /* TODO handle non-aligned accesses */
+   if (nir_intrinsic_align_mul(intrin) < 16 ||
+       nir_intrinsic_align_offset(intrin) % 16 != 0)
+      return false;
+
+   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
+   return (access & ACCESS_NON_WRITEABLE) && (access & ACCESS_CAN_SPECULATE);
+}
+
+/* For now, everything we upload is accessed statically and thus will be
+ * used by the shader. Once we can upload dynamically indexed data, we may
+ * upload sparsely accessed arrays, at which point we probably want to
+ * give priority to smaller UBOs, on the assumption that big UBOs will be
+ * accessed dynamically. Alternatively, we can track statically and
+ * dynamically accessed ranges separately and upload static ranges
+ * first.
+ */
+static void
+assign_offsets(struct ir3_ubo_analysis_state *state, unsigned start,
+               unsigned max_upload)
+{
+   uint32_t offset = 0;
+   for (uint32_t i = 0; i < state->num_enabled; i++) {
+      uint32_t range_size = state->range[i].end - state->range[i].start;
+
+      assert(offset <= max_upload);
+      state->range[i].offset = offset + start;
+      assert(offset <= max_upload);
+      offset += range_size;
+   }
+   state->size = offset;
+}
+
+/* Lowering ldg to ldg.k + const uses the same infrastructure as lowering UBO
+ * loads, but must be done separately because the analysis and transform must be
+ * done in the same pass and we cannot reuse the main variant analysis for the
+ * binning variant.
+ */
+bool
+ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
+{
+   struct ir3_const_state *const_state = ir3_const_state(v);
+   struct ir3_compiler *compiler = v->compiler;
+
+   if (ir3_shader_debug & IR3_DBG_NOUBOOPT)
+      return false;
+
+   unsigned max_upload;
+   if (v->binning_pass) {
+      max_upload = const_state->global_size * 16;
+   } else {
+      struct ir3_const_state worst_case_const_state = {
+         .preamble_size = const_state->preamble_size,
+      };
+      ir3_setup_const_state(nir, v, &worst_case_const_state);
+      max_upload = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16;
+   }
+
+   struct ir3_ubo_analysis_state state = {};
+   uint32_t upload_remaining = max_upload;
+
+   nir_foreach_function (function, nir) {
+      if (function->impl && !function->is_preamble) {
+         nir_foreach_block (block, function->impl) {
+            nir_foreach_instr (instr, block) {
+               if (instr_is_load_const(instr) &&
+                   def_is_rematerializable(nir_instr_as_intrinsic(instr)->src[0].ssa))
+                  gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), &state,
+                                    compiler->const_upload_unit,
+                                    &upload_remaining);
+            }
+         }
+      }
+   }
+
+   uint32_t global_offset = v->shader_options.num_reserved_user_consts * 16;
+   assign_offsets(&state, global_offset, max_upload);
+
+   bool progress = copy_global_to_uniform(nir, &state);
+
+   if (progress) {
+      nir_foreach_function (function, nir) {
+         if (function->impl) {
+            if (function->is_preamble) {
+               nir_metadata_preserve(
+                  function->impl, nir_metadata_all);
+               continue;
+            }
+
+            nir_builder builder = nir_builder_create(function->impl);
+            nir_foreach_block (block, function->impl) {
+               nir_foreach_instr_safe (instr, block) {
+                  if (!instr_is_load_const(instr))
+                     continue;
+                  progress |= lower_ubo_load_to_uniform(
+                     nir_instr_as_intrinsic(instr), &builder, &state, NULL,
+                     compiler->const_upload_unit);
+               }
+            }
+
+            nir_metadata_preserve(
+               function->impl, nir_metadata_block_index | nir_metadata_dominance);
+         }
+      }
+   }
+
+   if (!v->binning_pass)
+      const_state->global_size = DIV_ROUND_UP(state.size, 16);
+
+   return progress;
+}
+
 void
 ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
 {
@@ -417,6 +732,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
    */
   struct ir3_const_state worst_case_const_state = {
      .preamble_size = const_state->preamble_size,
+      .global_size = const_state->global_size,
   };
   ir3_setup_const_state(nir, v, &worst_case_const_state);
   const uint32_t max_upload =
@@ -429,6 +745,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
    uint32_t upload_remaining = max_upload;
 
    bool push_ubos = compiler->options.push_ubo_with_preamble;
+
    nir_foreach_function (function, nir) {
       if (function->impl && (!push_ubos || !function->is_preamble)) {
          nir_foreach_block (block, function->impl) {
@@ -442,25 +759,9 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
          }
       }
    }
-   /* For now, everything we upload is accessed statically and thus will be
-    * used by the shader. Once we can upload dynamically indexed data, we may
-    * upload sparsely accessed arrays, at which point we probably want to
-    * give priority to smaller UBOs, on the assumption that big UBOs will be
-    * accessed dynamically. Alternatively, we can track statically and
-    * dynamically accessed ranges separately and upload static rangtes
-    * first.
-    */
-
-   uint32_t offset = 0;
-   for (uint32_t i = 0; i < state->num_enabled; i++) {
-      uint32_t range_size = state->range[i].end - state->range[i].start;
-
-      assert(offset <= max_upload);
-      state->range[i].offset = offset + v->shader_options.num_reserved_user_consts * 16;
-      assert(offset <= max_upload);
-      offset += range_size;
-   }
-   state->size = offset;
+   uint32_t ubo_offset = v->shader_options.num_reserved_user_consts * 16 +
+                         const_state->global_size * 16;
+   assign_offsets(state, ubo_offset, max_upload);
 }
 
 bool
diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c
index cd7926ab252..1fa9baf8f44 100644
--- a/src/freedreno/ir3/ir3_nir_opt_preamble.c
+++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c
@@ -349,7 +349,7 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
    /* First, lower load/store_preamble. */
    const struct ir3_const_state *const_state = ir3_const_state(v);
    unsigned preamble_base = v->shader_options.num_reserved_user_consts * 4 +
-      const_state->ubo_state.size / 4;
+      const_state->ubo_state.size / 4 + const_state->global_size * 4;
    unsigned preamble_size = const_state->preamble_size * 4;
 
    BITSET_DECLARE(promoted_to_float, preamble_size);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 8f087f2dcc2..2b2cb198c65 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -123,10 +123,14 @@ enum ir3_wavesize_option {
 /**
  * Description of a lowered UBO.
  */
+struct nir_def;
+
 struct ir3_ubo_info {
+   struct nir_def *global_base; /* For global loads, the base address */
    uint32_t block;         /* Which constant block */
    uint16_t bindless_base; /* For bindless, which base register is used */
    bool bindless;
+   bool global;
 };
 
 /**
@@ -230,6 +234,7 @@ struct ir3_const_state {
    uint32_t *immediates;
 
    unsigned preamble_size;
+   unsigned global_size;
 
    /* State of ubo access lowered to push consts: */
    struct ir3_ubo_analysis_state ubo_state;