diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index c4eab8f6e60..59363966626 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -598,6 +598,8 @@ enum tex_logical_srcs { TEX_LOGICAL_SRC_SAMPLER_HANDLE, /** Texel offset for gathers */ TEX_LOGICAL_SRC_TG4_OFFSET, + /** Texture offset */ + TEX_LOGICAL_SRC_PACKED_OFFSET, /** REQUIRED: Number of coordinate components (as UD immediate) */ TEX_LOGICAL_SRC_COORD_COMPONENTS, /** REQUIRED: Number of derivative components (as UD immediate) */ diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 4954d9c1951..fedd4a65688 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -85,38 +85,6 @@ static void brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, static void brw_combine_with_vec(const brw_builder &bld, const brw_reg &dst, const brw_reg &src, unsigned n); -static bool -brw_texture_offset(const nir_tex_instr *tex, unsigned src, - uint32_t *offset_bits_out) -{ - if (!nir_src_is_const(tex->src[src].src)) - return false; - - const unsigned num_components = nir_tex_instr_src_size(tex, src); - - /* Combine all three offsets into a single unsigned dword: - * - * bits 11:8 - U Offset (X component) - * bits 7:4 - V Offset (Y component) - * bits 3:0 - R Offset (Z component) - */ - uint32_t offset_bits = 0; - for (unsigned i = 0; i < num_components; i++) { - int offset = nir_src_comp_as_int(tex->src[src].src, i); - - /* offset out of bounds; caller will handle it. 
*/ - if (offset > 7 || offset < -8) - return false; - - const unsigned shift = 4 * (2 - i); - offset_bits |= (offset & 0xF) << shift; - } - - *offset_bits_out = offset_bits; - - return true; -} - static brw_reg setup_imm_b(const brw_builder &bld, int8_t v) { @@ -7332,21 +7300,15 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_TYPE_UD); break; - case nir_tex_src_offset: { - uint32_t offset_bits = 0; - if (brw_texture_offset(instr, i, &offset_bits)) { - header_bits |= offset_bits; - } else { - /* On gfx12.5+, if the offsets are not both constant and in the - * {-8,7} range, nir_lower_tex() will have already lowered the - * source offset. So we should never reach this point. - */ - assert(devinfo->verx10 < 125); - srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = - retype(src, BRW_TYPE_D); - } + case nir_tex_src_offset: + /* On gfx12.5+, if the offsets are not both constant and in the + * {-8,7} range, nir_lower_tex() will have already lowered the + * source offset. So we should never reach this point. + */ + assert(devinfo->verx10 < 125); + srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = + retype(src, BRW_TYPE_D); break; - } case nir_tex_src_projector: unreachable("should be lowered"); @@ -7390,10 +7352,20 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, * into a single (32-bit) value. */ case nir_tex_src_backend2: - assert(instr->op == nir_texop_tg4); - pack_lod_bias_and_offset = true; - srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + /* For TG4, if there is a LOD, it would have been packed together + * with offsets, just put everything into SRC_LOD. + * + * Otherwise this is a packed offset. 
+ */ + if (instr->op == nir_texop_tg4 && + (nir_tex_instr_src_index(instr, nir_tex_src_lod) != -1 || + nir_tex_instr_src_index(instr, nir_tex_src_bias) != -1)) { + pack_lod_bias_and_offset = true; + srcs[TEX_LOGICAL_SRC_LOD] = + retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + } else { + srcs[TEX_LOGICAL_SRC_PACKED_OFFSET] = bld.emit_uniformize(src); + } break; /* If this parameter is present, we are packing either the explicit LOD diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 538d4bd54c4..455f6f78066 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -757,6 +757,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, const brw_reg &surface_handle, const brw_reg &sampler_handle, const brw_reg &tg4_offset, + const brw_reg &packed_offset, unsigned payload_type_bit_size, unsigned coord_components, unsigned grad_components, @@ -784,6 +785,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE)); if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 || + packed_offset.file != BAD_FILE || sampler_handle.file != BAD_FILE || is_high_sampler(devinfo, sampler) || residency) { @@ -825,8 +827,17 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, ubld.MOV(header, brw_imm_ud(0)); else ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD)); - if (inst->offset) { - ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); + if (packed_offset.file != BAD_FILE || inst->offset) { + if (inst->offset && packed_offset.file != BAD_FILE) { + if (packed_offset.file == IMM) + ubld1.MOV(component(header, 2), brw_imm_ud(packed_offset.ud | inst->offset)); + else + ubld1.OR(component(header, 2), packed_offset, brw_imm_ud(inst->offset)); + } else if (packed_offset.file != BAD_FILE) { + ubld1.MOV(component(header, 
2), packed_offset); + } else { + ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); + } } else if (devinfo->ver < 11 && bld.shader->stage != MESA_SHADER_VERTEX && bld.shader->stage != MESA_SHADER_FRAGMENT) { @@ -1286,6 +1297,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]; const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE]; const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET]; + const brw_reg packed_offset = inst->src[TEX_LOGICAL_SRC_PACKED_OFFSET]; assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM); const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM); @@ -1296,6 +1308,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) const unsigned msg_payload_type_bit_size = get_sampler_msg_payload_type_bit_size(devinfo, inst); + assert(tg4_offset.file == BAD_FILE || packed_offset.file == BAD_FILE); + /* 16-bit payloads are available only on gfx11+ */ assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11); @@ -1304,7 +1318,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) sample_index, mcs, surface, sampler, surface_handle, sampler_handle, - tg4_offset, + tg4_offset, packed_offset, msg_payload_type_bit_size, coord_components, grad_components, residency); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index bee39a40f7d..042f11d7adf 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1790,11 +1790,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, if (OPT(nir_lower_tex, &tex_options)) OPT(nir_lower_tex, &tex_options); - const struct brw_nir_lower_texture_opts brw_tex_options = { - .combined_lod_and_array_index = compiler->devinfo->ver >= 20, - .combined_lod_or_bias_and_offset = compiler->devinfo->ver >= 20, - 
}; - OPT(brw_nir_lower_texture, &brw_tex_options); + OPT(brw_nir_lower_texture, devinfo); OPT(intel_nir_lower_sparse_intrinsics); diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 54f0a655b74..95acf75930f 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -205,12 +205,8 @@ bool brw_nir_lower_texel_address(nir_shader *shader, const struct intel_device_info *devinfo, enum isl_tiling tiling); -struct brw_nir_lower_texture_opts { - bool combined_lod_and_array_index; - bool combined_lod_or_bias_and_offset; -}; bool brw_nir_lower_texture(nir_shader *nir, - const struct brw_nir_lower_texture_opts *opts); + const struct intel_device_info *devinfo); bool brw_nir_lower_sample_index_in_coord(nir_shader *nir); diff --git a/src/intel/compiler/brw_nir_lower_texture.c b/src/intel/compiler/brw_nir_lower_texture.c index aa478175bcc..b2430d26a69 100644 --- a/src/intel/compiler/brw_nir_lower_texture.c +++ b/src/intel/compiler/brw_nir_lower_texture.c @@ -22,6 +22,7 @@ */ #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" #include "brw_nir.h" /** @@ -103,8 +104,11 @@ pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex) static bool pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex) { - int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); - if (offset_index < 0) + /* If there is no backend2, it means there was no offset to pack so just + * bail. 
+ */ + int backend2_index = nir_tex_instr_src_index(tex, nir_tex_src_backend2); + if (backend2_index < 0) return false; /* If 32-bit texture coordinates are used, pack either the explicit LOD or @@ -130,18 +134,13 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex) return false; } - nir_def *lod = tex->src[lod_index].src.ssa; - nir_def *offset = tex->src[offset_index].src.ssa; - - b->cursor = nir_before_instr(&tex->instr); - /* When using the programmable offsets instruction gather4_po_l_c with * SIMD16 or SIMD32 the U, V offsets are combined with LOD/bias parameters * on the 12 LSBs. For the offset parameters on gather instructions the 6 * least significant bits are honored as signed value with a range * [-32..31]. * - * Pack Offset U, and V for texture gather with offsets. + * Offsets should already have been packed in pack_offset(). * * ------------------------------------------ * |Bits | [31:12] | [11:6] | [5:0] | @@ -149,57 +148,129 @@ * |OffsetUV | LOD/Bias | OffsetV | OffsetU | * ------------------------------------------ */ - nir_def *offu = nir_iand_imm(b, nir_channel(b, offset, 0), 0x3F); - nir_def *offv = nir_iand_imm(b, nir_channel(b, offset, 1), 0x3F); + nir_def *lod = tex->src[lod_index].src.ssa; + nir_def *backend2 = tex->src[backend2_index].src.ssa; - nir_def *offsetUV = nir_ior(b, offu, nir_ishl_imm(b, offv, 6)); + b->cursor = nir_before_instr(&tex->instr); - nir_def *lod_offsetUV = nir_ior(b, offsetUV, + nir_def *lod_offsetUV = nir_ior(b, backend2, nir_iand_imm(b, lod, 0xFFFFF000)); - nir_tex_instr_remove_src(tex, offset_index); - nir_tex_instr_add_src(tex, nir_tex_src_backend2, lod_offsetUV); + + nir_src_rewrite(&tex->src[backend2_index].src, lod_offsetUV); return true; } static bool -brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data) +pack_offset(nir_builder *b, nir_tex_instr *tex, bool pack_6bits_offsets) { - if (instr->type != 
nir_instr_type_tex) + /* No offset, nothing to do */ + int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); + if (offset_index < 0) return false; - const struct brw_nir_lower_texture_opts *opts = cb_data; - nir_tex_instr *tex = nir_instr_as_tex(instr); + b->cursor = nir_before_instr(&tex->instr); + + nir_def *offset = tex->src[offset_index].src.ssa; + + /* Combine all three offsets into a single unsigned dword: + * + * bits 11:8 - U Offset (X component) + * bits 7:4 - V Offset (Y component) + * bits 3:0 - R Offset (Z component) + * + * Or for TG4 messages with pack_6bits_offsets=true, do the bottom packing + * of : + * + * ------------------------------------------ + * |Bits | [31:12] | [11:6] | [5:0] | + * ------------------------------------------ + * |OffsetUV | LOD/Bias | OffsetV | OffsetU | + * ------------------------------------------ + */ + const unsigned num_components = + nir_tex_instr_src_size(tex, offset_index); + + static const unsigned bits4_bits[] = { 4, 4, 4, }; + static const unsigned bits6_bits[] = { 6, 6, 0, }; + + offset = nir_pad_vector_imm_int(b, offset, 0, num_components); + offset = nir_format_clamp_sint( + b, offset, pack_6bits_offsets ? bits6_bits : bits4_bits); + + static const unsigned bits4_offsets[] = { 8, 4, 0, }; + static const unsigned bits6_offsets[] = { 0, 6, 0, }; + const unsigned *comp_bits_offsets = pack_6bits_offsets ? + bits6_offsets : bits4_offsets; + const unsigned value_mask = pack_6bits_offsets ? 0x3f : 0xf; + + nir_def *packed_offset = NULL; + for (unsigned c = 0; c < num_components; c++) { + nir_def *c_shifted = nir_ishl_imm( + b, + nir_iand_imm(b, nir_channel(b, offset, c), value_mask), + comp_bits_offsets[c]); + packed_offset = packed_offset == NULL ? 
c_shifted : nir_ior(b, packed_offset, c_shifted); + } + + nir_tex_instr_remove_src(tex, offset_index); + nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed_offset); + + return true; +} + +static bool +intel_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data) +{ + const struct intel_device_info *devinfo = cb_data; + + const bool has_lod = + nir_tex_instr_src_index(tex, nir_tex_src_lod) != -1 || + nir_tex_instr_src_index(tex, nir_tex_src_bias) != -1; + /* On Gfx20+, when we have a LOD, we need to pack the offsets with it. When + * there is no LOD, the offsets are lowered in the coordinates (see + * lower_xehp_tg4_offset_filter). + */ + const bool needs_tg4_load_bias_offset_packing = + tex->op == nir_texop_tg4 && has_lod && + devinfo->ver >= 20; + const bool needs_tg4_offset_packing = devinfo->verx10 >= 125; + + bool progress = false; + + if (tex->op != nir_texop_txf && + (tex->op != nir_texop_tg4 || needs_tg4_offset_packing)) { + progress |= pack_offset(b, tex, needs_tg4_load_bias_offset_packing); + } switch (tex->op) { case nir_texop_txl: case nir_texop_txb: - case nir_texop_tg4: + case nir_texop_tg4: { if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - opts->combined_lod_and_array_index) { - return pack_lod_and_array_index(b, tex); + devinfo->ver >= 20) { + progress |= pack_lod_and_array_index(b, tex); } - if (tex->op == nir_texop_tg4 && opts->combined_lod_or_bias_and_offset) { - return pack_lod_or_bias_and_offset(b, tex); - } + if (needs_tg4_load_bias_offset_packing) + progress |= pack_lod_or_bias_and_offset(b, tex); - return false; + break; + } default: - /* Nothing to do */ - return false; + break; } - return false; + return progress; } bool brw_nir_lower_texture(nir_shader *shader, - const struct brw_nir_lower_texture_opts *opts) + const struct intel_device_info *devinfo) { - return nir_shader_instructions_pass(shader, - brw_nir_lower_texture_instr, - nir_metadata_none, - (void *)opts); + return 
nir_shader_tex_pass(shader, + intel_nir_lower_texture_instr, + nir_metadata_none, + (void *)devinfo); }