brw: move texture offset packing to NIR

That way we can deal with upcoming non-constant offset values for
VK_KHR_maintenance8.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33138>
Author: Lionel Landwerlin, 2025-01-21 14:17:11 +02:00 (committed by Marge Bot)
Commit: 4346210ae6, parent: 67ae49dede
6 changed files with 147 additions and 96 deletions

@@ -598,6 +598,8 @@ enum tex_logical_srcs {
    TEX_LOGICAL_SRC_SAMPLER_HANDLE,
    /** Texel offset for gathers */
    TEX_LOGICAL_SRC_TG4_OFFSET,
+   /** Texture offset */
+   TEX_LOGICAL_SRC_PACKED_OFFSET,
    /** REQUIRED: Number of coordinate components (as UD immediate) */
    TEX_LOGICAL_SRC_COORD_COMPONENTS,
    /** REQUIRED: Number of derivative components (as UD immediate) */

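For reference, the dword this new logical source carries keeps the layout the old backend helper produced: bits 11:8 hold U, bits 7:4 hold V, bits 3:0 hold R, each a 4-bit signed value in [-8,7]. A minimal standalone sketch of that packing (pack_texture_offsets is a hypothetical helper written for illustration, not code from this patch):

#include <assert.h>
#include <stdint.h>

/* Pack three texel offsets, each in [-8,7], into the single unsigned dword
 * consumed by the sampler message header: bits 11:8 U, 7:4 V, 3:0 R. */
static uint32_t
pack_texture_offsets(int u, int v, int r)
{
   assert(u >= -8 && u <= 7);
   assert(v >= -8 && v <= 7);
   assert(r >= -8 && r <= 7);
   return ((uint32_t)(u & 0xF) << 8) |
          ((uint32_t)(v & 0xF) << 4) |
          ((uint32_t)(r & 0xF) << 0);
}

For example, offsets (1, -2, 0) pack to 0x1E0: 0x1 in bits 11:8 and 0xE, the 4-bit two's complement of -2, in bits 7:4.
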
@@ -85,38 +85,6 @@ static void brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
 static void brw_combine_with_vec(const brw_builder &bld, const brw_reg &dst,
                                  const brw_reg &src, unsigned n);
 
-static bool
-brw_texture_offset(const nir_tex_instr *tex, unsigned src,
-                   uint32_t *offset_bits_out)
-{
-   if (!nir_src_is_const(tex->src[src].src))
-      return false;
-
-   const unsigned num_components = nir_tex_instr_src_size(tex, src);
-
-   /* Combine all three offsets into a single unsigned dword:
-    *
-    *    bits 11:8 - U Offset (X component)
-    *    bits  7:4 - V Offset (Y component)
-    *    bits  3:0 - R Offset (Z component)
-    */
-   uint32_t offset_bits = 0;
-   for (unsigned i = 0; i < num_components; i++) {
-      int offset = nir_src_comp_as_int(tex->src[src].src, i);
-
-      /* offset out of bounds; caller will handle it. */
-      if (offset > 7 || offset < -8)
-         return false;
-
-      const unsigned shift = 4 * (2 - i);
-      offset_bits |= (offset & 0xF) << shift;
-   }
-
-   *offset_bits_out = offset_bits;
-
-   return true;
-}
-
 static brw_reg
 setup_imm_b(const brw_builder &bld, int8_t v)
 {
@@ -7332,21 +7300,15 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
          srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_TYPE_UD);
          break;
 
-      case nir_tex_src_offset: {
-         uint32_t offset_bits = 0;
-         if (brw_texture_offset(instr, i, &offset_bits)) {
-            header_bits |= offset_bits;
-         } else {
-            /* On gfx12.5+, if the offsets are not both constant and in the
-             * {-8,7} range, nir_lower_tex() will have already lowered the
-             * source offset. So we should never reach this point.
-             */
-            assert(devinfo->verx10 < 125);
-            srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
-               retype(src, BRW_TYPE_D);
-         }
+      case nir_tex_src_offset:
+         /* On gfx12.5+, if the offsets are not both constant and in the
+          * {-8,7} range, nir_lower_tex() will have already lowered the
+          * source offset. So we should never reach this point.
+          */
+         assert(devinfo->verx10 < 125);
+         srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
+            retype(src, BRW_TYPE_D);
          break;
-      }
 
       case nir_tex_src_projector:
         unreachable("should be lowered");
@@ -7390,10 +7352,20 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
        * into a single (32-bit) value.
        */
      case nir_tex_src_backend2:
-         assert(instr->op == nir_texop_tg4);
-         pack_lod_bias_and_offset = true;
-         srcs[TEX_LOGICAL_SRC_LOD] =
-            retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F);
+         /* For TG4, if there is a LOD, it would have been packed together
+          * with offsets, just put everything into SRC_LOD.
+          *
+          * Otherwise this is a packed offset.
+          */
+         if (instr->op == nir_texop_tg4 &&
+             (nir_tex_instr_src_index(instr, nir_tex_src_lod) != -1 ||
+              nir_tex_instr_src_index(instr, nir_tex_src_bias) != -1)) {
+            pack_lod_bias_and_offset = true;
+            srcs[TEX_LOGICAL_SRC_LOD] =
+               retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F);
+         } else {
+            srcs[TEX_LOGICAL_SRC_PACKED_OFFSET] = bld.emit_uniformize(src);
+         }
          break;
 
       /* If this parameter is present, we are packing either the explicit LOD

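Two things are worth noting in the nir_tex_src_backend2 handling above: when a TG4 has an LOD or bias, backend2 already contains the LOD merged with the 6-bit U/V offsets and is routed into SRC_LOD as a float; otherwise backend2 is just the packed offset, and it is uniformized because it lands in the scalar message header rather than in the per-lane payload. A standalone sketch of the merged encoding, assuming the [31:12]/[11:6]/[5:0] layout documented in the lowering pass below (pack_lod_offset_uv is illustrative, not driver code):

#include <stdint.h>
#include <string.h>

/* Merge a float LOD/bias with 6-bit signed U/V gather offsets in [-32,31]:
 * the LOD keeps its top 20 bits, the offsets occupy the 12 LSBs. */
static uint32_t
pack_lod_offset_uv(float lod, int u, int v)
{
   uint32_t lod_bits;
   memcpy(&lod_bits, &lod, sizeof(lod_bits));   /* bitcast, as NIR does */
   const uint32_t off_uv = ((uint32_t)(u & 0x3F) << 0) |
                           ((uint32_t)(v & 0x3F) << 6);
   return (lod_bits & 0xFFFFF000u) | off_uv;
}
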
@@ -757,6 +757,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
                            const brw_reg &surface_handle,
                            const brw_reg &sampler_handle,
                            const brw_reg &tg4_offset,
+                           const brw_reg &packed_offset,
                            unsigned payload_type_bit_size,
                            unsigned coord_components,
                            unsigned grad_components,
@@ -784,6 +785,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
    assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
 
    if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 ||
+       packed_offset.file != BAD_FILE ||
        sampler_handle.file != BAD_FILE ||
        is_high_sampler(devinfo, sampler) ||
        residency) {
@@ -825,8 +827,17 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
          ubld.MOV(header, brw_imm_ud(0));
       else
          ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
-      if (inst->offset) {
-         ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
+      if (packed_offset.file != BAD_FILE || inst->offset) {
+         if (inst->offset && packed_offset.file != BAD_FILE) {
+            if (packed_offset.file == IMM)
+               ubld1.MOV(component(header, 2), brw_imm_ud(packed_offset.ud | inst->offset));
+            else
+               ubld1.OR(component(header, 2), packed_offset, brw_imm_ud(inst->offset));
+         } else if (packed_offset.file != BAD_FILE) {
+            ubld1.MOV(component(header, 2), packed_offset);
+         } else {
+            ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
+         }
       } else if (devinfo->ver < 11 &&
                  bld.shader->stage != MESA_SHADER_VERTEX &&
                  bld.shader->stage != MESA_SHADER_FRAGMENT) {
@@ -1286,6 +1297,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
    const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
    const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
    const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
+   const brw_reg packed_offset = inst->src[TEX_LOGICAL_SRC_PACKED_OFFSET];
    assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
    const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
    assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
@@ -1296,6 +1308,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
    const unsigned msg_payload_type_bit_size =
       get_sampler_msg_payload_type_bit_size(devinfo, inst);
 
+   assert(tg4_offset.file == BAD_FILE || packed_offset.file == BAD_FILE);
+
    /* 16-bit payloads are available only on gfx11+ */
    assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);
@@ -1304,7 +1318,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
                                sample_index,
                                mcs, surface, sampler,
                                surface_handle, sampler_handle,
-                               tg4_offset,
+                               tg4_offset, packed_offset,
                                msg_payload_type_bit_size,
                                coord_components, grad_components,
                                residency);

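The if/else ladder above picks the cheapest way to fill message header dword 2, which now merges two sources of offset bits. A compact standalone model of that decision (BAD_FILE and IMM stand in for brw_reg file values; the strings describe instruction shapes, none of this is driver code):

#include <stdint.h>

enum file { BAD_FILE, IMM, GRF };   /* stand-ins for brw_reg::file */

/* Which instruction the ladder emits for header dword 2: an immediate
 * packed offset folds with inst->offset at compile time into one MOV,
 * while a register value needs a runtime OR. */
static const char *
header2_emit(enum file packed_offset, uint32_t inst_offset)
{
   if (packed_offset == BAD_FILE && inst_offset == 0)
      return "nothing: dword 2 keeps its initialized value";
   if (packed_offset != BAD_FILE && inst_offset != 0)
      return packed_offset == IMM
         ? "MOV header.2, imm(packed_offset.ud | inst->offset)"
         : "OR  header.2, packed_offset, imm(inst->offset)";
   return packed_offset != BAD_FILE
      ? "MOV header.2, packed_offset"
      : "MOV header.2, imm(inst->offset)";
}
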
@@ -1790,11 +1790,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
    if (OPT(nir_lower_tex, &tex_options))
       OPT(nir_lower_tex, &tex_options);
 
-   const struct brw_nir_lower_texture_opts brw_tex_options = {
-      .combined_lod_and_array_index = compiler->devinfo->ver >= 20,
-      .combined_lod_or_bias_and_offset = compiler->devinfo->ver >= 20,
-   };
-   OPT(brw_nir_lower_texture, &brw_tex_options);
+   OPT(brw_nir_lower_texture, devinfo);
 
    OPT(intel_nir_lower_sparse_intrinsics);

@@ -205,12 +205,8 @@ bool brw_nir_lower_texel_address(nir_shader *shader,
                                  const struct intel_device_info *devinfo,
                                  enum isl_tiling tiling);
 
-struct brw_nir_lower_texture_opts {
-   bool combined_lod_and_array_index;
-   bool combined_lod_or_bias_and_offset;
-};
 bool brw_nir_lower_texture(nir_shader *nir,
-                           const struct brw_nir_lower_texture_opts *opts);
+                           const struct intel_device_info *devinfo);
 
 bool brw_nir_lower_sample_index_in_coord(nir_shader *nir);

@@ -22,6 +22,7 @@
  */
 
 #include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
 #include "brw_nir.h"
 
 /**
@@ -103,8 +104,11 @@ pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex)
 static bool
 pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
 {
-   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
-   if (offset_index < 0)
+   /* If there is no backend2, it means there was no offset to pack so just
+    * bail.
+    */
+   int backend2_index = nir_tex_instr_src_index(tex, nir_tex_src_backend2);
+   if (backend2_index < 0)
       return false;
 
    /* If 32-bit texture coordinates are used, pack either the explicit LOD or
@@ -130,18 +134,13 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
       return false;
    }
 
-   nir_def *lod = tex->src[lod_index].src.ssa;
-   nir_def *offset = tex->src[offset_index].src.ssa;
-
-   b->cursor = nir_before_instr(&tex->instr);
-
    /* When using the programmable offsets instruction gather4_po_l_c with
     * SIMD16 or SIMD32 the U, V offsets are combined with LOD/bias parameters
    * on the 12 LSBs. For the offset parameters on gather instructions the 6
    * least significant bits are honored as signed value with a range
    * [-32..31].
    *
-    * Pack Offset U, and V for texture gather with offsets.
+    * Offsets should already have been packed in pack_const_offset().
    *
    * ------------------------------------------
    * |Bits     | [31:12]  | [11:6]  | [5:0]   |
@@ -149,57 +148,129 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
    * |OffsetUV | LOD/Bias | OffsetV | OffsetU |
    * ------------------------------------------
    */
-   nir_def *offu = nir_iand_imm(b, nir_channel(b, offset, 0), 0x3F);
-   nir_def *offv = nir_iand_imm(b, nir_channel(b, offset, 1), 0x3F);
-   nir_def *offsetUV = nir_ior(b, offu, nir_ishl_imm(b, offv, 6));
-   nir_def *lod_offsetUV = nir_ior(b, offsetUV,
+   nir_def *lod = tex->src[lod_index].src.ssa;
+   nir_def *backend2 = tex->src[backend2_index].src.ssa;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_def *lod_offsetUV = nir_ior(b, backend2,
                                    nir_iand_imm(b, lod, 0xFFFFF000));
-
-   nir_tex_instr_remove_src(tex, offset_index);
-   nir_tex_instr_add_src(tex, nir_tex_src_backend2, lod_offsetUV);
+   nir_src_rewrite(&tex->src[backend2_index].src, lod_offsetUV);
 
    return true;
 }
 
 static bool
-brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data)
+pack_offset(nir_builder *b, nir_tex_instr *tex, bool pack_6bits_offsets)
 {
-   if (instr->type != nir_instr_type_tex)
+   /* No offset, nothing to do */
+   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
+   if (offset_index < 0)
       return false;
 
-   const struct brw_nir_lower_texture_opts *opts = cb_data;
-   nir_tex_instr *tex = nir_instr_as_tex(instr);
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_def *offset = tex->src[offset_index].src.ssa;
+
+   /* Combine all three offsets into a single unsigned dword:
+    *
+    *    bits 11:8 - U Offset (X component)
+    *    bits  7:4 - V Offset (Y component)
+    *    bits  3:0 - R Offset (Z component)
+    *
+    * Or for TG4 messages with pack_6bits_offsets=true, do the bottom packing
+    * of :
+    *
+    * ------------------------------------------
+    * |Bits     | [31:12]  | [11:6]  | [5:0]   |
+    * ------------------------------------------
+    * |OffsetUV | LOD/Bias | OffsetV | OffsetU |
+    * ------------------------------------------
+    */
+   const unsigned num_components =
+      nir_tex_instr_src_size(tex, offset_index);
+
+   static const unsigned bits4_bits[] = { 4, 4, 4, };
+   static const unsigned bits6_bits[] = { 6, 6, 0, };
+   offset = nir_pad_vector_imm_int(b, offset, 0, num_components);
+   offset = nir_format_clamp_sint(
+      b, offset, pack_6bits_offsets ? bits6_bits : bits4_bits);
+
+   static const unsigned bits4_offsets[] = { 8, 4, 0, };
+   static const unsigned bits6_offsets[] = { 0, 6, 0, };
+   const unsigned *comp_bits_offsets = pack_6bits_offsets ?
+      bits6_offsets : bits4_offsets;
+   const unsigned value_mask = pack_6bits_offsets ? 0x3f : 0xf;
+
+   nir_def *packed_offset = NULL;
+   for (unsigned c = 0; c < num_components; c++) {
+      nir_def *c_shifted = nir_ishl_imm(
+         b,
+         nir_iand_imm(b, nir_channel(b, offset, c), value_mask),
+         comp_bits_offsets[c]);
+      packed_offset = packed_offset == NULL ? c_shifted : nir_ior(b, packed_offset, c_shifted);
+   }
+
+   nir_tex_instr_remove_src(tex, offset_index);
+   nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed_offset);
+
+   return true;
+}
+
+static bool
+intel_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
+{
+   const struct intel_device_info *devinfo = cb_data;
+
+   const bool has_lod =
+      nir_tex_instr_src_index(tex, nir_tex_src_lod) != -1 ||
+      nir_tex_instr_src_index(tex, nir_tex_src_bias) != -1;
+
+   /* On Gfx20+, when we have a LOD, we need to pack the offsets with it. When
+    * there is no LOD, the offsets are lowered in the coordinates (see
+    * lower_xehp_tg4_offset_filter).
+    */
+   const bool needs_tg4_load_bias_offset_packing =
+      tex->op == nir_texop_tg4 && has_lod &&
+      devinfo->ver >= 20;
+   const bool needs_tg4_offset_packing = devinfo->verx10 >= 125;
+
+   bool progress = false;
+
+   if (tex->op != nir_texop_txf &&
+       (tex->op != nir_texop_tg4 || needs_tg4_offset_packing)) {
+      progress |= pack_offset(b, tex, needs_tg4_load_bias_offset_packing);
+   }
 
    switch (tex->op) {
    case nir_texop_txl:
    case nir_texop_txb:
-   case nir_texop_tg4:
+   case nir_texop_tg4: {
       if (tex->is_array &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-          opts->combined_lod_and_array_index) {
-         return pack_lod_and_array_index(b, tex);
+          devinfo->ver >= 20) {
+         progress |= pack_lod_and_array_index(b, tex);
       }
-      if (tex->op == nir_texop_tg4 && opts->combined_lod_or_bias_and_offset) {
-         return pack_lod_or_bias_and_offset(b, tex);
-      }
-      return false;
+      if (needs_tg4_load_bias_offset_packing)
+         progress |= pack_lod_or_bias_and_offset(b, tex);
+      break;
+   }
 
    default:
-      /* Nothing to do */
-      return false;
+      break;
    }
 
-   return false;
+   return progress;
 }
 
 bool
 brw_nir_lower_texture(nir_shader *shader,
-                      const struct brw_nir_lower_texture_opts *opts)
+                      const struct intel_device_info *devinfo)
 {
-   return nir_shader_instructions_pass(shader,
-                                       brw_nir_lower_texture_instr,
-                                       nir_metadata_none,
-                                       (void *)opts);
+   return nir_shader_tex_pass(shader,
+                              intel_nir_lower_texture_instr,
+                              nir_metadata_none,
+                              (void *)devinfo);
 }
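
To make the two packing modes of pack_offset() concrete, here is a standalone C model of the value it computes (assumed equivalent behavior, written for illustration; clamp_sint mirrors what nir_format_clamp_sint is used for here, and none of these helpers are driver code). In the 6-bit mode the offset source only ever has two components, so the zero-width third entry never contributes:

#include <stdint.h>

/* Clamp v to the range of a signed integer of the given width;
 * a zero-width field always packs as 0. */
static int32_t
clamp_sint(int32_t v, unsigned bits)
{
   if (bits == 0)
      return 0;
   const int32_t lo = -(1 << (bits - 1));
   const int32_t hi = (1 << (bits - 1)) - 1;
   return v < lo ? lo : v > hi ? hi : v;
}

/* 4-bit mode: U/V/R at bits [11:8]/[7:4]/[3:0] (header offsets).
 * 6-bit mode: U/V at bits [5:0]/[11:6] (TG4 LOD+offset packing). */
static uint32_t
model_pack_offset(const int32_t off[3], unsigned num, int pack_6bits)
{
   static const unsigned bits4[3] = { 4, 4, 4 }, shift4[3] = { 8, 4, 0 };
   static const unsigned bits6[3] = { 6, 6, 0 }, shift6[3] = { 0, 6, 0 };
   const unsigned *bits  = pack_6bits ? bits6 : bits4;
   const unsigned *shift = pack_6bits ? shift6 : shift4;
   const uint32_t mask   = pack_6bits ? 0x3F : 0xF;

   uint32_t packed = 0;
   for (unsigned c = 0; c < num; c++)
      packed |= ((uint32_t)clamp_sint(off[c], bits[c]) & mask) << shift[c];
   return packed;
}

For example, model_pack_offset((const int32_t[]){1, -2, 0}, 3, 0) returns 0x1E0. Note the behavioral difference from the deleted brw_texture_offset(): out-of-range constant offsets used to make the backend fall back to the TG4 offset path, whereas the NIR pass clamps them, which is what lets this same path handle the non-constant offsets VK_KHR_maintenance8 introduces.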