nir: Pack texture LOD and array index to a single 32-bit value

v2: Fix clamped_ai calculation in nir_lower_tex.c. Add nir_tex_src_combined_lod_and_array_index_intel to print_tex_instr. Suggested by Sagar. Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27305>
2025-12-21 07:10:09 +01:00 · 2022-03-29 13:26:40 -07:00 · 2022-03-29 13:26:40 -07:00 · c8ba2bc2f0
commit c8ba2bc2f0
parent 78e7f7b377
4 changed files with 107 additions and 0 deletions
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@ -3208,6 +3208,7 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
+   case nir_tex_src_combined_lod_and_array_index_intel:
      return nir_type_uint;

   case nir_num_tex_src_types:
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -2269,6 +2269,15 @@ typedef enum nir_tex_src_type {
   /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
   nir_tex_src_backend2,

+   /**
+    * Backend-specific parameter that combines LOD parameter and array index.
+    *
+    * This parameter must not be present together with nir_tex_src_lod or
+    * nir_tex_src_bias.  Only valid if nir_tex_instr::op is nir_texop_txl or
+    * nir_texop_txb and nir_tex_instr::is_array is set.
+    */
+   nir_tex_src_combined_lod_and_array_index_intel,
+
   nir_num_tex_src_types
 } nir_tex_src_type;

@ -5923,6 +5932,12 @@ typedef struct nir_lower_tex_options {
    */
   bool lower_index_to_offset;

+   /**
+    * If true, pack either the explicit LOD or LOD bias and the array index
+    * into a single (32-bit) value when 32-bit texture coordinates are used.
+    */
+   bool pack_lod_and_array_index;
+
   /**
    * Payload data to be sent to callback / filter functions.
    */
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@ -155,6 +155,86 @@ project_src(nir_builder *b, nir_tex_instr *tex)
   return true;
 }

+/**
+ * Pack either the explicit LOD or LOD bias and the array index together.
+ */
+static bool
+pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex)
+{
+   /* If 32-bit texture coordinates are used, pack either the explicit LOD or
+    * LOD bias and the array index into a single (32-bit) value.
+    */
+   int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+   if (lod_index < 0) {
+      lod_index = nir_tex_instr_src_index(tex, nir_tex_src_bias);

+      /* The explicit LOD or LOD bias may not be found if this lowering has
+       * already occurred.  The explicit LOD may also not be found in some
+       * cases where it is zero.
+       */
+      if (lod_index < 0)
+         return false;
+   }

+   assert(nir_tex_instr_src_type(tex, lod_index) == nir_type_float);

+   /* Also skip the packing if the explicit LOD is a constant zero. */
+   if (tex->op == nir_texop_txl &&
+       nir_src_is_const(tex->src[lod_index].src) &&
+       nir_src_as_float(tex->src[lod_index].src) == 0.0) {
+      return false;
+   }

+   const int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+   assert(coord_index >= 0);

+   nir_def *lod = tex->src[lod_index].src.ssa;
+   nir_def *coord = tex->src[coord_index].src.ssa;

+   assert(nir_tex_instr_src_type(tex, coord_index) == nir_type_float);

+   if (coord->bit_size < 32)
+      return false;

+   b->cursor = nir_before_instr(&tex->instr);

+   /* First, combine the two values.  The explicit LOD / LOD bias stays a
+    * float, but its low 9 bits are replaced by the array index, which is
+    * rounded (fround_even), converted to unsigned and clamped to 511.
+    */
+   const unsigned array_index = tex->coord_components - 1;

+   nir_def *clamped_ai =
+      nir_umin(b,
+               nir_f2u32(b, nir_fround_even(b, nir_swizzle(b, coord,
+                                                           &array_index, 1))),
+               nir_imm_int(b, 511));

+   nir_def *lod_ai =
+      nir_ior(b,
+              nir_iand(b, lod, nir_imm_int(b, 0xfffffe00)),
+              clamped_ai);

+   /* Second, replace the coordinate with a new value that has one fewer
+    * component (i.e., drop the array index).
+    */
+   static const unsigned xyzw[] = { 0, 1, 2, 3 };

+   nir_def *reduced_coord =
+      nir_swizzle(b, coord, xyzw, tex->coord_components - 1);

+   tex->coord_components--;

+   /* Finally, remove the old sources and add the new. */
+   nir_src_rewrite(&tex->src[coord_index].src, reduced_coord);

+   nir_tex_instr_remove_src(tex, lod_index);
+   nir_tex_instr_add_src(tex, nir_tex_src_combined_lod_and_array_index_intel,
+                         lod_ai);

+   return true;
+}
+
 static bool
 lower_offset(nir_builder *b, nir_tex_instr *tex)
 {
@ -1575,6 +1655,13 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
         progress = true;
      }

+      if (tex->is_array &&
+          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+          (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
+          options->pack_lod_and_array_index) {
+         progress = pack_lod_and_array_index(b, tex) || progress;
+      }
+
      unsigned texture_index = tex->texture_index;
      uint32_t texture_mask = 1u << texture_index;
      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
@ -1716,6 +1803,7 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
       * three opcodes provides one.  Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
+          (nir_tex_instr_src_index(tex, nir_tex_src_combined_lod_and_array_index_intel) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
         b->cursor = nir_before_instr(&tex->instr);
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@ -1751,6 +1751,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
      case nir_tex_src_lod:
         fprintf(fp, "(lod)");
         break;
+      case nir_tex_src_combined_lod_and_array_index_intel:
+         fprintf(fp, "(combined_lod_and_array_index_intel)");
+         break;
      case nir_tex_src_min_lod:
         fprintf(fp, "(min_lod)");
         break;