brw: Add brw_tex_inst

Incorporate some "control sources" directly into the instruction.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
Authored by Caio Oliveira on 2025-08-21 00:02:14 -07:00; committed by Marge Bot
parent 0fcce2722f
commit f0f1e63f99
11 changed files with 188 additions and 175 deletions
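To make the change concrete, here is a minimal sketch of the emission pattern before and after this commit, using only names that appear in the hunks below. The locals (bld, dst, coordinate, surface) are stand-ins for whatever the caller has in scope and are assumptions for illustration, not code from the tree.

   /* Before: component counts and the residency request ride along as
    * immediate "control sources" in the logical source array. */
   brw_reg srcs[TEX_LOGICAL_NUM_SRCS] = {};
   srcs[TEX_LOGICAL_SRC_COORDINATE]       = coordinate;
   srcs[TEX_LOGICAL_SRC_SURFACE]          = surface;
   srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
   srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS]  = brw_imm_ud(0);
   srcs[TEX_LOGICAL_SRC_RESIDENCY]        = brw_imm_ud(0);
   brw_inst *inst = bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst, srcs,
                             TEX_LOGICAL_NUM_SRCS);
   inst->size_written = 4 * REG_SIZE;

   /* After: the same information lives in fields of the new brw_tex_inst
    * subclass, reached through the as_tex() downcast helper. */
   brw_reg srcs2[TEX_LOGICAL_NUM_SRCS] = {};
   srcs2[TEX_LOGICAL_SRC_COORDINATE] = coordinate;
   srcs2[TEX_LOGICAL_SRC_SURFACE]    = surface;
   brw_tex_inst *tex = bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst, srcs2,
                                TEX_LOGICAL_NUM_SRCS)->as_tex();
   tex->size_written     = 4 * REG_SIZE;
   tex->coord_components = 2;
   tex->grad_components  = 0;
   tex->residency        = false;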

View file

@@ -630,12 +630,6 @@ enum tex_logical_srcs {
TEX_LOGICAL_SRC_SAMPLER_HANDLE,
/** Texel offset for gathers */
TEX_LOGICAL_SRC_TG4_OFFSET,
/** REQUIRED: Number of coordinate components (as UD immediate) */
TEX_LOGICAL_SRC_COORD_COMPONENTS,
/** REQUIRED: Number of derivative components (as UD immediate) */
TEX_LOGICAL_SRC_GRAD_COMPONENTS,
/** REQUIRED: request residency (as UD immediate) */
TEX_LOGICAL_SRC_RESIDENCY,
TEX_LOGICAL_NUM_SRCS,
};

View file

@@ -3632,17 +3632,15 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon
srcs[TEX_LOGICAL_SRC_SURFACE] = texture;
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle;
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0);
brw_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs));
brw_tex_inst *tex = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs))->as_tex();
tex->coord_components = components;
/* We only care about one or two regs of response, but the sampler always
* writes 4/8.
*/
inst->size_written = 4 * dest.component_size(inst->exec_size);
tex->size_written = 4 * dest.component_size(tex->exec_size);
return dest;
}
@@ -3713,14 +3711,12 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const b
srcs[TEX_LOGICAL_SRC_MCS] = mcs;
srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target);
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(3);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
brw_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
tex->size_written = 4 * tex->dst.component_size(tex->exec_size);
tex->coord_components = 3;
return inst;
return tex;
}
/**
@@ -6021,9 +6017,6 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
else
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = image;
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0);
/* Since the image size is always uniform, we can just emit a SIMD8
* query instruction and splat the result out.
@@ -6031,8 +6024,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
const brw_builder ubld = bld.scalar_group();
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4);
brw_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
tmp, srcs, ARRAY_SIZE(srcs));
brw_tex_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
tmp, srcs, ARRAY_SIZE(srcs))->as_tex();
inst->size_written = 4 * REG_SIZE * reg_unit(devinfo);
for (unsigned c = 0; c < instr->def.num_components; ++c) {
@@ -7430,8 +7423,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
*/
assert(!instr->is_sparse || srcs[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(instr->is_sparse);
int lod_components = 0;
/* The hardware requires a LOD for buffer textures */
@@ -7612,9 +7603,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]);
}
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
enum opcode opcode;
switch (instr->op) {
case nir_texop_tex:
@@ -7741,9 +7729,12 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
brw_allocate_vgrf_units(*bld.shader, total_regs * reg_unit(devinfo)),
dst_type);
brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
inst->offset = header_bits;
inst->size_written = total_regs * grf_size;
brw_tex_inst *tex = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
tex->offset = header_bits;
tex->size_written = total_regs * grf_size;
tex->residency = instr->is_sparse;
tex->coord_components = instr->coord_components;
tex->grad_components = lod_components;
/* Wa_14012688258:
*
@@ -7758,7 +7749,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
assert(instr->coord_components >= 3u);
/* See opt_zero_samples(). */
inst->keep_payload_trailing_zeros = true;
tex->keep_payload_trailing_zeros = true;
}
/* With half-floats returns, the stride into a GRF allocation for each
@@ -7781,7 +7772,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
if (instr->op != nir_texop_query_levels && !instr->is_sparse &&
!non_aligned_component_stride) {
/* In most cases we can write directly to the result. */
inst->dst = nir_def_reg;
tex->dst = nir_def_reg;
} else {
/* In other cases, we have to reorganize the sampler message's results
* a bit to match the NIR intrinsic's expectations.

View file

@@ -14,10 +14,13 @@
static inline unsigned
brw_inst_kind_size(brw_inst_kind kind)
{
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_tex_inst));
/* TODO: Temporarily here to ensure all instructions can be converted to
* SEND. Once all new kinds are added, change this so that BASE allocates only
* sizeof(brw_inst).
*/
return sizeof(brw_send_inst);
}
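The value returned here appears to be used as the per-instruction allocation size (the TODO above hints at it), so every instruction gets a slot large enough for the largest subclass; that is what lets lower_sampler_logical_send() further down convert a brw_tex_inst into a brw_send_inst in place via brw_transform_inst_to_send(). A rough sketch of that sizing relationship follows; the rzalloc-based call site is illustrative only, not the allocator actually used in the tree:

   /* Hypothetical allocation site: reserve the kind-dependent size so a later
    * in-place rewrite (e.g. logical texture op -> SEND) cannot overflow. */
   const brw_inst_kind kind = brw_inst_kind_for_opcode(opcode);
   brw_inst *inst = (brw_inst *) rzalloc_size(mem_ctx, brw_inst_kind_size(kind));
   assert(brw_inst_kind_size(kind) >= sizeof(brw_inst));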
@@ -154,6 +157,27 @@ brw_inst_kind_for_opcode(enum opcode opcode)
case SHADER_OPCODE_INTERLOCK:
return BRW_KIND_SEND;
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXL_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
case SHADER_OPCODE_TXF_MCS_LOGICAL:
case SHADER_OPCODE_LOD_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
return BRW_KIND_TEX;
default:
return BRW_KIND_BASE;
}
@@ -436,17 +460,15 @@ brw_inst::components_read(unsigned i) const
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM &&
src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM &&
src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
case SHADER_OPCODE_SAMPLEINFO_LOGICAL: {
const brw_tex_inst *tex = as_tex();
/* Texture coordinates. */
if (i == TEX_LOGICAL_SRC_COORDINATE)
return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
return tex->coord_components;
/* Texture derivatives. */
else if ((i == TEX_LOGICAL_SRC_LOD || i == TEX_LOGICAL_SRC_LOD2) &&
opcode == SHADER_OPCODE_TXD_LOGICAL)
return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
return tex->grad_components;
/* Texture offset. */
else if (i == TEX_LOGICAL_SRC_TG4_OFFSET)
return 2;
@@ -460,6 +482,7 @@ brw_inst::components_read(unsigned i) const
return 1;
} else
return 1;
}
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
if (i == MEMORY_LOGICAL_DATA0)
@@ -663,27 +686,8 @@ brw_inst::flags_written(const intel_device_info *devinfo) const
bool
brw_inst::has_sampler_residency() const
{
switch (opcode) {
case SHADER_OPCODE_TEX_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXL_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
assert(src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
return src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
default:
return false;
}
const brw_tex_inst *tex = as_tex();
return tex && tex->residency;
}
/* \sa inst_is_raw_move in brw_eu_validate. */

View file

@@ -42,6 +42,7 @@ struct brw_shader;
enum ENUM_PACKED brw_inst_kind {
BRW_KIND_BASE,
BRW_KIND_SEND,
BRW_KIND_TEX,
};
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
@@ -68,6 +69,7 @@ struct brw_inst : brw_exec_node {
}
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX);
#undef KIND_HELPERS
@@ -270,6 +272,12 @@ struct brw_send_inst : brw_inst {
};
};
struct brw_tex_inst : brw_inst {
uint8_t coord_components;
uint8_t grad_components;
bool residency;
};
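For reference, the KIND_HELPERS invocation above presumably expands to checked downcast helpers along these lines; this is a sketch inferred from how callers such as has_sampler_residency() null-check the result, not the macro's literal expansion:

   /* Presumed shape of the as_tex() helpers: typed pointer when the kind
    * matches, NULL otherwise. */
   brw_tex_inst *as_tex() {
      return kind == BRW_KIND_TEX ? static_cast<brw_tex_inst *>(this) : NULL;
   }
   const brw_tex_inst *as_tex() const {
      return kind == BRW_KIND_TEX ? static_cast<const brw_tex_inst *>(this) : NULL;
   }

Because as_tex() returns NULL for every other kind, passes can branch on it directly (as the printer and has_sampler_residency() do) instead of maintaining per-opcode switch statements.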
/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.

View file

@@ -841,38 +841,32 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
}
static void
lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex)
{
const intel_device_info *devinfo = bld.shader->devinfo;
const brw_compiler *compiler = bld.shader->compiler;
const brw_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
const brw_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
const brw_reg lod = inst->src[TEX_LOGICAL_SRC_LOD];
const brw_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
const brw_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
const brw_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
const brw_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS];
const brw_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
const brw_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
const bool residency = inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
const brw_reg coordinate = tex->src[TEX_LOGICAL_SRC_COORDINATE];
const brw_reg shadow_c = tex->src[TEX_LOGICAL_SRC_SHADOW_C];
const brw_reg lod = tex->src[TEX_LOGICAL_SRC_LOD];
const brw_reg lod2 = tex->src[TEX_LOGICAL_SRC_LOD2];
const brw_reg min_lod = tex->src[TEX_LOGICAL_SRC_MIN_LOD];
const brw_reg sample_index = tex->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
const brw_reg mcs = tex->src[TEX_LOGICAL_SRC_MCS];
const brw_reg surface = tex->src[TEX_LOGICAL_SRC_SURFACE];
const brw_reg sampler = tex->src[TEX_LOGICAL_SRC_SAMPLER];
const brw_reg surface_handle = tex->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
const brw_reg sampler_handle = tex->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
const brw_reg tg4_offset = tex->src[TEX_LOGICAL_SRC_TG4_OFFSET];
const unsigned payload_type_bit_size =
get_sampler_msg_payload_type_bit_size(devinfo, inst);
get_sampler_msg_payload_type_bit_size(devinfo, tex);
/* 16-bit payloads are available only on gfx11+ */
assert(payload_type_bit_size != 16 || devinfo->ver >= 11);
/* We never generate EOT sampler messages */
assert(!inst->eot);
assert(!tex->eot);
const enum brw_reg_type payload_type =
brw_type_with_size(BRW_TYPE_F, payload_type_bit_size);
@@ -881,7 +875,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
const enum brw_reg_type payload_signed_type =
brw_type_with_size(BRW_TYPE_D, payload_type_bit_size);
unsigned header_size = 0, length = 0;
opcode op = inst->opcode;
opcode op = tex->opcode;
brw_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
sources[i] = bld.vgrf(payload_type);
@@ -890,10 +884,10 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 ||
if (shader_opcode_needs_header(op, devinfo) || tex->offset != 0 ||
sampler_handle.file != BAD_FILE ||
is_high_sampler(devinfo, sampler) ||
residency) {
tex->residency) {
/* For general texture offsets (no txf workaround), we need a header to
* put them in.
*
@@ -911,19 +905,19 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
* writemask. It's reversed from normal: 1 means "don't write".
*/
unsigned comps_regs =
DIV_ROUND_UP(regs_written(inst) - reg_unit(devinfo) * residency,
DIV_ROUND_UP(regs_written(tex) - reg_unit(devinfo) * tex->residency,
reg_unit(devinfo));
unsigned comp_regs =
DIV_ROUND_UP(inst->dst.component_size(inst->exec_size),
DIV_ROUND_UP(tex->dst.component_size(tex->exec_size),
reg_unit(devinfo) * REG_SIZE);
if (comps_regs < 4 * comp_regs) {
assert(comps_regs % comp_regs == 0);
unsigned mask = ~((1 << (comps_regs / comp_regs)) - 1) & 0xf;
inst->offset |= mask << 12;
tex->offset |= mask << 12;
}
if (residency)
inst->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */
if (tex->residency)
tex->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */
/* Build the actual header */
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
@@ -932,8 +926,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
ubld.MOV(header, brw_imm_ud(0));
else
ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
if (inst->offset) {
ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
if (tex->offset) {
ubld1.MOV(component(header, 2), brw_imm_ud(tex->offset));
} else if (devinfo->ver < 11 &&
bld.shader->stage != MESA_SHADER_VERTEX &&
bld.shader->stage != MESA_SHADER_FRAGMENT) {
@@ -1049,13 +1043,13 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
/* Load dPdx and the coordinate together:
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (unsigned i = 0; i < coord_components; i++) {
for (unsigned i = 0; i < tex->coord_components; i++) {
bld.MOV(sources[length++], offset(coordinate, bld, i));
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < grad_components) {
if (i < tex->grad_components) {
bld.MOV(sources[length++], offset(lod, bld, i));
bld.MOV(sources[length++], offset(lod2, bld, i));
}
@@ -1077,7 +1071,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length++], offset(coordinate, bld, 0));
if (coord_components >= 2) {
if (tex->coord_components >= 2) {
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length], offset(coordinate, bld, 1));
} else {
@@ -1090,7 +1084,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
bld.MOV(sources[length++], lod);
}
for (unsigned i = 2; i < coord_components; i++) {
for (unsigned i = 2; i < tex->coord_components; i++) {
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length++], offset(coordinate, bld, i));
}
@@ -1138,7 +1132,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
/* There is no offsetting for this message; just copy in the integer
* texture coordinates.
*/
for (unsigned i = 0; i < coord_components; i++) {
for (unsigned i = 0; i < tex->coord_components; i++) {
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length++], offset(coordinate, bld, i));
}
@@ -1155,7 +1149,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
bld.MOV(sources[length++], offset(tg4_offset, bld, i));
}
if (coord_components == 3) /* r if present */
if (tex->coord_components == 3) /* r if present */
bld.MOV(sources[length++], offset(coordinate, bld, 2));
coordinate_done = true;
@@ -1166,7 +1160,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
/* Set up the coordinate (except for cases where it was done above) */
if (!coordinate_done) {
for (unsigned i = 0; i < coord_components; i++)
for (unsigned i = 0; i < tex->coord_components; i++)
bld.MOV(retype(sources[length++], payload_type),
offset(coordinate, bld, i));
}
@@ -1186,7 +1180,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
* Param Number 0 1 2 3 4
* Param BIAS_AI U V R MLOD
*/
length += 3 - coord_components;
length += 3 - tex->coord_components;
} else if (op == SHADER_OPCODE_TXD_LOGICAL && devinfo->verx10 >= 125) {
/* On DG2 and newer platforms, sample_d can only be used with 1D and
* 2D surfaces, so the maximum number of gradient components is 2.
@@ -1196,12 +1190,12 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
*
* See bspec 45942, "Enable new message layout for cube array"
*/
length += 3 - coord_components;
length += (2 - grad_components) * 2;
length += 3 - tex->coord_components;
length += (2 - tex->grad_components) * 2;
} else {
length += 4 - coord_components;
length += 4 - tex->coord_components;
if (op == SHADER_OPCODE_TXD_LOGICAL)
length += (3 - grad_components) * 2;
length += (3 - tex->grad_components) * 2;
}
bld.MOV(sources[length++], min_lod);
@@ -1228,24 +1222,24 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
if (devinfo->ver < 20) {
if (payload_type_bit_size == 16) {
assert(devinfo->ver >= 11);
simd_mode = inst->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H :
simd_mode = tex->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H :
GFX10_SAMPLER_SIMD_MODE_SIMD16H;
} else {
simd_mode = inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
simd_mode = tex->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
BRW_SAMPLER_SIMD_MODE_SIMD16;
}
} else {
if (payload_type_bit_size == 16) {
simd_mode = inst->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16H :
simd_mode = tex->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16H :
XE2_SAMPLER_SIMD_MODE_SIMD32H;
} else {
simd_mode = inst->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16 :
simd_mode = tex->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16 :
XE2_SAMPLER_SIMD_MODE_SIMD32;
}
}
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
inst = NULL;
brw_send_inst *send = brw_transform_inst_to_send(bld, tex);
tex = NULL;
send->mlen = mlen;
send->header_size = header_size;
@@ -2679,7 +2673,7 @@ brw_lower_logical_sends(brw_shader &s)
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
lower_sampler_logical_send(ibld, inst);
lower_sampler_logical_send(ibld, inst->as_tex());
break;
case SHADER_OPCODE_GET_BUFFER_SIZE:

View file

@@ -160,55 +160,48 @@ get_fpu_lowered_simd_width(const brw_shader *shader,
*/
static unsigned
get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
const brw_inst *inst)
const brw_tex_inst *tex)
{
/* If we have a min_lod parameter on anything other than a simple sample
* message, it will push it over 5 arguments and we have to fall back to
* SIMD8.
*/
if (inst->opcode != SHADER_OPCODE_TEX_LOGICAL &&
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD))
if (tex->opcode != SHADER_OPCODE_TEX_LOGICAL &&
tex->components_read(TEX_LOGICAL_SRC_MIN_LOD))
return devinfo->ver < 20 ? 8 : 16;
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
* variant of the TXL or TXF message.
*/
const bool implicit_lod = (inst->opcode == SHADER_OPCODE_TXL_LOGICAL ||
inst->opcode == SHADER_OPCODE_TXF_LOGICAL) &&
inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
const bool implicit_lod = (tex->opcode == SHADER_OPCODE_TXL_LOGICAL ||
tex->opcode == SHADER_OPCODE_TXF_LOGICAL) &&
tex->src[TEX_LOGICAL_SRC_LOD].is_zero();
/* Calculate the total number of argument components that need to be passed
* to the sampler unit.
*/
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
const unsigned grad_components =
inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
const unsigned coord_components =
inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
unsigned num_payload_components =
coord_components +
inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
(implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) +
inst->components_read(TEX_LOGICAL_SRC_LOD2) +
inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
(inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
inst->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) +
inst->components_read(TEX_LOGICAL_SRC_MCS) +
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD);
tex->coord_components +
tex->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
(implicit_lod ? 0 : tex->components_read(TEX_LOGICAL_SRC_LOD)) +
tex->components_read(TEX_LOGICAL_SRC_LOD2) +
tex->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
(tex->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
tex->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) +
tex->components_read(TEX_LOGICAL_SRC_MCS) +
tex->components_read(TEX_LOGICAL_SRC_MIN_LOD);
if (inst->opcode == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20) {
num_payload_components += 3 - coord_components;
} else if (inst->opcode == SHADER_OPCODE_TXD_LOGICAL &&
if (tex->opcode == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20) {
num_payload_components += 3 - tex->coord_components;
} else if (tex->opcode == SHADER_OPCODE_TXD_LOGICAL &&
devinfo->verx10 >= 125 && devinfo->ver < 20) {
num_payload_components +=
3 - coord_components + (2 - grad_components) * 2;
3 - tex->coord_components + (2 - tex->grad_components) * 2;
} else {
num_payload_components += 4 - coord_components;
if (inst->opcode == SHADER_OPCODE_TXD_LOGICAL)
num_payload_components += (3 - grad_components) * 2;
num_payload_components += 4 - tex->coord_components;
if (tex->opcode == SHADER_OPCODE_TXD_LOGICAL)
num_payload_components += (3 - tex->grad_components) * 2;
}
@@ -219,7 +212,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
* maximum message size supported by the sampler, regardless of whether a
* header is provided or not.
*/
return MIN2(inst->exec_size, simd_limit);
return MIN2(tex->exec_size, simd_limit);
}
static bool
@@ -379,7 +372,7 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
return get_sampler_lowered_simd_width(devinfo, inst);
return get_sampler_lowered_simd_width(devinfo, inst->as_tex());
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:

View file

@@ -251,6 +251,14 @@ send_inst_match(brw_send_inst *a, brw_send_inst *b)
a->send_bits == b->send_bits;
}
static bool
tex_inst_match(brw_tex_inst *a, brw_tex_inst *b)
{
return a->coord_components == b->coord_components &&
a->grad_components == b->grad_components &&
a->residency == b->residency;
}
static bool
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
{
@@ -258,6 +266,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate)
return a->opcode == b->opcode &&
/* `kind` is derived from opcode, so skipped. */
(a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) &&
(a->kind != BRW_KIND_TEX || tex_inst_match(a->as_tex(), b->as_tex())) &&
a->exec_size == b->exec_size &&
a->group == b->group &&
a->predicate == b->predicate &&
@@ -342,6 +351,17 @@ hash_inst(const void *v)
break;
}
case BRW_KIND_TEX: {
const brw_tex_inst *tex = inst->as_tex();
const uint8_t tex_u8data[] = {
tex->coord_components,
tex->grad_components,
tex->residency,
};
hash = HASH(hash, tex_u8data);
break;
}
case BRW_KIND_BASE:
/* Nothing else to do. */
break;

View file

@@ -97,46 +97,47 @@ brw_opt_combine_convergent_txf(brw_shader &s)
if (inst->opcode != SHADER_OPCODE_TXF_LOGICAL)
continue;
brw_tex_inst *tex = inst->as_tex();
/* Only handle buffers or single miplevel 1D images for now */
if (inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud > 1)
if (tex->coord_components > 1)
continue;
if (inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0)
if (tex->residency)
continue;
if (inst->predicate || inst->force_writemask_all)
if (tex->predicate || tex->force_writemask_all)
continue;
if (!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_LOD]) ||
!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_SURFACE]) ||
!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]))
if (!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_LOD]) ||
!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_SURFACE]) ||
!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]))
continue;
/* Only handle immediates for now: we could check is_uniform(),
* but we'd need to ensure the coordinate's definition reaches
* txfs[0] which is where we'll insert the combined coordinate.
*/
if (inst->src[TEX_LOGICAL_SRC_COORDINATE].file != IMM)
if (tex->src[TEX_LOGICAL_SRC_COORDINATE].file != IMM)
continue;
/* texelFetch from 1D buffers shouldn't have any of these */
assert(inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_LOD2].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_MIN_LOD].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_MCS].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_TG4_OFFSET].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM &&
inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud == 0);
assert(tex->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_LOD2].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_MIN_LOD].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_SAMPLE_INDEX].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_MCS].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_TG4_OFFSET].file == BAD_FILE);
assert(tex->grad_components == 0);
if (count > 0 &&
(!sources_match(defs, inst, txfs[0], TEX_LOGICAL_SRC_LOD) ||
!sources_match(defs, inst, txfs[0], TEX_LOGICAL_SRC_SURFACE) ||
!sources_match(defs, inst, txfs[0],
(!sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_LOD) ||
!sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_SURFACE) ||
!sources_match(defs, tex, txfs[0],
TEX_LOGICAL_SRC_SURFACE_HANDLE)))
continue;
txfs[count++] = inst;
txfs[count++] = tex;
if (count == ARRAY_SIZE(txfs))
break;
@@ -179,9 +180,6 @@ brw_opt_combine_convergent_txf(brw_shader &s)
srcs[TEX_LOGICAL_SRC_SAMPLER] = txfs[0]->src[TEX_LOGICAL_SRC_SAMPLER];
srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] =
txfs[0]->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(1);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
/* Each of our txf may have a reduced response length if some
* components are never read. Use the maximum of the sizes.
@@ -194,9 +192,12 @@ brw_opt_combine_convergent_txf(brw_shader &s)
/* Emit the new divergent TXF */
brw_reg div = ubld.vgrf(BRW_TYPE_UD, new_dest_comps);
brw_inst *div_txf =
brw_tex_inst *div_txf =
ubld.emit(SHADER_OPCODE_TXF_LOGICAL, div, srcs,
TEX_LOGICAL_NUM_SRCS);
TEX_LOGICAL_NUM_SRCS)->as_tex();
div_txf->coord_components = 1;
div_txf->grad_components = 0;
div_txf->residency = false;
/* Update it to also use response length reduction */
const unsigned per_component_regs =

View file

@@ -656,6 +656,12 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
}
}
if (const brw_tex_inst *tex = inst->as_tex()) {
fprintf(file, ", coord_comps: %uu", tex->coord_components);
fprintf(file, ", grad_comps: %uu", tex->grad_components);
fprintf(file, ", residency: %s", tex->residency ? "true" : "false");
}
fprintf(file, " ");
if (inst->force_writemask_all)

View file

@@ -212,13 +212,14 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS];
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
tex_srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
bld.ADD(offset(dest, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dest, tex_srcs, TEX_LOGICAL_NUM_SRCS)
->size_written = 4 * REG_SIZE;
brw_tex_inst *tex =
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dest, tex_srcs, TEX_LOGICAL_NUM_SRCS)->as_tex();
tex->size_written = 4 * REG_SIZE;
tex->coord_components = 2;
bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);

View file

@@ -277,13 +277,14 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS] = {};
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
tex_srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
bld.ADD(offset(dst0, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst0, tex_srcs, TEX_LOGICAL_NUM_SRCS)
->size_written = 8 * REG_SIZE;
brw_tex_inst *tex =
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst0, tex_srcs, TEX_LOGICAL_NUM_SRCS)->as_tex();
tex->size_written = 8 * REG_SIZE;
tex->coord_components = 2;
bld.MOV(dst1, offset(dst0, bld, 2))->saturate = true;
EXPECT_NO_PROGRESS(brw_opt_saturate_propagation, bld);