intel/compiler: Adjust sample_b parameter according to new layout

On Xe2+, we need to pack the LOD with the array index for cube array surfaces; as a result, the mlod parameter moves to a different index depending on the layout. So track whether we are packing the LOD with the array index in fs_inst and propagate that to the sampler lowering code to adjust the parameter location. Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27447>
2026-05-22 19:38:10 +02:00 · 2024-02-14 11:12:12 -08:00 · 2024-02-14 11:12:12 -08:00 · b34b2bdff3
commit b34b2bdff3
parent d4ce848476
3 changed files with 30 additions and 5 deletions
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@ -8087,6 +8087,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb,

   ASSERTED bool got_lod = false;
   ASSERTED bool got_bias = false;
+   bool pack_lod_and_array_index = false;
   uint32_t header_bits = 0;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_src nir_src = instr->src[i].src;
@ -8224,7 +8225,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb,
      case nir_tex_src_backend1:
         assert(!got_lod && !got_bias);
         got_lod = true;
-
+         pack_lod_and_array_index = true;
         assert(instr->op == nir_texop_txl || instr->op == nir_texop_txb);
         srcs[TEX_LOGICAL_SRC_LOD] =
            retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_REGISTER_TYPE_F);
@ -8350,6 +8351,8 @@ fs_nir_emit_texture(nir_to_brw_state &ntb,
   fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
   inst->offset = header_bits;

+   inst->has_packed_lod_ai_src = pack_lod_and_array_index;
+
   const unsigned dest_size = nir_tex_instr_dest_size(instr);
   if (devinfo->ver >= 9 &&
       instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) {
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@ -429,6 +429,9 @@ public:
   bool keep_payload_trailing_zeros;

   tgl_swsb sched; /**< Scheduling info. */
+
+   /* Hint that this instruction has combined LOD/LOD bias with array index */
+   bool has_packed_lod_ai_src;
 };

 /**
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@ -1281,7 +1281,22 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,

   if (min_lod.file != BAD_FILE && !min_lod_is_first) {
      /* Account for all of the missing coordinate sources */
-      if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) {
+      if (op == FS_OPCODE_TXB && devinfo->ver >= 20 &&
+          inst->has_packed_lod_ai_src) {
+         /* Bspec 64985:
+          *
+          * For sample_b sampler message format:
+          *
+          * SIMD16H/SIMD32H
+          * Param Number   0     1  2  3  4  5
+          * Param          BIAS  U  V  R  Ai MLOD
+          *
+          * SIMD16/SIMD32
+          * Param Number   0        1  2  3  4
+          * Param          BIAS_AI  U  V  R  MLOD
+          */
+         length += 3 - coord_components;
+      } else if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) {
         /* On DG2 and newer platforms, sample_d can only be used with 1D and
          * 2D surfaces, so the maximum number of gradient components is 2.
          * In spite of this limitation, the Bspec lists a mysterious R
@ -1433,8 +1448,10 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,

 static unsigned
 get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
-                                      opcode op, const fs_reg *src)
+                                      opcode op, const fs_inst *inst)
 {
+   assert(inst);
+   const fs_reg *src = inst->src;
   unsigned src_type_size = 0;

   /* All sources need to have the same size, therefore seek the first valid
@ -1480,7 +1497,9 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
   if (op == SHADER_OPCODE_TXF_CMS_W ||
       op == SHADER_OPCODE_TXF_CMS ||
       op == SHADER_OPCODE_TXF_UMS ||
-       op == SHADER_OPCODE_TXF_MCS)
+       op == SHADER_OPCODE_TXF_MCS ||
+       (op == FS_OPCODE_TXB && !inst->has_packed_lod_ai_src &&
+        devinfo->ver >= 20))
      src_type_size = 2;

   return src_type_size * 8;
@ -1513,7 +1532,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)

   if (devinfo->ver >= 7) {
      const unsigned msg_payload_type_bit_size =
-         get_sampler_msg_payload_type_bit_size(devinfo, op, inst->src);
+         get_sampler_msg_payload_type_bit_size(devinfo, op, inst);

      /* 16-bit payloads are available only on gfx11+ */
      assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);