brw/nir: add intrinsics to read attribute payload register indirectly

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
2026-05-05 20:28:04 +02:00 · 2025-04-29 12:50:42 +03:00 · 2025-04-29 12:50:42 +03:00 · 9d342081e7
commit 9d342081e7
parent ef17fbf8e5
7 changed files with 51 additions and 3 deletions
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@ -354,6 +354,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
   case nir_intrinsic_load_fs_msaa_intel:
   case nir_intrinsic_load_constant_base_ptr:
   case nir_intrinsic_load_const_buf_base_addr_lvp:
+   case nir_intrinsic_load_max_polygon_intel:
      is_divergent = false;
      break;

@ -708,7 +709,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
   case nir_intrinsic_load_frag_size_ir3:
   case nir_intrinsic_load_frag_offset_ir3:
   case nir_intrinsic_bindless_resource_ir3:
-   case nir_intrinsic_ray_intersection_ir3: {
+   case nir_intrinsic_ray_intersection_ir3:
+   case nir_intrinsic_read_attribute_payload_intel: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (src_divergent(instr->src[i], state)) {
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@ -2303,6 +2303,15 @@ image("load_raw_intel", src_comp=[1], dest_comp=0,
      flags=[CAN_ELIMINATE])
 image("store_raw_intel", src_comp=[1, 0])

+# Maximum number of polygons processed in the fragment shader
+system_value("max_polygon_intel", 1, bit_sizes=[32])
+
+# Read the attribute thread payload at a given offset
+# src[] = { offset }
+intrinsic("read_attribute_payload_intel", dest_comp=1, bit_sizes=[32],
+          src_comp=[1],
+          flags=[CAN_ELIMINATE, CAN_REORDER])
+
 # Number of data items being operated on for a SIMD program.
 system_value("simd_width_intel", 1)

--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@ -1198,11 +1198,23 @@ brw_assign_urb_setup(brw_shader &s)
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(s.prog_data);

   int urb_start = s.payload().num_regs + prog_data->base.curb_read_length;
+   bool read_attribute_payload = false;

   /* Offset all the urb_setup[] index by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
+      if (inst->opcode == FS_OPCODE_READ_ATTRIBUTE_PAYLOAD) {
+         brw_reg offset = inst->src[0];
+         inst->resize_sources(3);
+         inst->opcode = SHADER_OPCODE_MOV_INDIRECT;
+         inst->src[0] = retype(brw_vec8_grf(urb_start, 0), BRW_TYPE_UD);
+         inst->src[1] = offset;
+         inst->src[2] = brw_imm_ud(REG_SIZE * 2 * 32);
+         read_attribute_payload = true;
+         continue;
+      }
+
      for (int i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == ATTR) {
            /* ATTR brw_reg::nr in the FS is in units of logical scalar
@ -1359,11 +1371,18 @@ brw_assign_urb_setup(brw_shader &s)
      }
   }

+   if (read_attribute_payload) {
+      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
+                            BRW_DEPENDENCY_VARIABLES);
+   }
+
   /* Each attribute is 4 setup channels, each of which is half a reg,
    * but they may be replicated multiple times for multipolygon
    * dispatch.
    */
-   s.first_non_payload_grf += prog_data->num_varying_inputs * 2 * s.max_polygons;
+   s.first_non_payload_grf +=
+      (read_attribute_payload ? 32 : prog_data->num_varying_inputs) *
+      2 * s.max_polygons;

   /* Unlike regular attributes, per-primitive attributes have all 4 channels
    * in the same slot, so each GRF can store two slots.
@ -1440,6 +1459,9 @@ run_fs(brw_shader &s, bool allow_spilling, bool do_rep_send)

      brw_assign_urb_setup(s);

+      s.debug_optimizer(nir, "urb_setup", 89, 0);
+
+
      brw_lower_3src_null_dest(s);
      brw_workaround_emit_dummy_mov_instruction(s);

--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@ -512,6 +512,7 @@ enum opcode {
   FS_OPCODE_INTERPOLATE_AT_SAMPLE,
   FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
   FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
+   FS_OPCODE_READ_ATTRIBUTE_PAYLOAD,

   /**
    * GLSL barrier()
--- a/src/intel/compiler/brw_from_nir.cpp
+++ b/src/intel/compiler/brw_from_nir.cpp
@ -4593,6 +4593,17 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
              brw_dynamic_msaa_flags(brw_wm_prog_data(s.prog_data)));
      break;

+   case nir_intrinsic_load_max_polygon_intel:
+      bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
+      break;
+
+   case nir_intrinsic_read_attribute_payload_intel: {
+      const brw_reg offset = retype(get_nir_src(ntb, instr->src[0], 0),
+                                    BRW_TYPE_UD);
+      bld.emit(FS_OPCODE_READ_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
+      break;
+   }
+
   default:
      brw_from_nir_emit_intrinsic(ntb, bld, instr);
      break;
--- a/src/intel/compiler/brw_lower_simd_width.cpp
+++ b/src/intel/compiler/brw_lower_simd_width.cpp
@ -423,7 +423,8 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
              swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
              get_fpu_lowered_simd_width(shader, inst));
   }
-   case SHADER_OPCODE_MOV_INDIRECT: {
+   case SHADER_OPCODE_MOV_INDIRECT:
+   case FS_OPCODE_READ_ATTRIBUTE_PAYLOAD: {
      /* From IVB and HSW PRMs:
       *
       * "2.When the destination requires two registers and the sources are
--- a/src/intel/compiler/brw_print.cpp
+++ b/src/intel/compiler/brw_print.cpp
@ -245,6 +245,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";
+   case FS_OPCODE_READ_ATTRIBUTE_PAYLOAD:
+      return "fs_read_attribute_payload";

   case SHADER_OPCODE_BARRIER:
      return "barrier";