brw: switch to load_(pixel_coord|frag_coord_z|frag_coord_w) intrinsics

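With separate intrinsics, reads of the Z and W components of the fragment
coordinate are tracked as distinct system values (SYSTEM_VALUE_FRAG_COORD_Z
and SYSTEM_VALUE_FRAG_COORD_W) instead of a single SYSTEM_VALUE_FRAG_COORD.
This allows us to better determine if we need Z/W payload delivery.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36392>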
2025-12-28 01:40:08 +01:00 · 2025-07-25 10:01:42 +03:00 · 2025-07-25 10:01:42 +03:00 · 7e72d392d7
commit 7e72d392d7
parent b7f011e653
3 changed files with 44 additions and 46 deletions
--- a/src/intel/compiler/brw/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw/brw_compile_fs.cpp
@ -220,8 +220,8 @@ brw_emit_interpolation_setup(brw_shader &s)
      ub_cps_height = byte_offset(retype(cps_size, BRW_TYPE_UB), 1);
   }

-   s.pixel_x = bld.vgrf(BRW_TYPE_F);
-   s.pixel_y = bld.vgrf(BRW_TYPE_F);
+   s.uw_pixel_x = abld.vgrf(BRW_TYPE_UW);
+   s.uw_pixel_y = abld.vgrf(BRW_TYPE_UW);

   brw_fs_thread_payload &payload = s.fs_payload();

@ -379,9 +379,6 @@ brw_emit_interpolation_setup(brw_shader &s)
      break;
   }

-   brw_reg uw_pixel_x = abld.vgrf(BRW_TYPE_UW);
-   brw_reg uw_pixel_y = abld.vgrf(BRW_TYPE_UW);
-
   for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) {
      const brw_builder hbld = abld.group(MIN2(16, s.dispatch_width), i);
      /* According to the "PS Thread Payload for Normal Dispatch"
@ -394,8 +391,8 @@ brw_emit_interpolation_setup(brw_shader &s)
                                    brw_vec1_grf(i + 1, 0);
      const struct brw_reg gi_uw = retype(gi_reg, BRW_TYPE_UW);

-      brw_reg int_pixel_x = offset(uw_pixel_x, hbld, i);
-      brw_reg int_pixel_y = offset(uw_pixel_y, hbld, i);
+      brw_reg int_pixel_x = offset(s.uw_pixel_x, hbld, i);
+      brw_reg int_pixel_y = offset(s.uw_pixel_y, hbld, i);

      if (devinfo->verx10 >= 125) {
         /* We compute two sets of int pixel x/y: one with a 2 byte stride for
@ -408,12 +405,6 @@ brw_emit_interpolation_setup(brw_shader &s)
         const brw_reg int_pixel_x_4b = dbld.vgrf(BRW_TYPE_UW);
         const brw_reg int_pixel_y_4b = dbld.vgrf(BRW_TYPE_UW);

-         hbld.ADD(int_pixel_x,
-                  brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
-                  int_pixel_offset_x);
-         hbld.ADD(int_pixel_y,
-                  brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
-                  int_pixel_offset_y);
         dbld.ADD(int_pixel_x_4b,
                  brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
                  int_pixel_offset_x);
@ -422,27 +413,18 @@ brw_emit_interpolation_setup(brw_shader &s)
                  int_pixel_offset_y);

         if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER) {
-            brw_inst *addx = hbld.ADD(int_pixel_x, int_pixel_x,
-                                     horiz_stride(half_int_pixel_offset_x, 0));
-            brw_inst *addy = hbld.ADD(int_pixel_y, int_pixel_y,
-                                     horiz_stride(half_int_pixel_offset_y, 0));
-            if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) {
-               addx->predicate = BRW_PREDICATE_NORMAL;
-               addy->predicate = BRW_PREDICATE_NORMAL;
-            }
-            addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b,
-                            horiz_stride(half_int_pixel_offset_x, 0));
-            addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b,
-                            horiz_stride(half_int_pixel_offset_y, 0));
+            brw_inst *addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b,
+                                      horiz_stride(half_int_pixel_offset_x, 0));
+            brw_inst *addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b,
+                                      horiz_stride(half_int_pixel_offset_y, 0));
            if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) {
               addx->predicate = BRW_PREDICATE_NORMAL;
               addy->predicate = BRW_PREDICATE_NORMAL;
            }
         }

-         hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x_4b, 2));
-         hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y_4b, 2));
-
+         hbld.MOV(int_pixel_x, horiz_stride(int_pixel_x_4b, 2));
+         hbld.MOV(int_pixel_y, horiz_stride(int_pixel_y_4b, 2));
      } else {
         /* The "Register Region Restrictions" page says for BDW (and newer,
          * presumably):
@ -466,9 +448,6 @@ brw_emit_interpolation_setup(brw_shader &s)
                                      horiz_stride(half_int_pixel_offset_x, 0));
         hbld.emit(FS_OPCODE_PIXEL_Y, int_pixel_y, int_pixel_xy,
                                      horiz_stride(half_int_pixel_offset_y, 0));
-
-         hbld.MOV(offset(s.pixel_x, hbld, i), int_pixel_x);
-         hbld.MOV(offset(s.pixel_y, hbld, i), int_pixel_y);
      }
   }

@ -503,8 +482,11 @@ brw_emit_interpolation_setup(brw_shader &s)
      const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
      const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);

-      abld.ADD(float_pixel_x, s.pixel_x, negate(x_start));
-      abld.ADD(float_pixel_y, s.pixel_y, negate(y_start));
+      abld.MOV(float_pixel_x, s.uw_pixel_x);
+      abld.MOV(float_pixel_y, s.uw_pixel_y);
+
+      abld.ADD(float_pixel_x, float_pixel_x, negate(x_start));
+      abld.ADD(float_pixel_y, float_pixel_y, negate(y_start));

      const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
      const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
@ -1107,12 +1089,12 @@ brw_nir_populate_wm_prog_data(nir_shader *shader,
                           prog_data->coarse_pixel_dispatch != INTEL_NEVER;

   prog_data->uses_src_w =
-      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
+      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W);
   prog_data->uses_src_depth =
-      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
+      BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) &&
      prog_data->coarse_pixel_dispatch == INTEL_NEVER;
   prog_data->uses_depth_w_coefficients = prog_data->uses_pc_bary_coefficients ||
-      (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
+      (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) &&
       prog_data->coarse_pixel_dispatch != INTEL_NEVER);

   calculate_urb_setup(devinfo, key, prog_data, shader, mue_map, per_primitive_offsets);
@ -1523,6 +1505,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
   if (!key->coherent_fb_fetch)
      NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);

+   NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord);
+   NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord);
+
   /* From the SKL PRM, Volume 7, "Alpha Coverage":
    *  "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
    *   hardware, regardless of the state setting for this feature."
--- a/src/intel/compiler/brw/brw_from_nir.cpp
+++ b/src/intel/compiler/brw/brw_from_nir.cpp
@ -2170,6 +2170,11 @@ emit_pixel_interpolater_alu_at_offset(const brw_builder &bld,
   const brw_reg off_y = bld.vgrf(BRW_TYPE_F);
   bld.ADD(off_y, offset(offs, bld, 1), brw_imm_f(0.5));

+   const brw_reg pixel_x = bld.vgrf(BRW_TYPE_F);
+   bld.MOV(pixel_x, shader->uw_pixel_x);
+   const brw_reg pixel_y = bld.vgrf(BRW_TYPE_F);
+   bld.MOV(pixel_y, shader->uw_pixel_y);
+
   /* Process no more than two polygons at a time to avoid hitting
    * regioning restrictions.
    */
@ -2212,11 +2217,11 @@ emit_pixel_interpolater_alu_at_offset(const brw_builder &bld,

      /* Compute X/Y coordinate deltas relative to the origin of the polygon. */
      const brw_reg delta_x = ibld.vgrf(BRW_TYPE_F);
-      ibld.ADD(delta_x, offset(shader->pixel_x, ibld, i), negate(start_x));
+      ibld.ADD(delta_x, offset(pixel_x, ibld, i), negate(start_x));
      ibld.ADD(delta_x, delta_x, offset(off_x, ibld, i));

      const brw_reg delta_y = ibld.vgrf(BRW_TYPE_F);
-      ibld.ADD(delta_y, offset(shader->pixel_y, ibld, i), negate(start_y));
+      ibld.ADD(delta_y, offset(pixel_y, ibld, i), negate(start_y));
      ibld.ADD(delta_y, delta_y, offset(off_y, ibld, i));

      /* Evaluate the plane equations obtained above for the
@ -4101,6 +4106,20 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
      dest = get_nir_def(ntb, instr->def);

   switch (instr->intrinsic) {
+   case nir_intrinsic_load_pixel_coord: {
+      brw_reg comps[2] = { s.uw_pixel_x, s.uw_pixel_y };
+      bld.VEC(retype(dest, BRW_TYPE_UW), comps, 2);
+      break;
+   }
+
+   case nir_intrinsic_load_frag_coord_z:
+      bld.MOV(dest, s.pixel_z);
+      break;
+
+   case nir_intrinsic_load_frag_coord_w:
+      bld.MOV(dest, s.wpos_w);
+      break;
+
   case nir_intrinsic_load_front_face:
      bld.MOV(retype(dest, BRW_TYPE_D), emit_frontfacing_interpolation(ntb));
      break;
@ -4443,12 +4462,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
      break;
   }

-   case nir_intrinsic_load_frag_coord: {
-      brw_reg comps[4] = { s.pixel_x, s.pixel_y, s.pixel_z, s.wpos_w };
-      bld.VEC(dest, comps, 4);
-      break;
-   }
-
   case nir_intrinsic_load_interpolated_input: {
      assert(nir_src_is_intrinsic(instr->src[0]));
      nir_intrinsic_instr *bary_intrinsic = nir_src_as_intrinsic(instr->src[0]);
--- a/src/intel/compiler/brw/brw_shader.h
+++ b/src/intel/compiler/brw/brw_shader.h
@ -191,8 +191,8 @@ public:

   bool source_depth_to_render_target;

-   brw_reg pixel_x;
-   brw_reg pixel_y;
+   brw_reg uw_pixel_x;
+   brw_reg uw_pixel_y;
   brw_reg pixel_z;
   brw_reg wpos_w;
   brw_reg pixel_w;