brw: switch to load_(pixel_coord|frag_coord_z|frag_coord_w) intrinsics
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Reading Z and W through separate intrinsics, instead of a single combined frag_coord load, allows us to better determine if we need Z/W payload delivery.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36392>
This commit is contained in:
Lionel Landwerlin 2025-07-25 10:01:42 +03:00 committed by Marge Bot
parent b7f011e653
commit 7e72d392d7
3 changed files with 44 additions and 46 deletions

View file

@ -220,8 +220,8 @@ brw_emit_interpolation_setup(brw_shader &s)
ub_cps_height = byte_offset(retype(cps_size, BRW_TYPE_UB), 1);
}
s.pixel_x = bld.vgrf(BRW_TYPE_F);
s.pixel_y = bld.vgrf(BRW_TYPE_F);
s.uw_pixel_x = abld.vgrf(BRW_TYPE_UW);
s.uw_pixel_y = abld.vgrf(BRW_TYPE_UW);
brw_fs_thread_payload &payload = s.fs_payload();
@ -379,9 +379,6 @@ brw_emit_interpolation_setup(brw_shader &s)
break;
}
brw_reg uw_pixel_x = abld.vgrf(BRW_TYPE_UW);
brw_reg uw_pixel_y = abld.vgrf(BRW_TYPE_UW);
for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) {
const brw_builder hbld = abld.group(MIN2(16, s.dispatch_width), i);
/* According to the "PS Thread Payload for Normal Dispatch"
@ -394,8 +391,8 @@ brw_emit_interpolation_setup(brw_shader &s)
brw_vec1_grf(i + 1, 0);
const struct brw_reg gi_uw = retype(gi_reg, BRW_TYPE_UW);
brw_reg int_pixel_x = offset(uw_pixel_x, hbld, i);
brw_reg int_pixel_y = offset(uw_pixel_y, hbld, i);
brw_reg int_pixel_x = offset(s.uw_pixel_x, hbld, i);
brw_reg int_pixel_y = offset(s.uw_pixel_y, hbld, i);
if (devinfo->verx10 >= 125) {
/* We compute two sets of int pixel x/y: one with a 2 byte stride for
@ -408,12 +405,6 @@ brw_emit_interpolation_setup(brw_shader &s)
const brw_reg int_pixel_x_4b = dbld.vgrf(BRW_TYPE_UW);
const brw_reg int_pixel_y_4b = dbld.vgrf(BRW_TYPE_UW);
hbld.ADD(int_pixel_x,
brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
int_pixel_offset_x);
hbld.ADD(int_pixel_y,
brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
int_pixel_offset_y);
dbld.ADD(int_pixel_x_4b,
brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
int_pixel_offset_x);
@ -422,27 +413,18 @@ brw_emit_interpolation_setup(brw_shader &s)
int_pixel_offset_y);
if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER) {
brw_inst *addx = hbld.ADD(int_pixel_x, int_pixel_x,
horiz_stride(half_int_pixel_offset_x, 0));
brw_inst *addy = hbld.ADD(int_pixel_y, int_pixel_y,
horiz_stride(half_int_pixel_offset_y, 0));
if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) {
addx->predicate = BRW_PREDICATE_NORMAL;
addy->predicate = BRW_PREDICATE_NORMAL;
}
addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b,
horiz_stride(half_int_pixel_offset_x, 0));
addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b,
horiz_stride(half_int_pixel_offset_y, 0));
brw_inst *addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b,
horiz_stride(half_int_pixel_offset_x, 0));
brw_inst *addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b,
horiz_stride(half_int_pixel_offset_y, 0));
if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) {
addx->predicate = BRW_PREDICATE_NORMAL;
addy->predicate = BRW_PREDICATE_NORMAL;
}
}
hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x_4b, 2));
hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y_4b, 2));
hbld.MOV(int_pixel_x, horiz_stride(int_pixel_x_4b, 2));
hbld.MOV(int_pixel_y, horiz_stride(int_pixel_y_4b, 2));
} else {
/* The "Register Region Restrictions" page says for BDW (and newer,
* presumably):
@ -466,9 +448,6 @@ brw_emit_interpolation_setup(brw_shader &s)
horiz_stride(half_int_pixel_offset_x, 0));
hbld.emit(FS_OPCODE_PIXEL_Y, int_pixel_y, int_pixel_xy,
horiz_stride(half_int_pixel_offset_y, 0));
hbld.MOV(offset(s.pixel_x, hbld, i), int_pixel_x);
hbld.MOV(offset(s.pixel_y, hbld, i), int_pixel_y);
}
}
@ -503,8 +482,11 @@ brw_emit_interpolation_setup(brw_shader &s)
const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
abld.ADD(float_pixel_x, s.pixel_x, negate(x_start));
abld.ADD(float_pixel_y, s.pixel_y, negate(y_start));
abld.MOV(float_pixel_x, s.uw_pixel_x);
abld.MOV(float_pixel_y, s.uw_pixel_y);
abld.ADD(float_pixel_x, float_pixel_x, negate(x_start));
abld.ADD(float_pixel_y, float_pixel_y, negate(y_start));
const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
@ -1107,12 +1089,12 @@ brw_nir_populate_wm_prog_data(nir_shader *shader,
prog_data->coarse_pixel_dispatch != INTEL_NEVER;
prog_data->uses_src_w =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W);
prog_data->uses_src_depth =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) &&
prog_data->coarse_pixel_dispatch == INTEL_NEVER;
prog_data->uses_depth_w_coefficients = prog_data->uses_pc_bary_coefficients ||
(BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
(BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) &&
prog_data->coarse_pixel_dispatch != INTEL_NEVER);
calculate_urb_setup(devinfo, key, prog_data, shader, mue_map, per_primitive_offsets);
@ -1523,6 +1505,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (!key->coherent_fb_fetch)
NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord);
NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord);
/* From the SKL PRM, Volume 7, "Alpha Coverage":
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
* hardware, regardless of the state setting for this feature."

View file

@ -2170,6 +2170,11 @@ emit_pixel_interpolater_alu_at_offset(const brw_builder &bld,
const brw_reg off_y = bld.vgrf(BRW_TYPE_F);
bld.ADD(off_y, offset(offs, bld, 1), brw_imm_f(0.5));
const brw_reg pixel_x = bld.vgrf(BRW_TYPE_F);
bld.MOV(pixel_x, shader->uw_pixel_x);
const brw_reg pixel_y = bld.vgrf(BRW_TYPE_F);
bld.MOV(pixel_y, shader->uw_pixel_y);
/* Process no more than two polygons at a time to avoid hitting
* regioning restrictions.
*/
@ -2212,11 +2217,11 @@ emit_pixel_interpolater_alu_at_offset(const brw_builder &bld,
/* Compute X/Y coordinate deltas relative to the origin of the polygon. */
const brw_reg delta_x = ibld.vgrf(BRW_TYPE_F);
ibld.ADD(delta_x, offset(shader->pixel_x, ibld, i), negate(start_x));
ibld.ADD(delta_x, offset(pixel_x, ibld, i), negate(start_x));
ibld.ADD(delta_x, delta_x, offset(off_x, ibld, i));
const brw_reg delta_y = ibld.vgrf(BRW_TYPE_F);
ibld.ADD(delta_y, offset(shader->pixel_y, ibld, i), negate(start_y));
ibld.ADD(delta_y, offset(pixel_y, ibld, i), negate(start_y));
ibld.ADD(delta_y, delta_y, offset(off_y, ibld, i));
/* Evaluate the plane equations obtained above for the
@ -4101,6 +4106,20 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
dest = get_nir_def(ntb, instr->def);
switch (instr->intrinsic) {
case nir_intrinsic_load_pixel_coord: {
brw_reg comps[2] = { s.uw_pixel_x, s.uw_pixel_y };
bld.VEC(retype(dest, BRW_TYPE_UW), comps, 2);
break;
}
case nir_intrinsic_load_frag_coord_z:
bld.MOV(dest, s.pixel_z);
break;
case nir_intrinsic_load_frag_coord_w:
bld.MOV(dest, s.wpos_w);
break;
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_TYPE_D), emit_frontfacing_interpolation(ntb));
break;
@ -4443,12 +4462,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
break;
}
case nir_intrinsic_load_frag_coord: {
brw_reg comps[4] = { s.pixel_x, s.pixel_y, s.pixel_z, s.wpos_w };
bld.VEC(dest, comps, 4);
break;
}
case nir_intrinsic_load_interpolated_input: {
assert(nir_src_is_intrinsic(instr->src[0]));
nir_intrinsic_instr *bary_intrinsic = nir_src_as_intrinsic(instr->src[0]);

View file

@ -191,8 +191,8 @@ public:
bool source_depth_to_render_target;
brw_reg pixel_x;
brw_reg pixel_y;
brw_reg uw_pixel_x;
brw_reg uw_pixel_y;
brw_reg pixel_z;
brw_reg wpos_w;
brw_reg pixel_w;