From 7e72d392d753598bc8ef8f00d0bc5a6646bec0dc Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 25 Jul 2025 10:01:42 +0300 Subject: [PATCH] brw: switch to load_(pixel_coord|frag_coord_z|frag_coord_w) intrinsics Allows us to better determine if we need Z/W payload delivery. Signed-off-by: Lionel Landwerlin Reviewed-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/brw/brw_compile_fs.cpp | 57 +++++++++-------------- src/intel/compiler/brw/brw_from_nir.cpp | 29 ++++++++---- src/intel/compiler/brw/brw_shader.h | 4 +- 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp index 205a8af2024..06e23a946d8 100644 --- a/src/intel/compiler/brw/brw_compile_fs.cpp +++ b/src/intel/compiler/brw/brw_compile_fs.cpp @@ -220,8 +220,8 @@ brw_emit_interpolation_setup(brw_shader &s) ub_cps_height = byte_offset(retype(cps_size, BRW_TYPE_UB), 1); } - s.pixel_x = bld.vgrf(BRW_TYPE_F); - s.pixel_y = bld.vgrf(BRW_TYPE_F); + s.uw_pixel_x = abld.vgrf(BRW_TYPE_UW); + s.uw_pixel_y = abld.vgrf(BRW_TYPE_UW); brw_fs_thread_payload &payload = s.fs_payload(); @@ -379,9 +379,6 @@ brw_emit_interpolation_setup(brw_shader &s) break; } - brw_reg uw_pixel_x = abld.vgrf(BRW_TYPE_UW); - brw_reg uw_pixel_y = abld.vgrf(BRW_TYPE_UW); - for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) { const brw_builder hbld = abld.group(MIN2(16, s.dispatch_width), i); /* According to the "PS Thread Payload for Normal Dispatch" @@ -394,8 +391,8 @@ brw_emit_interpolation_setup(brw_shader &s) brw_vec1_grf(i + 1, 0); const struct brw_reg gi_uw = retype(gi_reg, BRW_TYPE_UW); - brw_reg int_pixel_x = offset(uw_pixel_x, hbld, i); - brw_reg int_pixel_y = offset(uw_pixel_y, hbld, i); + brw_reg int_pixel_x = offset(s.uw_pixel_x, hbld, i); + brw_reg int_pixel_y = offset(s.uw_pixel_y, hbld, i); if (devinfo->verx10 >= 125) { /* We compute two sets of int pixel x/y: one with a 2 byte stride for @@ -408,12 +405,6 @@ brw_emit_interpolation_setup(brw_shader &s) const brw_reg int_pixel_x_4b = dbld.vgrf(BRW_TYPE_UW); const brw_reg int_pixel_y_4b = dbld.vgrf(BRW_TYPE_UW); - hbld.ADD(int_pixel_x, - brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)), - int_pixel_offset_x); - hbld.ADD(int_pixel_y, - brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)), - int_pixel_offset_y); dbld.ADD(int_pixel_x_4b, brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)), int_pixel_offset_x); @@ -422,27 +413,18 @@ brw_emit_interpolation_setup(brw_shader &s) int_pixel_offset_y); if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER) { - brw_inst *addx = hbld.ADD(int_pixel_x, int_pixel_x, - horiz_stride(half_int_pixel_offset_x, 0)); - brw_inst *addy = hbld.ADD(int_pixel_y, int_pixel_y, - horiz_stride(half_int_pixel_offset_y, 0)); - if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) { - addx->predicate = BRW_PREDICATE_NORMAL; - addy->predicate = BRW_PREDICATE_NORMAL; - } - addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b, - horiz_stride(half_int_pixel_offset_x, 0)); - addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b, - horiz_stride(half_int_pixel_offset_y, 0)); + brw_inst *addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b, + horiz_stride(half_int_pixel_offset_x, 0)); + brw_inst *addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b, + horiz_stride(half_int_pixel_offset_y, 0)); if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) { addx->predicate = BRW_PREDICATE_NORMAL; addy->predicate = BRW_PREDICATE_NORMAL; } } - hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x_4b, 2)); - hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y_4b, 2)); - + hbld.MOV(int_pixel_x, horiz_stride(int_pixel_x_4b, 2)); + hbld.MOV(int_pixel_y, horiz_stride(int_pixel_y_4b, 2)); } else { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): @@ -466,9 +448,6 @@ brw_emit_interpolation_setup(brw_shader &s) horiz_stride(half_int_pixel_offset_x, 0)); hbld.emit(FS_OPCODE_PIXEL_Y, int_pixel_y, int_pixel_xy, horiz_stride(half_int_pixel_offset_y, 0)); - - hbld.MOV(offset(s.pixel_x, hbld, i), int_pixel_x); - hbld.MOV(offset(s.pixel_y, hbld, i), int_pixel_y); } } @@ -503,8 +482,11 @@ brw_emit_interpolation_setup(brw_shader &s) const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F); const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F); - abld.ADD(float_pixel_x, s.pixel_x, negate(x_start)); - abld.ADD(float_pixel_y, s.pixel_y, negate(y_start)); + abld.MOV(float_pixel_x, s.uw_pixel_x); + abld.MOV(float_pixel_y, s.uw_pixel_y); + + abld.ADD(float_pixel_x, float_pixel_x, negate(x_start)); + abld.ADD(float_pixel_y, float_pixel_y, negate(y_start)); const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F); const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F); @@ -1107,12 +1089,12 @@ brw_nir_populate_wm_prog_data(nir_shader *shader, prog_data->coarse_pixel_dispatch != INTEL_NEVER; prog_data->uses_src_w = - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W); prog_data->uses_src_depth = - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) && + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) && prog_data->coarse_pixel_dispatch == INTEL_NEVER; prog_data->uses_depth_w_coefficients = prog_data->uses_pc_bary_coefficients || - (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) && + (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) && prog_data->coarse_pixel_dispatch != INTEL_NEVER); calculate_urb_setup(devinfo, key, prog_data, shader, mue_map, per_primitive_offsets); @@ -1523,6 +1505,9 @@ brw_compile_fs(const struct brw_compiler *compiler, if (!key->coherent_fb_fetch) NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key); + NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord); + NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord); + /* From the SKL PRM, Volume 7, "Alpha Coverage": * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in * hardware, regardless of the state setting for this feature." diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index aa66a1dfe13..d85da501a0d 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -2170,6 +2170,11 @@ emit_pixel_interpolater_alu_at_offset(const brw_builder &bld, const brw_reg off_y = bld.vgrf(BRW_TYPE_F); bld.ADD(off_y, offset(offs, bld, 1), brw_imm_f(0.5)); + const brw_reg pixel_x = bld.vgrf(BRW_TYPE_F); + bld.MOV(pixel_x, shader->uw_pixel_x); + const brw_reg pixel_y = bld.vgrf(BRW_TYPE_F); + bld.MOV(pixel_y, shader->uw_pixel_y); + /* Process no more than two polygons at a time to avoid hitting * regioning restrictions. */ @@ -2212,11 +2217,11 @@ emit_pixel_interpolater_alu_at_offset(const brw_builder &bld, /* Compute X/Y coordinate deltas relative to the origin of the polygon. */ const brw_reg delta_x = ibld.vgrf(BRW_TYPE_F); - ibld.ADD(delta_x, offset(shader->pixel_x, ibld, i), negate(start_x)); + ibld.ADD(delta_x, offset(pixel_x, ibld, i), negate(start_x)); ibld.ADD(delta_x, delta_x, offset(off_x, ibld, i)); const brw_reg delta_y = ibld.vgrf(BRW_TYPE_F); - ibld.ADD(delta_y, offset(shader->pixel_y, ibld, i), negate(start_y)); + ibld.ADD(delta_y, offset(pixel_y, ibld, i), negate(start_y)); ibld.ADD(delta_y, delta_y, offset(off_y, ibld, i)); /* Evaluate the plane equations obtained above for the @@ -4101,6 +4106,20 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, dest = get_nir_def(ntb, instr->def); switch (instr->intrinsic) { + case nir_intrinsic_load_pixel_coord: { + brw_reg comps[2] = { s.uw_pixel_x, s.uw_pixel_y }; + bld.VEC(retype(dest, BRW_TYPE_UW), comps, 2); + break; + } + + case nir_intrinsic_load_frag_coord_z: + bld.MOV(dest, s.pixel_z); + break; + + case nir_intrinsic_load_frag_coord_w: + bld.MOV(dest, s.wpos_w); + break; + case nir_intrinsic_load_front_face: bld.MOV(retype(dest, BRW_TYPE_D), emit_frontfacing_interpolation(ntb)); break; @@ -4443,12 +4462,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_load_frag_coord: { - brw_reg comps[4] = { s.pixel_x, s.pixel_y, s.pixel_z, s.wpos_w }; - bld.VEC(dest, comps, 4); - break; - } - case nir_intrinsic_load_interpolated_input: { assert(nir_src_is_intrinsic(instr->src[0])); nir_intrinsic_instr *bary_intrinsic = nir_src_as_intrinsic(instr->src[0]); diff --git a/src/intel/compiler/brw/brw_shader.h b/src/intel/compiler/brw/brw_shader.h index 811db87e6fe..cb913bf9414 100644 --- a/src/intel/compiler/brw/brw_shader.h +++ b/src/intel/compiler/brw/brw_shader.h @@ -191,8 +191,8 @@ public: bool source_depth_to_render_target; - brw_reg pixel_x; - brw_reg pixel_y; + brw_reg uw_pixel_x; + brw_reg uw_pixel_y; brw_reg pixel_z; brw_reg wpos_w; brw_reg pixel_w;