radv: switch to nir_frag_coord_xy_z_w_separate with w_rcp

Only Cyberpunk is affected:

Totals from 10 (0.00% of 202429) affected shaders:
Latency: 337370 -> 337366 (-0.00%)

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41461>
This commit is contained in:
Marek Olšák 2026-05-09 19:07:10 -04:00 committed by Marge Bot
parent 63deaf3bfe
commit cb66de3beb
5 changed files with 27 additions and 12 deletions

View file

@ -201,6 +201,16 @@ lower_intrinsic_to_arg(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
ac_nir_load_arg(b, s->args, s->args->frag_pos[2]),
ac_nir_load_arg(b, s->args, s->args->frag_pos[3]));
break;
case nir_intrinsic_load_frag_coord_xy:
replacement = nir_vec2(b, ac_nir_load_arg(b, s->args, s->args->frag_pos[0]),
ac_nir_load_arg(b, s->args, s->args->frag_pos[1]));
break;
case nir_intrinsic_load_frag_coord_z:
replacement = ac_nir_load_arg(b, s->args, s->args->frag_pos[2]);
break;
case nir_intrinsic_load_frag_coord_w_rcp:
replacement = ac_nir_load_arg(b, s->args, s->args->frag_pos[3]);
break;
case nir_intrinsic_load_local_invocation_id: {
unsigned num_bits[3];
nir_def *vec[3];

View file

@ -47,16 +47,13 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
nir_def_replace(&intrin->def, def);
return true;
}
case nir_intrinsic_load_frag_coord: {
case nir_intrinsic_load_frag_coord_z: {
if (!gfx_state->adjust_frag_coord_z)
return false;
if (!(nir_def_components_read(&intrin->def) & (1 << 2)))
return false;
b->fp_math_ctrl = nir_fp_no_fast_math;
nir_def *frag_z = nir_channel(b, &intrin->def, 2);
nir_def *frag_z = &intrin->def;
/* VRS Rate X = Ancillary[2:3] */
nir_def *ancillary = nir_load_vector_arg_amd(b, 1, .base = args->ac.ancillary.arg_index);
@ -69,8 +66,7 @@ pass(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
/* adjusted_frag_z = dFdxFine(frag_z) * 0.0625 + frag_z */
frag_z = nir_ffma(b, nir_ddx_fine(b, frag_z), mul, frag_z);
nir_def *new_dest = nir_vector_insert_imm(b, &intrin->def, frag_z, 2);
nir_def_rewrite_uses_after(&intrin->def, new_dest);
nir_def_rewrite_uses_after(&intrin->def, frag_z);
b->fp_math_ctrl = 0;
return true;

View file

@ -1924,7 +1924,9 @@ radv_consider_force_vrs(const struct radv_graphics_state_key *gfx_state, const s
* interpolator) as that'd result in races between adjacent primitives with no common fine pixels.
*/
nir_shader *fs_shader = fs_stage->nir;
if (fs_shader && (BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
if (fs_shader && (BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_XY) ||
BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) ||
BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W_RCP) ||
BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_PIXEL_COORD) ||
fs_shader->info.fs.sample_interlock_ordered || fs_shader->info.fs.sample_interlock_unordered ||
fs_shader->info.fs.pixel_interlock_ordered || fs_shader->info.fs.pixel_interlock_unordered)) {
@ -2497,8 +2499,6 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, gfx_state, vgt_outprim_type);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_fragcoord_wtrans);
/* frag_depth = gl_FragCoord.z broadcasts to all samples of the fragment shader invocation,
* so only optimize it away if we know there is only one sample per invocation.
* Because we don't know if sample shading is used with factor 1.0f, this means

View file

@ -66,6 +66,7 @@ get_nir_options_for_stage(struct radv_compiler_info *compiler_info, mesa_shader_
options->max_unroll_iterations = 32;
options->max_unroll_iterations_aggressive = 128;
options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv;
options->frag_coord_form = nir_frag_coord_xy_z_w_separate | nir_frag_coord_use_w_rcp;
options->io_options |= nir_io_mediump_is_32bit | nir_io_radv_intrinsic_component_workaround;
options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
}

View file

@ -270,9 +270,15 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
case nir_intrinsic_load_pixel_coord:
info->ps.reads_pixel_coord = true;
break;
case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_frag_coord_xy:
info->ps.reads_frag_coord_mask |= nir_def_components_read(&instr->def);
break;
case nir_intrinsic_load_frag_coord_z:
info->ps.reads_frag_coord_mask |= BITFIELD_BIT(2);
break;
case nir_intrinsic_load_frag_coord_w_rcp:
info->ps.reads_frag_coord_mask |= BITFIELD_BIT(3);
break;
case nir_intrinsic_load_sample_pos:
info->ps.reads_sample_pos_mask |= nir_def_components_read(&instr->def);
break;
@ -813,7 +819,9 @@ gather_shader_info_fs(enum amd_gfx_level gfx_level, const nir_shader *nir,
info->ps.allow_flat_shading =
!(uses_persp_or_linear_interp || info->ps.needs_sample_positions || info->ps.reads_frag_shading_rate ||
info->ps.writes_memory || nir->info.fs.needs_coarse_quad_helper_invocations ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_XY) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W_RCP) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PIXEL_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||