amd: lower gl_FragCoord.w rcp in NIR

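Previously each backend (ACO and LLVM) emitted the rcp of the frag_pos[3] shader
argument itself when loading gl_FragCoord. Doing this lowering in NIR instead
allows NIR to remove the rcps if the application uses rcp(gl_FragCoord.w).
D3D provides w in the shader rather than 1/w like GL/VK, so this pattern is commonly used.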

Foz-DB Navi21:
Totals from 2068 (2.61% of 79206) affected shaders:
MaxWaves: 45636 -> 45652 (+0.04%)
Instrs: 2173444 -> 2169671 (-0.17%); split: -0.18%, +0.00%
CodeSize: 11881304 -> 11867208 (-0.12%); split: -0.12%, +0.01%
VGPRs: 118000 -> 117968 (-0.03%)
Latency: 35689676 -> 35675909 (-0.04%); split: -0.06%, +0.02%
InvThroughput: 9167199 -> 9159801 (-0.08%); split: -0.08%, +0.00%
VClause: 45076 -> 45078 (+0.00%); split: -0.01%, +0.02%
SClause: 92503 -> 92366 (-0.15%); split: -0.31%, +0.17%
Copies: 140282 -> 140303 (+0.01%); split: -0.13%, +0.14%
Branches: 53347 -> 53346 (-0.00%); split: -0.01%, +0.00%
PreVGPRs: 96495 -> 96465 (-0.03%)
VALU: 1522980 -> 1519252 (-0.24%); split: -0.25%, +0.01%
SALU: 213451 -> 213460 (+0.00%); split: -0.02%, +0.02%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31967>
This commit is contained in:
Georg Lehmann 2024-11-04 19:26:49 +01:00 committed by Marge Bot
parent 917f312873
commit 2cd8a9fef7
5 changed files with 6 additions and 8 deletions

View file

@@ -30,7 +30,7 @@ traces:
checksum: 47c004fed88bed8d3d387295399f0810
angle/libangle_restricted_traces_pubg_mobile_battle_royale.so:
vk-radv-raven:
checksum: 5c2a0f8ab6d98a7cf9461d755f051c16
checksum: ae4e6353a18c3ea76067a8d3a8740a75
angle/libangle_restricted_traces_temple_run_300.so:
vk-radv-raven:
checksum: 4fa6a73dad7d9dd747ba7cbf82aae42e

View file

@@ -5736,11 +5736,6 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
else
vec->operands[i] = Operand(v1);
}
if (G_0286CC_POS_W_FLOAT_ENA(ctx->program->config->spi_ps_input_ena)) {
assert(num_components == 4);
vec->operands[3] =
bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_arg(ctx, ctx->args->frag_pos[3]));
}
for (Operand& op : vec->operands)
op = op.isUndefined() ? Operand::zero() : op;

View file

@@ -2873,8 +2873,7 @@ emit_load_frag_coord(struct ac_nir_context *ctx)
{
LLVMValueRef values[4] = {
ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[3])};
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
}

View file

@@ -2667,6 +2667,8 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, gfx_state, rast_prim);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_fragcoord_wtrans);
/* frag_depth = gl_FragCoord.z broadcasts to all samples of the fragment shader invocation,
* so only optimize it away if we know there is only one sample per invocation.
* Because we don't know if sample shading is used with factor 1.0f, this means

View file

@@ -2350,6 +2350,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
if (key->ps.mono.point_smoothing)
NIR_PASS(progress, nir, nir_lower_point_smooth);
NIR_PASS(progress, nir, nir_lower_fragcoord_wtrans);
}
/* This must be before si_nir_lower_resource. */