From 2cd8a9fef769a47e84d9aefac829de10623a42b9 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 4 Nov 2024 19:26:49 +0100 Subject: [PATCH] amd: lower gl_FragCoord.w rcp in NIR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows NIR to remove the rcps if the application uses rcp(gl_FragCoord.w). D3D provides w, not 1/w like GL/VK in the shader, so this is commonly used. Foz-DB Navi21: Totals from 2068 (2.61% of 79206) affected shaders: MaxWaves: 45636 -> 45652 (+0.04%) Instrs: 2173444 -> 2169671 (-0.17%); split: -0.18%, +0.00% CodeSize: 11881304 -> 11867208 (-0.12%); split: -0.12%, +0.01% VGPRs: 118000 -> 117968 (-0.03%) Latency: 35689676 -> 35675909 (-0.04%); split: -0.06%, +0.02% InvThroughput: 9167199 -> 9159801 (-0.08%); split: -0.08%, +0.00% VClause: 45076 -> 45078 (+0.00%); split: -0.01%, +0.02% SClause: 92503 -> 92366 (-0.15%); split: -0.31%, +0.17% Copies: 140282 -> 140303 (+0.01%); split: -0.13%, +0.14% Branches: 53347 -> 53346 (-0.00%); split: -0.01%, +0.00% PreVGPRs: 96495 -> 96465 (-0.03%) VALU: 1522980 -> 1519252 (-0.24%); split: -0.25%, +0.01% SALU: 213451 -> 213460 (+0.00%); split: -0.02%, +0.02% Reviewed-by: Marek Olšák Reviewed-by: Alyssa Rosenzweig Part-of: --- src/amd/ci/restricted-traces-amd.yml | 2 +- src/amd/compiler/aco_instruction_selection.cpp | 5 ----- src/amd/llvm/ac_nir_to_llvm.c | 3 +-- src/amd/vulkan/radv_pipeline_graphics.c | 2 ++ src/gallium/drivers/radeonsi/si_shader.c | 2 ++ 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/amd/ci/restricted-traces-amd.yml b/src/amd/ci/restricted-traces-amd.yml index e6729ebccb6..a0d8d303f82 100644 --- a/src/amd/ci/restricted-traces-amd.yml +++ b/src/amd/ci/restricted-traces-amd.yml @@ -30,7 +30,7 @@ traces: checksum: 47c004fed88bed8d3d387295399f0810 angle/libangle_restricted_traces_pubg_mobile_battle_royale.so: vk-radv-raven: - checksum: 5c2a0f8ab6d98a7cf9461d755f051c16 + checksum: ae4e6353a18c3ea76067a8d3a8740a75 angle/libangle_restricted_traces_temple_run_300.so: vk-radv-raven: checksum: 4fa6a73dad7d9dd747ba7cbf82aae42e diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 36ce31d0bd9..9ebca41ad99 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5736,11 +5736,6 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components) else vec->operands[i] = Operand(v1); } - if (G_0286CC_POS_W_FLOAT_ENA(ctx->program->config->spi_ps_input_ena)) { - assert(num_components == 4); - vec->operands[3] = - bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_arg(ctx, ctx->args->frag_pos[3])); - } for (Operand& op : vec->operands) op = op.isUndefined() ? Operand::zero() : op; diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index f83ca8a86a9..9692a08d4e7 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2873,8 +2873,7 @@ emit_load_frag_coord(struct ac_nir_context *ctx) { LLVMValueRef values[4] = { ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]), - ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]), - ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))}; + ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[3])}; return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4)); } diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index e2abd13e698..70529c9d600 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2667,6 +2667,8 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, gfx_state, rast_prim); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_fragcoord_wtrans); + /* frag_depth = gl_FragCoord.z broadcasts to all samples of the fragment shader invocation, * so only optimize it away if we know there is only one sample per invocation. * Because we don't know if sample shading is used with factor 1.0f, this means diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6619ab9b750..3fd1a3b9f9a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2350,6 +2350,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, if (key->ps.mono.point_smoothing) NIR_PASS(progress, nir, nir_lower_point_smooth); + + NIR_PASS(progress, nir, nir_lower_fragcoord_wtrans); } /* This must be before si_nir_lower_resource. */