From e112e2b0477fc36280a695c5d31e5345770b362f Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 27 Dec 2024 20:26:25 +0100 Subject: [PATCH] nir,amd: optimize front_face ? a : -a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi31: Totals from 3345 (4.21% of 79395) affected shaders: MaxWaves: 96182 -> 96174 (-0.01%) Instrs: 3135439 -> 3129508 (-0.19%); split: -0.24%, +0.05% CodeSize: 16776088 -> 16718048 (-0.35%); split: -0.38%, +0.03% VGPRs: 190884 -> 190848 (-0.02%); split: -0.03%, +0.01% Latency: 32624132 -> 32621734 (-0.01%); split: -0.16%, +0.16% InvThroughput: 5759987 -> 5749957 (-0.17%); split: -0.23%, +0.05% VClause: 51044 -> 51086 (+0.08%); split: -0.12%, +0.20% SClause: 103415 -> 103223 (-0.19%); split: -0.64%, +0.45% Copies: 170398 -> 170555 (+0.09%); split: -0.64%, +0.74% PreSGPRs: 135567 -> 133887 (-1.24%) PreVGPRs: 140569 -> 141317 (+0.53%) VALU: 1959144 -> 1953839 (-0.27%); split: -0.30%, +0.03% SALU: 217956 -> 217676 (-0.13%); split: -0.20%, +0.07% Reviewed-by: Marek Olšák Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_shader_util.c | 1 + src/compiler/nir/nir.h | 5 +++++ src/compiler/nir/nir_opt_intrinsics.c | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 3815ca1549e..fa97d0b6f9e 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -97,6 +97,7 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm, options->vectorize_vec2_16bit = info->has_packed_math_16bit; options->discard_is_demote = true; options->optimize_sample_mask_in = true; + options->optimize_load_front_face_fsign = true; options->io_options = nir_io_has_flexible_input_interpolation_except_flat | (info->gfx_level >= GFX8 ? 
nir_io_16bit_input_output_support : 0) | nir_io_prefer_scalar_fs_inputs | diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index a6b4b48ce24..16e0e58d93e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4016,6 +4016,11 @@ typedef struct nir_shader_compiler_options { */ bool optimize_sample_mask_in; + /** + * Optimize load_front_face ? a : -a to load_front_face_fsign * a + */ + bool optimize_load_front_face_fsign; + /** * Optimize boolean reductions of quad broadcasts. This should only be enabled if * nir_intrinsic_reduce supports INCLUDE_HELPERS. diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index c73dd40d1a3..441e498d287 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -23,6 +23,7 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_search_helpers.h" /** * \file nir_opt_intrinsics.c @@ -89,6 +90,22 @@ try_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu, return shuffle; } +/* load_front_face ? a : -a -> load_front_face_fsign * a */ +static nir_def * +try_opt_front_face_fsign(nir_builder *b, nir_alu_instr *alu) +{ + if (alu->def.bit_size != 32 || + !nir_src_as_intrinsic(alu->src[0].src) || + nir_src_as_intrinsic(alu->src[0].src)->intrinsic != nir_intrinsic_load_front_face || + !is_only_used_as_float(alu) || + !nir_alu_srcs_negative_equal_typed(alu, alu, 1, 2, nir_type_float)) + return NULL; + + nir_def *src = nir_ssa_for_alu_src(b, alu, 1); + + return nir_fmul(b, nir_load_front_face_fsign(b), src); +} + static bool src_is_quad_broadcast(nir_block *block, nir_src src, nir_intrinsic_instr **intrin) { @@ -222,6 +239,8 @@ opt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu, switch (alu->op) { case nir_op_bcsel: replacement = try_opt_bcsel_of_shuffle(b, alu, block_has_discard); + if (!replacement && options->optimize_load_front_face_fsign) + replacement = try_opt_front_face_fsign(b, alu); break; case nir_op_iand: case nir_op_ior: