diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 3815ca1549e..fa97d0b6f9e 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -97,6 +97,7 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm, options->vectorize_vec2_16bit = info->has_packed_math_16bit; options->discard_is_demote = true; options->optimize_sample_mask_in = true; + options->optimize_load_front_face_fsign = true; options->io_options = nir_io_has_flexible_input_interpolation_except_flat | (info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) | nir_io_prefer_scalar_fs_inputs | diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index a6b4b48ce24..16e0e58d93e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4016,6 +4016,11 @@ typedef struct nir_shader_compiler_options { */ bool optimize_sample_mask_in; + /** + * Optimize load_front_face ? a : -a to load_front_face_fsign * a + */ + bool optimize_load_front_face_fsign; + /** * Optimize boolean reductions of quad broadcasts. This should only be enabled if * nir_intrinsic_reduce supports INCLUDE_HELPERS. diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index c73dd40d1a3..441e498d287 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -23,6 +23,7 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_search_helpers.h" /** * \file nir_opt_intrinsics.c @@ -89,6 +90,22 @@ try_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu, return shuffle; } +/* load_front_face ? 
a : -a -> load_front_face_fsign * a. Returns the replacement fmul, or NULL when the bcsel is not 32-bit, its condition is not load_front_face, or the is_only_used_as_float / nir_alu_srcs_negative_equal_typed checks fail. */ +static nir_def * +try_opt_front_face_fsign(nir_builder *b, nir_alu_instr *alu) +{ + if (alu->def.bit_size != 32 || + !nir_src_as_intrinsic(alu->src[0].src) || + nir_src_as_intrinsic(alu->src[0].src)->intrinsic != nir_intrinsic_load_front_face || + !is_only_used_as_float(alu) || + !nir_alu_srcs_negative_equal_typed(alu, alu, 1, 2, nir_type_float)) + return NULL; + + nir_def *src = nir_ssa_for_alu_src(b, alu, 1); + + return nir_fmul(b, nir_load_front_face_fsign(b), src); +} + static bool src_is_quad_broadcast(nir_block *block, nir_src src, nir_intrinsic_instr **intrin) { @@ -222,6 +239,8 @@ opt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu, switch (alu->op) { case nir_op_bcsel: replacement = try_opt_bcsel_of_shuffle(b, alu, block_has_discard); + if (!replacement && options->optimize_load_front_face_fsign) + replacement = try_opt_front_face_fsign(b, alu); break; case nir_op_iand: case nir_op_ior: