mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-18 08:10:37 +02:00
nir,amd: optimize front_face ? a : -a
Foz-DB Navi31:
Totals from 3345 (4.21% of 79395) affected shaders:
MaxWaves: 96182 -> 96174 (-0.01%)
Instrs: 3135439 -> 3129508 (-0.19%); split: -0.24%, +0.05%
CodeSize: 16776088 -> 16718048 (-0.35%); split: -0.38%, +0.03%
VGPRs: 190884 -> 190848 (-0.02%); split: -0.03%, +0.01%
Latency: 32624132 -> 32621734 (-0.01%); split: -0.16%, +0.16%
InvThroughput: 5759987 -> 5749957 (-0.17%); split: -0.23%, +0.05%
VClause: 51044 -> 51086 (+0.08%); split: -0.12%, +0.20%
SClause: 103415 -> 103223 (-0.19%); split: -0.64%, +0.45%
Copies: 170398 -> 170555 (+0.09%); split: -0.64%, +0.74%
PreSGPRs: 135567 -> 133887 (-1.24%)
PreVGPRs: 140569 -> 141317 (+0.53%)
VALU: 1959144 -> 1953839 (-0.27%); split: -0.30%, +0.03%
SALU: 217956 -> 217676 (-0.13%); split: -0.20%, +0.07%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32791>
This commit is contained in:
parent
9bd4296845
commit
e112e2b047
3 changed files with 25 additions and 0 deletions
|
|
@ -97,6 +97,7 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm,
|
|||
options->vectorize_vec2_16bit = info->has_packed_math_16bit;
|
||||
options->discard_is_demote = true;
|
||||
options->optimize_sample_mask_in = true;
|
||||
options->optimize_load_front_face_fsign = true;
|
||||
options->io_options = nir_io_has_flexible_input_interpolation_except_flat |
|
||||
(info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) |
|
||||
nir_io_prefer_scalar_fs_inputs |
|
||||
|
|
|
|||
|
|
@ -4016,6 +4016,11 @@ typedef struct nir_shader_compiler_options {
|
|||
*/
|
||||
bool optimize_sample_mask_in;
|
||||
|
||||
/**
|
||||
* Optimize load_front_face ? a : -a to load_front_face_fsign * a
|
||||
*/
|
||||
bool optimize_load_front_face_fsign;
|
||||
|
||||
/**
|
||||
* Optimize boolean reductions of quad broadcasts. This should only be enabled if
|
||||
* nir_intrinsic_reduce supports INCLUDE_HELPERS.
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_search_helpers.h"
|
||||
|
||||
/**
|
||||
* \file nir_opt_intrinsics.c
|
||||
|
|
@ -89,6 +90,22 @@ try_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu,
|
|||
return shuffle;
|
||||
}
|
||||
|
||||
/**
 * load_front_face ? a : -a -> load_front_face_fsign * a
 *
 * Tries to replace a select whose condition is load_front_face and whose two
 * value sources are float negations of each other with a single fmul by
 * load_front_face_fsign. In this file it is invoked from opt_intrinsics_alu()
 * for nir_op_bcsel when options->optimize_load_front_face_fsign is set.
 *
 * \param b    builder used to emit the replacement instructions
 * \param alu  the candidate ALU instruction (source 0 is the condition,
 *             sources 1 and 2 are the selected values)
 * \return the new fmul def, or NULL if the pattern does not match; no
 *         builder call is made before the NULL return.
 */
|
||||
static nir_def *
|
||||
try_opt_front_face_fsign(nir_builder *b, nir_alu_instr *alu)
|
||||
{
|
||||
/* Bail out unless all of the following hold:
 *  - the result is 32 bits wide,
 *  - the condition (source 0) is produced directly by a
 *    load_front_face intrinsic,
 *  - is_only_used_as_float() accepts the result (presumably: every user
 *    interprets it as a float, so going through fmul is acceptable --
 *    helper is defined outside this view),
 *  - sources 1 and 2 compare as negatives of each other when treated as
 *    floats.
 */
if (alu->def.bit_size != 32 ||
|
||||
!nir_src_as_intrinsic(alu->src[0].src) ||
|
||||
nir_src_as_intrinsic(alu->src[0].src)->intrinsic != nir_intrinsic_load_front_face ||
|
||||
!is_only_used_as_float(alu) ||
|
||||
!nir_alu_srcs_negative_equal_typed(alu, alu, 1, 2, nir_type_float))
|
||||
return NULL;
|
||||
|
||||
/* Source 1 is the value selected when the condition is true; it is the
 * operand that gets multiplied.
 */
nir_def *src = nir_ssa_for_alu_src(b, alu, 1);
|
||||
|
||||
/* NOTE(review): relies on load_front_face_fsign being +1.0 for front faces
 * and -1.0 for back faces -- confirm against the intrinsic definition.
 */
return nir_fmul(b, nir_load_front_face_fsign(b), src);
|
||||
}
|
||||
|
||||
static bool
|
||||
src_is_quad_broadcast(nir_block *block, nir_src src, nir_intrinsic_instr **intrin)
|
||||
{
|
||||
|
|
@ -222,6 +239,8 @@ opt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu,
|
|||
switch (alu->op) {
|
||||
case nir_op_bcsel:
|
||||
replacement = try_opt_bcsel_of_shuffle(b, alu, block_has_discard);
|
||||
if (!replacement && options->optimize_load_front_face_fsign)
|
||||
replacement = try_opt_front_face_fsign(b, alu);
|
||||
break;
|
||||
case nir_op_iand:
|
||||
case nir_op_ior:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue