From 580304350bbd4974216f4a890aa292108315f76d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 29 Dec 2024 12:58:34 -0500 Subject: [PATCH] ac/nir: optimize front_face in ac_nir_lower_ps_early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i.e. before future linking optimizations and shader_info gathering Reviewed-by: Timur Kristóf Part-of: --- src/amd/common/nir/ac_nir.h | 1 + src/amd/common/nir/ac_nir_lower_ps_early.c | 20 ++++++++++++------- .../drivers/radeonsi/si_nir_lower_abi.c | 9 --------- src/gallium/drivers/radeonsi/si_shader.c | 1 + 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index aabe84a69a8..2f91252fae3 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -275,6 +275,7 @@ typedef struct { bool force_linear_sample_interp; bool force_persp_center_interp; bool force_linear_center_interp; + int force_front_face; /* 0 -> keep, 1 -> set to true, -1 -> set to false */ unsigned ps_iter_samples; /* Outputs. */ diff --git a/src/amd/common/nir/ac_nir_lower_ps_early.c b/src/amd/common/nir/ac_nir_lower_ps_early.c index 34227ad50fd..ffd40c4ff6e 100644 --- a/src/amd/common/nir/ac_nir_lower_ps_early.c +++ b/src/amd/common/nir/ac_nir_lower_ps_early.c @@ -150,8 +150,6 @@ rewrite_ps_load_barycentric(nir_builder *b, nir_intrinsic_instr *intrin, lower_p if (!var) return false; - b->cursor = nir_before_instr(&intrin->instr); - nir_def *replacement = nir_load_var(b, var); nir_def_replace(&intrin->def, replacement); return true; @@ -189,8 +187,6 @@ optimize_lower_ps_outputs(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_ nir_def *value = intrin->src[0].ssa; bool progress = false; - b->cursor = nir_before_instr(&intrin->instr); - /* Clamp color. */ if (s->options->clamp_color) { value = nir_fsat(b, value); @@ -288,9 +284,6 @@ lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_ * The samplemask loaded by hardware is always the coverage of the * entire pixel/fragment, so mask bits out based on the sample ID. */ - - b->cursor = nir_before_instr(&intrin->instr); - uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples); nir_def *sampleid = nir_load_sample_id(b); nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid); @@ -310,6 +303,7 @@ lower_ps_intrinsic(nir_builder *b, nir_instr *instr, void *state) if (instr->type != nir_instr_type_intrinsic) return false; + b->cursor = nir_before_instr(instr); nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { @@ -325,6 +319,18 @@ lower_ps_intrinsic(nir_builder *b, nir_instr *instr, void *state) if (s->options->ps_iter_samples > 1) return lower_ps_load_sample_mask_in(b, intrin, s); break; + case nir_intrinsic_load_front_face: + if (s->options->force_front_face) { + nir_def_replace(&intrin->def, nir_imm_bool(b, s->options->force_front_face == 1)); + return true; + } + break; + case nir_intrinsic_load_front_face_fsign: + if (s->options->force_front_face) { + nir_def_replace(&intrin->def, nir_imm_float(b, s->options->force_front_face == 1 ? 1 : -1)); + return true; + } + break; default: break; } diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 1043f7c88cb..dffde5d403c 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -572,15 +572,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s case nir_intrinsic_load_alpha_reference_amd: replacement = ac_nir_load_arg(b, &args->ac, args->alpha_reference); break; - case nir_intrinsic_load_front_face: - case nir_intrinsic_load_front_face_fsign: - if (!key->ps.opt.force_front_face_input) - return false; - if (intrin->intrinsic == nir_intrinsic_load_front_face) - replacement = nir_imm_bool(b, key->ps.opt.force_front_face_input == 1); - else - replacement = nir_imm_float(b, key->ps.opt.force_front_face_input == 1 ? 1.0 : -1.0); - break; case nir_intrinsic_load_color0: case nir_intrinsic_load_color1: { uint32_t colors_read = sel->info.colors_read; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 23759797bc9..6ee6357dc19 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2528,6 +2528,7 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ .force_linear_sample_interp = key->ps.part.prolog.force_linear_sample_interp, .force_persp_center_interp = key->ps.part.prolog.force_persp_center_interp, .force_linear_center_interp = key->ps.part.prolog.force_linear_center_interp, + .force_front_face = key->ps.opt.force_front_face_input, .ps_iter_samples = 1 << key->ps.part.prolog.samplemask_log_ps_iter, .clamp_color = key->ps.part.epilog.clamp_color,