lima: Move fdot lowering from NIR to lima

This change relocates the fdot lowering from generic NIR into the lima driver, since lima is the only consumer of this particular lowering. This avoids potential conflicts with the similar fdot lowering already present in nir_lower_alu_width.

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34757>
2025-12-22 20:00:10 +01:00 · 2025-04-30 11:38:20 +02:00 · 2025-04-30 11:38:20 +02:00 · f17d350001
commit f17d350001
parent 0f747d0990
6 changed files with 12 additions and 11 deletions
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@ -510,12 +510,6 @@ optimizations.extend([
   (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
   (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
   # Lower fdot to fsum when it is available
   (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
   (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
   (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
   (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
   # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
   # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
   # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
--- a/src/compiler/nir/nir_shader_compiler_options.h
+++ b/src/compiler/nir/nir_shader_compiler_options.h
@ -292,9 +292,6 @@ typedef struct nir_shader_compiler_options {
   /* lower fdph to fdot4 */
   bool lower_fdph;
   /** lower fdot to fmul and fsum/fadd. */
   bool lower_fdot;
   /* Does the native fdot instruction replicate its result for four
    * components?  If so, then opt_algebraic_late will turn all fdotN
    * instructions into fdotN_replicated instructions.
--- a/src/gallium/drivers/lima/ir/lima_ir.h
+++ b/src/gallium/drivers/lima/ir/lima_ir.h
@ -66,6 +66,7 @@ struct ra_regs *ppir_regalloc_init(void *mem_ctx);
 void lima_nir_lower_uniform_to_scalar(nir_shader *shader);
 bool lima_nir_scale_trig(nir_shader *shader);
 bool lima_nir_lower_ftrunc(nir_shader *shader);
 bool lima_nir_lower_fdot(nir_shader *shader);
 bool lima_nir_split_load_input(nir_shader *shader);
 bool lima_nir_split_loads(nir_shader *shader);
--- a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
+++ b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
@ -37,6 +37,14 @@ lower_ftrunc = [
        (('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a')))))
 ]
 lower_fdot = [
   # Lower fdot to fsum
   (('fdot2', 'a', 'b'), ('fsum2', ('fmul', 'a', 'b'))),
   (('fdot3', 'a', 'b'), ('fsum3', ('fmul', 'a', 'b'))),
   (('fdot4', 'a', 'b'), ('fsum4', ('fmul', 'a', 'b'))),
   (('fsum2', 'a'), ('fadd', 'a.x', 'a.y')),
 ]
 # PP fuse clamp_positive. Shared with Midgard/Bifrost
 ppir_algebraic_late = [
    (('fmax', 'a', 0.0), ('fclamp_pos', 'a')),
@ -59,6 +67,8 @@ def run():
                                      scale_trig).render())
    print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc",
                                      lower_ftrunc).render())
    print(nir_algebraic.AlgebraicPass("lima_nir_lower_fdot",
                                      lower_fdot).render())
    print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late",
                                      ppir_algebraic_late).render())
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@ -78,7 +78,6 @@ static const nir_shader_compiler_options fs_nir_options = {
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_fsign = true,
   .lower_fdot = true,
   .lower_fdph = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
@ -260,6 +259,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
      };
      NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, lima_nir_lower_fdot);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
--- a/src/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp
@ -3590,7 +3590,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
   op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
   op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
   op.lower_fdph = false;
   op.lower_fdot = false;
   op.fdot_replicates = false; // TODO
   op.lower_ffloor = false; // TODO
   op.lower_ffract = true;