From f17d350001a195c89633f083866c5afb7563f78c Mon Sep 17 00:00:00 2001
From: Christian Gmeiner <cgmeiner@igalia.com>
Date: Wed, 30 Apr 2025 11:38:20 +0200
Subject: [PATCH] lima: Move fdot lowering from NIR to lima

This change relocates the fdot lowering from generic NIR into the lima
driver, since lima is the only consumer of this particular lowering. This
avoids potential conflicts with the similar fdot lowering already present
in nir_lower_alu_width.

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34757>
---
 src/compiler/nir/nir_opt_algebraic.py             |  6 ------
 src/compiler/nir/nir_shader_compiler_options.h    |  3 ---
 src/gallium/drivers/lima/ir/lima_ir.h             |  1 +
 src/gallium/drivers/lima/ir/lima_nir_algebraic.py | 10 ++++++++++
 src/gallium/drivers/lima/lima_program.c           |  2 +-
 src/nouveau/codegen/nv50_ir_from_nir.cpp          |  1 -
 6 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index d88de408239..9d1c0ce30eb 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -510,12 +510,6 @@ optimizations.extend([
    (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
    (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
 
-   # Lower fdot to fsum when it is available
-   (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
-   (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
-   (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
-   (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
-
    # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
    # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
    # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h
index e976b9e547b..db9f4e11ee5 100644
--- a/src/compiler/nir/nir_shader_compiler_options.h
+++ b/src/compiler/nir/nir_shader_compiler_options.h
@@ -292,9 +292,6 @@ typedef struct nir_shader_compiler_options {
    /* lower fdph to fdot4 */
    bool lower_fdph;
 
-   /** lower fdot to fmul and fsum/fadd. */
-   bool lower_fdot;
-
    /* Does the native fdot instruction replicate its result for four
     * components?  If so, then opt_algebraic_late will turn all fdotN
     * instructions into fdotN_replicated instructions.
diff --git a/src/gallium/drivers/lima/ir/lima_ir.h b/src/gallium/drivers/lima/ir/lima_ir.h
index d94e3f70637..fbc3f2cd8c9 100644
--- a/src/gallium/drivers/lima/ir/lima_ir.h
+++ b/src/gallium/drivers/lima/ir/lima_ir.h
@@ -66,6 +66,7 @@ struct ra_regs *ppir_regalloc_init(void *mem_ctx);
 void lima_nir_lower_uniform_to_scalar(nir_shader *shader);
 bool lima_nir_scale_trig(nir_shader *shader);
 bool lima_nir_lower_ftrunc(nir_shader *shader);
+bool lima_nir_lower_fdot(nir_shader *shader);
 bool lima_nir_split_load_input(nir_shader *shader);
 bool lima_nir_split_loads(nir_shader *shader);
 
diff --git a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
index 3074c839717..fb0e53b9074 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
+++ b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
@@ -37,6 +37,14 @@ lower_ftrunc = [
         (('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a')))))
 ]
 
+lower_fdot = [
+   # Lower fdotN to fsumN of the fmul of both operands; fsum2 is rewritten as fadd of a.x and a.y
+   (('fdot2', 'a', 'b'), ('fsum2', ('fmul', 'a', 'b'))),
+   (('fdot3', 'a', 'b'), ('fsum3', ('fmul', 'a', 'b'))),
+   (('fdot4', 'a', 'b'), ('fsum4', ('fmul', 'a', 'b'))),
+   (('fsum2', 'a'), ('fadd', 'a.x', 'a.y')),
+]
+
 # PP fuse clamp_positive. Shared with Midgard/Bifrost
 ppir_algebraic_late = [
     (('fmax', 'a', 0.0), ('fclamp_pos', 'a')),
@@ -59,6 +67,8 @@ def run():
                                       scale_trig).render())
     print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc",
                                       lower_ftrunc).render())
+    print(nir_algebraic.AlgebraicPass("lima_nir_lower_fdot",
+                                      lower_fdot).render())
     print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late",
                                       ppir_algebraic_late).render())
 
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 1b5093030aa..f9fb395a02e 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -78,7 +78,6 @@ static const nir_shader_compiler_options fs_nir_options = {
    .lower_flrp32 = true,
    .lower_flrp64 = true,
    .lower_fsign = true,
-   .lower_fdot = true,
    .lower_fdph = true,
    .lower_insert_byte = true,
    .lower_insert_word = true,
@@ -260,6 +259,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
       };
       NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
       NIR_PASS(progress, s, nir_opt_algebraic);
+      NIR_PASS(progress, s, lima_nir_lower_fdot);
       NIR_PASS(progress, s, nir_opt_constant_folding);
       NIR_PASS(progress, s, nir_opt_undef);
       NIR_PASS(progress, s, nir_opt_loop_unroll);
diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp
index eb4af297f24..ccb32c29e7d 100644
--- a/src/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3590,7 +3590,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
    op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
    op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
    op.lower_fdph = false;
-   op.lower_fdot = false;
    op.fdot_replicates = false; // TODO
    op.lower_ffloor = false; // TODO
    op.lower_ffract = true;