nak,nir: Add 32-bit nir_op_lea_nv and use it

Reduces code size by 0.80% on shaderdb. Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32517>
2026-05-05 00:58:05 +02:00 · 2024-11-15 15:56:46 -05:00 · 2024-11-15 15:56:46 -05:00 · 0470643047
commit 0470643047
parent 54fcc63d3e
5 changed files with 51 additions and 0 deletions
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@ -1412,6 +1412,11 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
        dst |= ((uint32_t)x) << i * 8;
    }""")

+# Address arithmetic instruction (shift and add): src0 + (src1 << src2).
+# The shift amount (src2) must be a constant; it is taken modulo bit_size.
+opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
+       "", "src0 + (src1 << (src2 % bit_size))")
+
 # 24b multiply into 32b result (with sign extension)
 binop("imul24", tint32, _2src_commutative + associative,
      "(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@ -541,6 +541,24 @@ is_used_by_non_fsat(const nir_alu_instr *instr)
   return false;
 }

+static inline bool
+is_used_by_non_ldc_nv(const nir_alu_instr *instr) /* true if any visited use of instr->def is not an ldc_nv intrinsic */
+{
+   nir_foreach_use(src, &instr->def) {
+      const nir_instr *const user_instr = nir_src_parent_instr(src);
+
+      if (user_instr->type != nir_instr_type_intrinsic)
+         return true; /* user is not an intrinsic at all, so it cannot be ldc_nv */
+
+      const nir_intrinsic_instr *const user_intrin = nir_instr_as_intrinsic(user_instr);
+
+      if (user_intrin->intrinsic != nir_intrinsic_ldc_nv)
+         return true; /* user is an intrinsic, but not ldc_nv */
+   }
+
+   return false; /* every use visited (possibly none) was an ldc_nv intrinsic */
+}
+
 static inline bool
 is_only_used_as_float_impl(const nir_alu_instr *instr, unsigned depth)
 {
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@ -612,6 +612,24 @@ pub trait SSABuilder: Builder {
        dst
    }

+    fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSARef { // pushes one OpLea writing a fresh 1-component GPR SSA value and returns that value
+        let dst = self.alloc_ssa(RegFile::GPR, 1);
+        assert!(self.sm() >= 70); // panics when the target SM is below 70
+
+        self.push_op(OpLea {
+            dst: dst.into(),
+            overflow: Dst::None, // no overflow destination is requested
+            a: a,
+            b: b,
+            a_high: 0.into(), // high part of `a` is the constant 0 for this 32-bit form
+            dst_high: false,
+            shift: shift % 32, // wraps the caller's shift amount into 0..=31
+            intermediate_mod: SrcMod::None,
+        });
+
+        dst
+    }
+
    fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
        let dst = if x.is_predicate() {
            self.alloc_ssa(RegFile::Pred, 1)
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@ -1478,6 +1478,12 @@ impl<'a> ShaderFromNir<'a> {
                    b.shr(srcs[0], srcs[1], true)
                }
            }
+            nir_op_lea_nv => {
+                let src_a = srcs[1]; // NIR src1 (the shifted operand in lea_nv's definition) is passed as lea()'s `a`
+                let src_b = srcs[0]; // NIR src0 (the addend) is passed as lea()'s `b`; presumably OpLea computes (a << shift) + b -- TODO confirm
+                let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8; // unwrap panics if no value is returned (presumably a non-constant src2); `as u8` keeps the low 8 bits
+                b.lea(src_a, src_b, shift)
+            }
            nir_op_isub => match alu.def.bit_size {
                32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
                64 => b.iadd64(srcs[0], srcs[1].ineg(), 0.into()),
--- a/src/nouveau/compiler/nak_nir_algebraic.py
+++ b/src/nouveau/compiler/nak_nir_algebraic.py
@ -27,6 +27,7 @@ import sys
 a = 'a'
 b = 'b'
 c = 'c'
+s = 's'  # pattern variable name used for the shift amount in the lea_nv rule

 # common conditions to improve readability
 volta = 'nak->sm >= 70 && nak->sm < 75'
@ -38,6 +39,9 @@ algebraic_lowering = [
    (('umin', 'a', 'b'), ('bcsel', ('ult', a, b), a, b), volta),
    (('umax', 'a', 'b'), ('bcsel', ('ult', a, b), b, a), volta),
    (('iadd', 'a@64', ('ineg', 'b@64')), ('isub', a, b)),
+
+    (('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),  # 32-bit a + (b << constant s), only if the iadd has a use that is not ldc_nv
+        ('lea_nv', a, b, s), 'nak->sm >= 70'),  # becomes a single lea_nv; rule is gated on SM >= 70
 ]

 def main():