nak: replace get_io_addr_offset with nir_opt_offsets

Totals:
CodeSize: 9521188272 -> 9474779520 (-0.49%); split: -0.50%, +0.01%
Number of GPRs: 47361498 -> 47340754 (-0.04%); split: -0.05%, +0.00%
SLM Size: 5444552 -> 5444436 (-0.00%)
Static cycle count: 6182267636 -> 6141873245 (-0.65%); split: -0.69%, +0.03%
Spills to memory: 44288 -> 44241 (-0.11%)
Fills from memory: 44288 -> 44241 (-0.11%)
Spills to reg: 185307 -> 185246 (-0.03%); split: -0.06%, +0.03%
Fills from reg: 225943 -> 225895 (-0.02%); split: -0.04%, +0.01%
Max warps/SM: 50637496 -> 50646924 (+0.02%); split: +0.02%, -0.00%

Totals from 118675 (10.20% of 1163204) affected shaders:
CodeSize: 2675917792 -> 2629509040 (-1.73%); split: -1.77%, +0.04%
Number of GPRs: 7190170 -> 7169426 (-0.29%); split: -0.32%, +0.03%
SLM Size: 2694216 -> 2694100 (-0.00%)
Static cycle count: 3780817453 -> 3740423062 (-1.07%); split: -1.12%, +0.05%
Spills to memory: 40938 -> 40891 (-0.11%)
Fills from memory: 40938 -> 40891 (-0.11%)
Spills to reg: 78989 -> 78928 (-0.08%); split: -0.14%, +0.06%
Fills from reg: 83274 -> 83226 (-0.06%); split: -0.10%, +0.04%
Max warps/SM: 4219736 -> 4229164 (+0.22%); split: +0.23%, -0.01%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39525>
This commit is contained in:
Karol Herbst 2026-01-25 13:27:49 +01:00 committed by Marge Bot
parent e5bf1f5aff
commit 5890aedf8c
4 changed files with 40 additions and 131 deletions

View file

@ -460,35 +460,6 @@ impl<'a> ShaderFromNir<'a> {
self.get_ssa_ref(src).into()
}
/// Splits a NIR address into an SSA base plus an immediate offset that
/// fits in `imm_bits` bits.
///
/// Delegates the actual pattern-matching to the C helper
/// `nak_get_io_addr_offset`. When the helper finds no SSA base (the whole
/// address folded to an immediate), the base is `SrcRef::Zero`.
fn get_io_addr_offset(
    &mut self,
    addr: &nir_src,
    imm_bits: u8,
) -> (Src, i32) {
    let def = addr.as_def();
    // SAFETY: `def` is a valid nir_def borrowed from `addr`; the helper
    // only reads it, despite taking a *mut pointer.
    let split = unsafe {
        nak_get_io_addr_offset(def as *const _ as *mut _, imm_bits)
    };
    match std::ptr::NonNull::new(split.base.def) {
        Some(base_ptr) => {
            // SAFETY: a non-null base def returned by the helper refers to
            // a live nir_def in this shader.
            let base_def = unsafe { base_ptr.as_ref() };
            let comp = u8::try_from(split.base.comp).unwrap();
            let (base_ssa, _) = self.get_ssa_comp(base_def, comp);
            (base_ssa.into(), split.offset)
        }
        // No SSA base: address is purely the immediate offset.
        None => (SrcRef::Zero.into(), split.offset),
    }
}
/// Splits a cbuf address into a base source plus a 16-bit immediate
/// offset.
///
/// Cbuf accesses only encode unsigned 16-bit immediates, so if the
/// offset found by [`Self::get_io_addr_offset`] does not fit in a `u16`
/// we fall back to the whole address as the base with a zero immediate.
fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
    let (base, imm) = self.get_io_addr_offset(addr, 16);
    match u16::try_from(imm) {
        Ok(imm_u16) => (base, imm_u16),
        Err(_) => (self.get_src(addr), 0),
    }
}
/// Records `ssa` as the value produced for the NIR def `def`.
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
    self.set_ssa(def, (*ssa).into());
}
@ -2971,7 +2942,7 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_global_atomic_nv => {
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
@ -2993,7 +2964,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: atom_op,
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Global(MemAddrType::A64),
mem_order: MemOrder::Strong(MemScope::GPU),
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinsic access
@ -3003,7 +2974,7 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_global_atomic_swap_nv => {
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let cmpr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = AtomType::U(bit_size);
@ -3018,7 +2989,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Global(MemAddrType::A64),
mem_order: MemOrder::Strong(MemScope::GPU),
mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinsic access
@ -3093,12 +3064,11 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_vild_nv => {
let dst = b.alloc_ssa(RegFile::GPR);
let (idx, off) = self.get_io_addr_offset(&srcs[0], 8);
let idx = self.get_src(&srcs[0]);
b.push_op(OpViLd {
dst: dst.into(),
idx,
off: off.try_into().unwrap(),
off: intrin.base().try_into().unwrap(),
});
self.set_dst(&intrin.def, dst.into());
}
@ -3122,13 +3092,13 @@ impl<'a> ShaderFromNir<'a> {
eviction_priority: self
.get_eviction_priority(intrin.access()),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.clone().into(),
addr: addr,
offset: offset,
offset: intrin.base(),
access: access,
});
self.set_dst(&intrin.def, dst);
@ -3231,13 +3201,13 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.clone().into(),
addr: addr,
offset: offset,
offset: intrin.base(),
access: access,
});
self.set_dst(&intrin.def, dst);
@ -3252,14 +3222,13 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let offset = offset + intrin.base();
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.clone().into(),
addr: addr,
offset: offset,
offset: intrin.base(),
access: access,
});
self.set_dst(&intrin.def, dst);
@ -3268,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> {
let size_B = intrin.def.bit_size() / 8;
let mem_type = MemType::from_size(size_B, false);
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
let locked = b.alloc_ssa(RegFile::Pred);
@ -3276,7 +3245,7 @@ impl<'a> ShaderFromNir<'a> {
dst: dst.clone().into(),
locked: locked.into(),
addr,
offset,
offset: intrin.base(),
mem_type,
});
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
@ -3319,7 +3288,8 @@ impl<'a> ShaderFromNir<'a> {
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let idx = &srcs[0];
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
let off = self.get_src(&srcs[1]);
let off_imm = intrin.base() as u16;
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
@ -3329,7 +3299,7 @@ impl<'a> ShaderFromNir<'a> {
buf: CBuf::Binding(idx_imm),
offset: off_imm,
};
if off.is_zero() {
if srcs[1].is_zero() {
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to(
@ -3376,15 +3346,15 @@ impl<'a> ShaderFromNir<'a> {
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let handle = self.get_ssa_ref(&srcs[0]);
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
let off = self.get_src(&srcs[1]);
let cb = CBufRef {
buf: CBuf::BindlessSSA(handle[..].try_into().unwrap()),
offset: off_imm,
offset: intrin.base() as u16,
};
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
if off.is_zero() {
if srcs[1].is_zero() {
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to(
@ -3565,7 +3535,7 @@ impl<'a> ShaderFromNir<'a> {
}
nir_intrinsic_shared_atomic_nv => {
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
@ -3580,7 +3550,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: atom_op,
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong(MemScope::CTA),
mem_eviction_priority: MemEvictionPriority::Normal,
@ -3590,7 +3560,7 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_shared_atomic_swap_nv => {
assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let addr = self.get_src(&srcs[0]);
let cmpr = self.get_src(&srcs[1]);
let data = self.get_src(&srcs[2]);
let atom_type = AtomType::U(bit_size);
@ -3605,7 +3575,7 @@ impl<'a> ShaderFromNir<'a> {
data: data,
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type,
addr_offset: offset,
addr_offset: intrin.base(),
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong(MemScope::CTA),
mem_eviction_priority: MemEvictionPriority::Normal,
@ -3628,12 +3598,12 @@ impl<'a> ShaderFromNir<'a> {
eviction_priority: self
.get_eviction_priority(intrin.access()),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let addr = self.get_src(&srcs[1]);
b.push_op(OpSt {
addr: addr,
data: data,
offset: offset,
offset: intrin.base(),
access: access,
});
}
@ -3658,12 +3628,12 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let addr = self.get_src(&srcs[1]);
b.push_op(OpSt {
addr: addr,
data: data,
offset: offset,
offset: intrin.base(),
access: access,
});
}
@ -3678,13 +3648,12 @@ impl<'a> ShaderFromNir<'a> {
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let offset = offset + intrin.base();
let addr = self.get_src(&srcs[1]);
b.push_op(OpSt {
addr: addr,
data: data,
offset: offset,
offset: intrin.base(),
access: access,
});
}
@ -3694,14 +3663,14 @@ impl<'a> ShaderFromNir<'a> {
(srcs[0].bit_size() / 8) * srcs[0].num_components();
let mem_type = MemType::from_size(size_B, false);
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let addr = self.get_src(&srcs[1]);
let locked = b.alloc_ssa(RegFile::Pred);
b.push_op(OpStSCheckUnlock {
locked: locked.into(),
addr,
data,
offset,
offset: intrin.base(),
mem_type,
});
let locked_gpr = b.sel(locked.into(), 1.into(), 0.into());
@ -3795,14 +3764,13 @@ impl<'a> ShaderFromNir<'a> {
LdsmSize::M8N8
};
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let offset = offset + intrin.base();
let addr = self.get_src(&srcs[0]);
b.push_op(OpLdsm {
dst: dst.clone().into(),
mat_size,
mat_count,
addr,
offset,
offset: intrin.base(),
});
self.set_dst(&intrin.def, dst);
}

View file

@ -1058,6 +1058,9 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
return progress;
}
/* Options for nir_opt_offsets; the empty initializer zero-fills every
 * field.  NOTE(review): confirm that all-zero limits match the immediate
 * offset ranges NAK's load/store encodings actually support.
 */
const static struct nir_opt_offsets_options nak_offset_options = {
};
void
nak_postprocess_nir(nir_shader *nir,
const struct nak_compiler *nak,
@ -1194,6 +1197,7 @@ nak_postprocess_nir(nir_shader *nir,
}
OPT(nir, nak_nir_lower_load_store, nak);
OPT(nir, nir_opt_offsets, &nak_offset_options);
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
OPT(nir, nir_lower_int64);
@ -1282,59 +1286,3 @@ nak_postprocess_nir(nir_shader *nir,
nir_print_shader(nir, stderr);
}
}
/* Returns true if x is a constant that fits in an imm_bits-bit immediate.
 *
 * Signed immediates must lie in [u_intN_min(bits), u_intN_max(bits)];
 * unsigned immediates must not exceed u_uintN_max(bits).  Non-constant
 * scalars never qualify.
 */
static bool
scalar_is_imm_int(nir_scalar x, unsigned bits, bool is_signed)
{
   if (!nir_scalar_is_const(x))
      return false;

   if (is_signed) {
      int64_t imm = nir_scalar_as_int(x);
      return u_intN_min(bits) <= imm && imm <= u_intN_max(bits);
   } else {
      /* Fix: '<=' so the maximum representable value is accepted.  The
       * signed branch above uses inclusive bounds; the original '<' here
       * rejected an in-range immediate (off-by-one).
       */
      return nir_scalar_as_uint(x) <= u_uintN_max(bits);
   }
}
/* Splits an address into an SSA base plus an immediate offset fitting in
 * imm_bits bits.
 *
 * Three cases, in order:
 *  1. The whole address is a constant: returned as an unsigned immediate
 *     with a NULL base.
 *  2. The address is an iadd with one constant source: that constant is
 *     returned as a signed immediate and the other source as the base.
 *  3. Otherwise: the (mov-chased) address itself is the base, offset 0.
 */
struct nak_io_addr_offset
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits)
{
   nir_scalar addr_s = {
      .def = addr,
      .comp = 0,
   };

   /* If the entire address is constant, it's an unsigned immediate */
   if (scalar_is_imm_int(addr_s, imm_bits, false)) {
      /* Base is a dumb name for this. It should be offset */
      return (struct nak_io_addr_offset) {
         .offset = nir_scalar_as_int(addr_s),
      };
   }

   /* Look through mov chains before pattern-matching on the ALU op */
   addr_s = nir_scalar_chase_movs(addr_s);
   if (!nir_scalar_is_alu(addr_s) ||
       nir_scalar_alu_op(addr_s) != nir_op_iadd) {
      return (struct nak_io_addr_offset) {
         .base = addr_s,
      };
   }

   /* iadd is commutative: try the constant on either side */
   for (unsigned i = 0; i < 2; i++) {
      nir_scalar off_s = nir_scalar_chase_alu_src(addr_s, i);
      off_s = nir_scalar_chase_movs(off_s);

      /* If it's imm+indirect then the immediate is signed */
      if (scalar_is_imm_int(off_s, imm_bits, true)) {
         return (struct nak_io_addr_offset) {
            .base = nir_scalar_chase_alu_src(addr_s, 1 - i),
            .offset = nir_scalar_as_int(off_s),
         };
      }
   }

   /* iadd with no suitable constant source: whole thing is the base */
   return (struct nak_io_addr_offset) {
      .base = addr_s,
   };
}

View file

@ -65,7 +65,8 @@ lower_ldcx_to_global(nir_builder *b, nir_intrinsic_instr *load,
nir_iadd(b, addr, nir_u2u64(b, offset)),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load),
.access = ACCESS_CAN_REORDER);
.access = ACCESS_CAN_REORDER,
.base = nir_intrinsic_base(load));
}
nir_pop_if(b, NULL);
val = nir_if_phi(b, val, zero);

View file

@ -140,14 +140,6 @@ struct nak_xfb_info
nak_xfb_from_nir(const struct nak_compiler *nak,
const struct nir_xfb_info *nir_xfb);
/* A NIR address split into an SSA base plus a constant offset.
 *
 * When the whole address folded to an immediate, base.def is NULL and
 * offset holds the (unsigned) value; otherwise offset is the signed
 * immediate folded out of an iadd, or 0 if none was found.
 */
struct nak_io_addr_offset {
   nir_scalar base;
   int32_t offset;
};

/* Splits addr into base + offset where offset fits in imm_bits bits. */
struct nak_io_addr_offset
nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits);
enum nak_nir_tex_ref_type {
/** Indicates that this is a bindless texture */
NAK_NIR_TEX_REF_TYPE_BINDLESS,