diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index ba9388b9196..4faba43fc5b 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -460,35 +460,6 @@ impl<'a> ShaderFromNir<'a> { self.get_ssa_ref(src).into() } - fn get_io_addr_offset( - &mut self, - addr: &nir_src, - imm_bits: u8, - ) -> (Src, i32) { - let addr = addr.as_def(); - let addr_offset = unsafe { - nak_get_io_addr_offset(addr as *const _ as *mut _, imm_bits) - }; - - if let Some(base_def) = std::ptr::NonNull::new(addr_offset.base.def) { - let base_def = unsafe { base_def.as_ref() }; - let base_comp = u8::try_from(addr_offset.base.comp).unwrap(); - let (base, _) = self.get_ssa_comp(base_def, base_comp); - (base.into(), addr_offset.offset) - } else { - (SrcRef::Zero.into(), addr_offset.offset) - } - } - - fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) { - let (off, off_imm) = self.get_io_addr_offset(addr, 16); - if let Ok(off_imm_u16) = u16::try_from(off_imm) { - (off, off_imm_u16) - } else { - (self.get_src(addr), 0) - } - } - fn set_dst(&mut self, def: &nir_def, ssa: SSARef) { self.set_ssa(def, (*ssa).into()); } @@ -2971,7 +2942,7 @@ impl<'a> ShaderFromNir<'a> { } nir_intrinsic_global_atomic_nv => { let bit_size = intrin.def.bit_size(); - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let data = self.get_src(&srcs[1]); let atom_type = self.get_atomic_type(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate); @@ -2993,7 +2964,7 @@ impl<'a> ShaderFromNir<'a> { data: data, atom_op: atom_op, atom_type: atom_type, - addr_offset: offset, + addr_offset: intrin.base(), mem_space: MemSpace::Global(MemAddrType::A64), mem_order: MemOrder::Strong(MemScope::GPU), mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access @@ -3003,7 +2974,7 @@ impl<'a> ShaderFromNir<'a> { nir_intrinsic_global_atomic_swap_nv => { assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg); let bit_size = intrin.def.bit_size(); - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let cmpr = self.get_src(&srcs[1]); let data = self.get_src(&srcs[2]); let atom_type = AtomType::U(bit_size); @@ -3018,7 +2989,7 @@ impl<'a> ShaderFromNir<'a> { data: data, atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate), atom_type: atom_type, - addr_offset: offset, + addr_offset: intrin.base(), mem_space: MemSpace::Global(MemAddrType::A64), mem_order: MemOrder::Strong(MemScope::GPU), mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinic access @@ -3093,12 +3064,11 @@ impl<'a> ShaderFromNir<'a> { } nir_intrinsic_vild_nv => { let dst = b.alloc_ssa(RegFile::GPR); - - let (idx, off) = self.get_io_addr_offset(&srcs[0], 8); + let idx = self.get_src(&srcs[0]); b.push_op(OpViLd { dst: dst.into(), idx, - off: off.try_into().unwrap(), + off: intrin.base().try_into().unwrap(), }); self.set_dst(&intrin.def, dst.into()); } @@ -3122,13 +3092,13 @@ impl<'a> ShaderFromNir<'a> { eviction_priority: self .get_eviction_priority(intrin.access()), }; - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); b.push_op(OpLd { dst: dst.clone().into(), addr: addr, - offset: offset, + offset: intrin.base(), access: access, }); self.set_dst(&intrin.def, dst); @@ -3231,13 +3201,13 @@ impl<'a> ShaderFromNir<'a> { order: MemOrder::Strong(MemScope::CTA), eviction_priority: MemEvictionPriority::Normal, }; - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); b.push_op(OpLd { dst: dst.clone().into(), addr: addr, - offset: offset, + offset: intrin.base(), access: access, }); self.set_dst(&intrin.def, dst); @@ -3252,14 +3222,13 @@ impl<'a> ShaderFromNir<'a> { order: MemOrder::Strong(MemScope::CTA), eviction_priority: MemEvictionPriority::Normal, }; - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); - let offset = offset + intrin.base(); + let addr = self.get_src(&srcs[0]); let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); b.push_op(OpLd { dst: dst.clone().into(), addr: addr, - offset: offset, + offset: intrin.base(), access: access, }); self.set_dst(&intrin.def, dst); @@ -3268,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> { let size_B = intrin.def.bit_size() / 8; let mem_type = MemType::from_size(size_B, false); - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); let locked = b.alloc_ssa(RegFile::Pred); @@ -3276,7 +3245,7 @@ impl<'a> ShaderFromNir<'a> { dst: dst.clone().into(), locked: locked.into(), addr, - offset, + offset: intrin.base(), mem_type, }); let locked_gpr = b.sel(locked.into(), 1.into(), 0.into()); @@ -3319,7 +3288,8 @@ impl<'a> ShaderFromNir<'a> { (intrin.def.bit_size() / 8) * intrin.def.num_components(); let idx = &srcs[0]; - let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]); + let off = self.get_src(&srcs[1]); + let off_imm = intrin.base() as u16; let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); @@ -3329,7 +3299,7 @@ impl<'a> ShaderFromNir<'a> { buf: CBuf::Binding(idx_imm), offset: off_imm, }; - if off.is_zero() { + if srcs[1].is_zero() { for (i, comp) in dst.iter().enumerate() { let i = u16::try_from(i).unwrap(); b.copy_to( @@ -3376,15 +3346,15 @@ impl<'a> ShaderFromNir<'a> { (intrin.def.bit_size() / 8) * intrin.def.num_components(); let handle = self.get_ssa_ref(&srcs[0]); - let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]); + let off = self.get_src(&srcs[1]); let cb = CBufRef { buf: CBuf::BindlessSSA(handle[..].try_into().unwrap()), - offset: off_imm, + offset: intrin.base() as u16, }; let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); - if off.is_zero() { + if srcs[1].is_zero() { for (i, comp) in dst.iter().enumerate() { let i = u16::try_from(i).unwrap(); b.copy_to( @@ -3565,7 +3535,7 @@ impl<'a> ShaderFromNir<'a> { } nir_intrinsic_shared_atomic_nv => { let bit_size = intrin.def.bit_size(); - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let data = self.get_src(&srcs[1]); let atom_type = self.get_atomic_type(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate); @@ -3580,7 +3550,7 @@ impl<'a> ShaderFromNir<'a> { data: data, atom_op: atom_op, atom_type: atom_type, - addr_offset: offset, + addr_offset: intrin.base(), mem_space: MemSpace::Shared, mem_order: MemOrder::Strong(MemScope::CTA), mem_eviction_priority: MemEvictionPriority::Normal, @@ -3590,7 +3560,7 @@ impl<'a> ShaderFromNir<'a> { nir_intrinsic_shared_atomic_swap_nv => { assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg); let bit_size = intrin.def.bit_size(); - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); + let addr = self.get_src(&srcs[0]); let cmpr = self.get_src(&srcs[1]); let data = self.get_src(&srcs[2]); let atom_type = AtomType::U(bit_size); @@ -3605,7 +3575,7 @@ impl<'a> ShaderFromNir<'a> { data: data, atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate), atom_type: atom_type, - addr_offset: offset, + addr_offset: intrin.base(), mem_space: MemSpace::Shared, mem_order: MemOrder::Strong(MemScope::CTA), mem_eviction_priority: MemEvictionPriority::Normal, @@ -3628,12 +3598,12 @@ impl<'a> ShaderFromNir<'a> { eviction_priority: self .get_eviction_priority(intrin.access()), }; - let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24); + let addr = self.get_src(&srcs[1]); b.push_op(OpSt { addr: addr, data: data, - offset: offset, + offset: intrin.base(), access: access, }); } @@ -3658,12 +3628,12 @@ impl<'a> ShaderFromNir<'a> { order: MemOrder::Strong(MemScope::CTA), eviction_priority: MemEvictionPriority::Normal, }; - let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24); + let addr = self.get_src(&srcs[1]); b.push_op(OpSt { addr: addr, data: data, - offset: offset, + offset: intrin.base(), access: access, }); } @@ -3678,13 +3648,12 @@ impl<'a> ShaderFromNir<'a> { order: MemOrder::Strong(MemScope::CTA), eviction_priority: MemEvictionPriority::Normal, }; - let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24); - let offset = offset + intrin.base(); + let addr = self.get_src(&srcs[1]); b.push_op(OpSt { addr: addr, data: data, - offset: offset, + offset: intrin.base(), access: access, }); } @@ -3694,14 +3663,14 @@ impl<'a> ShaderFromNir<'a> { (srcs[0].bit_size() / 8) * srcs[0].num_components(); let mem_type = MemType::from_size(size_B, false); - let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24); + let addr = self.get_src(&srcs[1]); let locked = b.alloc_ssa(RegFile::Pred); b.push_op(OpStSCheckUnlock { locked: locked.into(), addr, data, - offset, + offset: intrin.base(), mem_type, }); let locked_gpr = b.sel(locked.into(), 1.into(), 0.into()); @@ -3795,14 +3764,13 @@ impl<'a> ShaderFromNir<'a> { LdsmSize::M8N8 }; let dst = b.alloc_ssa_vec(RegFile::GPR, comps); - let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); - let offset = offset + intrin.base(); + let addr = self.get_src(&srcs[0]); b.push_op(OpLdsm { dst: dst.clone().into(), mat_size, mat_count, addr, - offset, + offset: intrin.base(), }); self.set_dst(&intrin.def, dst); } diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 45253a3976f..0c4dac5824a 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1058,6 +1058,9 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak) return progress; } +const static struct nir_opt_offsets_options nak_offset_options = { +}; + void nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak, @@ -1194,6 +1197,7 @@ nak_postprocess_nir(nir_shader *nir, } OPT(nir, nak_nir_lower_load_store, nak); + OPT(nir, nir_opt_offsets, &nak_offset_options); OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options); OPT(nir, nir_lower_int64); @@ -1282,59 +1286,3 @@ nak_postprocess_nir(nir_shader *nir, nir_print_shader(nir, stderr); } } - -static bool -scalar_is_imm_int(nir_scalar x, unsigned bits, bool is_signed) -{ - if (!nir_scalar_is_const(x)) - return false; - - if (is_signed) { - int64_t imm = nir_scalar_as_int(x); - return u_intN_min(bits) <= imm && imm <= u_intN_max(bits); - } else { - return nir_scalar_as_uint(x) < u_uintN_max(bits); - } -} - -struct nak_io_addr_offset -nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits) -{ - nir_scalar addr_s = { - .def = addr, - .comp = 0, - }; - - /* If the entire address is constant, it's an unsigned immediate */ - if (scalar_is_imm_int(addr_s, imm_bits, false)) { - /* Base is a dumb name for this. It should be offset */ - return (struct nak_io_addr_offset) { - .offset = nir_scalar_as_int(addr_s), - }; - } - - addr_s = nir_scalar_chase_movs(addr_s); - if (!nir_scalar_is_alu(addr_s) || - nir_scalar_alu_op(addr_s) != nir_op_iadd) { - return (struct nak_io_addr_offset) { - .base = addr_s, - }; - } - - for (unsigned i = 0; i < 2; i++) { - nir_scalar off_s = nir_scalar_chase_alu_src(addr_s, i); - off_s = nir_scalar_chase_movs(off_s); - - /* If it's imm+indirect then the immediate is signed */ - if (scalar_is_imm_int(off_s, imm_bits, true)) { - return (struct nak_io_addr_offset) { - .base = nir_scalar_chase_alu_src(addr_s, 1 - i), - .offset = nir_scalar_as_int(off_s), - }; - } - } - - return (struct nak_io_addr_offset) { - .base = addr_s, - }; -} diff --git a/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c b/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c index 24034533069..4f240740272 100644 --- a/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c +++ b/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c @@ -65,7 +65,8 @@ lower_ldcx_to_global(nir_builder *b, nir_intrinsic_instr *load, nir_iadd(b, addr, nir_u2u64(b, offset)), .align_mul = nir_intrinsic_align_mul(load), .align_offset = nir_intrinsic_align_offset(load), - .access = ACCESS_CAN_REORDER); + .access = ACCESS_CAN_REORDER, + .base = nir_intrinsic_base(load)); } nir_pop_if(b, NULL); val = nir_if_phi(b, val, zero); diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index 57ec68f7885..7c65d8f6662 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -140,14 +140,6 @@ struct nak_xfb_info nak_xfb_from_nir(const struct nak_compiler *nak, const struct nir_xfb_info *nir_xfb); -struct nak_io_addr_offset { - nir_scalar base; - int32_t offset; -}; - -struct nak_io_addr_offset -nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits); - enum nak_nir_tex_ref_type { /** Indicates that this is a bindless texture */ NAK_NIR_TEX_REF_TYPE_BINDLESS,