Merge branch 'nak-isbe-instrs' into 'main'

nak: Implement ISBEWR and extend ISBERD implementation

See merge request mesa/mesa!39716
This commit is contained in:
Mary Guillemard 2026-03-11 05:55:19 +01:00
commit 413f82cb85
15 changed files with 453 additions and 15 deletions

View file

@ -994,6 +994,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_dpas_intel:
case nir_intrinsic_convert_cmat_intel:
case nir_intrinsic_isberd_nv:
case nir_intrinsic_isbewr_nv:
case nir_intrinsic_vild_nv:
case nir_intrinsic_al2p_nv:
case nir_intrinsic_ald_nv:

View file

@ -2768,8 +2768,15 @@ intrinsic("ldcx_nv", dest_comp=0, src_comp=[1, 1],
flags=[CAN_ELIMINATE, CAN_REORDER])
intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64],
indices=[ACCESS, BASE, DIVERGENT], flags=[CAN_ELIMINATE])
# ISBERD read, lowered by NAK to OpIsberd.
# src[] = { offset }.
# BASE is the immediate offset that NAK adds to the offset source.
# RANGE_BASE/RANGE describe the attribute range NAK records as read/written.
# FLAGS is struct nak_nir_isbe_flags
#
# This intrinsic must be registered exactly once; the earlier 32-bit-only
# registration without indices is superseded by this one.
intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[8, 16, 32],
          indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS],
          flags=[CAN_ELIMINATE])
# ISBEWR write, lowered by NAK to OpIsbewr. It has no destination and no
# CAN_ELIMINATE/CAN_REORDER flags.
# src[] = { data, offset }.
# NAK's translation asserts that data has a single component.
# BASE is the immediate offset that NAK adds to the offset source.
# RANGE_BASE/RANGE describe the attribute range NAK records as read/written.
# FLAGS is struct nak_nir_isbe_flags
intrinsic("isbewr_nv", src_comp=[0, 1],
indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS], flags=[])
intrinsic("vild_nv", dest_comp=1, src_comp=[1], bit_sizes=[32],
flags=[CAN_ELIMINATE, CAN_REORDER],
indices=[BASE])

View file

@ -383,6 +383,10 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
/* Always signed offset */
case nir_intrinsic_cmat_load_shared_nv:
return try_fold_load_store(b, intrin, state, 0, -8388608, 0x7fffff, false);
case nir_intrinsic_isberd_nv:
return try_fold_load_store(b, intrin, state, 0, 0, get_max(state, intrin, 0), false);
case nir_intrinsic_isbewr_nv:
return try_fold_load_store(b, intrin, state, 1, 0, get_max(state, intrin, 0), false);
default:
return false;
}

View file

@ -3033,13 +3033,104 @@ impl<'a> ShaderFromNir<'a> {
self.set_dst(&intrin.def, dst.into());
}
nir_intrinsic_isberd_nv => {
let base = u16::try_from(intrin.range_base()).unwrap();
let range = u16::try_from(intrin.range()).unwrap();
let range = base..(base + range);
let flags = intrin.flags();
let flags: nak_nir_isbe_flags =
unsafe { std::mem::transmute_copy(&flags) };
assert!(intrin.def.num_components() == 1);
let size_B = intrin.def.bit_size() / 8;
let access_type = match flags.access() {
NAK_ISBE_ACCESS_MAP => IsbeAccessType::Map,
NAK_ISBE_ACCESS_PATCH => IsbeAccessType::Patch,
NAK_ISBE_ACCESS_PRIM => IsbeAccessType::Primitive,
NAK_ISBE_ACCESS_ATTR => IsbeAccessType::Attribute,
_ => panic!("Invalid ISBE access {}", flags.access()),
};
if matches!(access_type, IsbeAccessType::Attribute)
&& !flags.per_primitive()
&& !range.is_empty()
{
if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
if flags.output() {
io.mark_attrs_written(range);
} else {
io.mark_attrs_read(range);
}
} else {
panic!("Must be a VTG stage");
}
}
let dst = b.alloc_ssa(RegFile::GPR);
b.push_op(OpIsberd {
dst: dst.into(),
idx: self.get_src(&srcs[0]),
offset: self.get_src(&srcs[0]),
imm_offset: u16::try_from(intrin.base()).unwrap(),
output: flags.output(),
skew: flags.skew(),
mem_type: MemType::from_size(size_B, false),
access_type,
});
self.set_dst(&intrin.def, dst.into());
}
nir_intrinsic_isbewr_nv => {
let base = u16::try_from(intrin.range_base()).unwrap();
let range = u16::try_from(intrin.range()).unwrap();
let range = base..(base + range);
let flags = intrin.flags();
let flags: nak_nir_isbe_flags =
unsafe { std::mem::transmute_copy(&flags) };
assert!(srcs[0].num_components() == 1);
let size_B = srcs[0].bit_size() / 8;
let access_type = match flags.access() {
NAK_ISBE_ACCESS_MAP => IsbeAccessType::Map,
NAK_ISBE_ACCESS_PATCH => {
panic!("PATCH access is invalid in ISBEWR")
}
NAK_ISBE_ACCESS_PRIM => {
panic!("PRIM access is invalid in ISBEWR")
}
NAK_ISBE_ACCESS_ATTR => IsbeAccessType::Attribute,
_ => panic!("Invalid ISBE access {}", flags.access()),
};
if matches!(access_type, IsbeAccessType::Attribute)
&& !flags.per_primitive()
&& !range.is_empty()
{
if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
if flags.output() {
io.mark_store_req(range.clone());
io.mark_attrs_written(range);
} else {
io.mark_attrs_read(range);
}
} else {
panic!("Must be a VTG stage");
}
}
b.push_op(OpIsbewr {
offset: self.get_src(&srcs[1]),
imm_offset: u16::try_from(intrin.base()).unwrap(),
data: self.get_src(&srcs[0]),
output: flags.output(),
skew: flags.skew(),
mem_type: MemType::from_size(size_B, false),
access_type,
});
}
nir_intrinsic_vild_nv => {
let dst = b.alloc_ssa(RegFile::GPR);
let idx = self.get_src(&srcs[0]);

View file

@ -7233,23 +7233,108 @@ impl DisplayOp for OpCS2R {
}
impl_display_for_op!(OpCS2R);
/// Access type selector carried by the ISBERD and ISBEWR ops.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IsbeAccessType {
    Map,
    Patch,
    Primitive,
    Attribute,
}

impl fmt::Display for IsbeAccessType {
    /// Writes the short lowercase mnemonic that the op printers append after
    /// a `.` in the instruction name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match *self {
            Self::Map => "map",
            Self::Patch => "patch",
            Self::Primitive => "prim",
            Self::Attribute => "attr",
        };
        f.write_str(mnemonic)
    }
}
/// ISBERD op: loads a `mem_type`-sized value addressed by
/// `offset + imm_offset` into `dst`.
///
/// The layout is `#[repr(C)]` and the source/destination slices are derived,
/// so the order of the fields below must not be changed.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIsberd {
/// Destination register receiving the loaded value.
#[dst_type(GPR)]
pub dst: Dst,
// NOTE(review): `idx` and `offset` are both fed from the same NIR source by
// the translation, and the encoder test builds this struct without `idx`.
// This looks like the pre-`offset` field left behind -- confirm and drop it
// together with its remaining readers.
#[src_type(SSA)]
pub idx: Src,
/// Register part of the address, printed inside the brackets.
#[src_type(GPR)]
pub offset: Src,
/// Immediate added to `offset`. The SM70 encoder only accepts a non-zero
/// value on SM86+, and the SM50 encoder requires it to be zero.
pub imm_offset: u16,
/// Access size; the SM70 encoder accepts `U8`, `U16` and `B32`.
pub mem_type: MemType,
/// Access type selector, printed as a `.`-suffix of the mnemonic.
pub access_type: IsbeAccessType,
/// Printed as the `.o` modifier.
pub output: bool,
/// Printed as the `.skew` modifier.
pub skew: bool,
}
impl DisplayOp for OpIsberd {
    /// Prints `isberd[.o].<access>[.skew]<mem_type> <dst> [<offset>]`, with
    /// `+0x<imm>` appended inside the brackets when `imm_offset` is non-zero.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The old single-line `isberd [idx]` form is gone: it had no `?` or
        // `;` and would not parse ahead of the statements below.
        write!(f, "isberd")?;
        if self.output {
            write!(f, ".o")?;
        }
        write!(f, ".{}", self.access_type)?;
        if self.skew {
            write!(f, ".skew")?;
        }
        write!(f, "{} {}", self.mem_type, self.dst)?;
        if self.imm_offset != 0 {
            write!(f, " [{}+0x{:x}]", self.offset, self.imm_offset)
        } else {
            write!(f, " [{}]", self.offset)
        }
    }
}
impl_display_for_op!(OpIsberd);
/// ISBEWR op: stores the `mem_type`-sized value in `data` at the address
/// `offset + imm_offset`. It has no destination.
///
/// The layout is `#[repr(C)]` and the source slice is derived, so the order
/// of the fields below must not be changed.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIsbewr {
/// Register part of the address, printed inside the brackets.
#[src_type(GPR)]
pub offset: Src,
/// Value to store, printed after the address.
#[src_type(GPR)]
pub data: Src,
/// Immediate added to `offset`. The SM70 encoder only accepts a non-zero
/// value on SM86+.
pub imm_offset: u16,
/// Access size; the SM70 encoder accepts `U8`, `U16` and `B32`.
pub mem_type: MemType,
/// Access type selector. The SM70 encoder only accepts `Map` and
/// `Attribute` for this op and panics on the others.
pub access_type: IsbeAccessType,
/// Printed as the `.o` modifier.
pub output: bool,
/// Printed as the `.skew` modifier.
pub skew: bool,
}
impl DisplayOp for OpIsbewr {
    /// Prints `isbewr[.o].<access>[.skew]<mem_type> [<offset>] <data>`, with
    /// `+0x<imm>` appended inside the brackets when `imm_offset` is non-zero.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Mnemonic followed by its modifiers.
        f.write_str("isbewr")?;
        if self.output {
            f.write_str(".o")?;
        }
        write!(f, ".{}", self.access_type)?;
        if self.skew {
            f.write_str(".skew")?;
        }
        write!(f, "{}", self.mem_type)?;

        // Address operand, then the stored value.
        match self.imm_offset {
            0 => write!(f, " [{}]", self.offset)?,
            imm => write!(f, " [{}+0x{:x}]", self.offset, imm)?,
        }
        write!(f, " {}", self.data)
    }
}
impl_display_for_op!(OpIsbewr);
/// Vertex Index Load
/// (Only available in Kepler)
///
@ -8094,6 +8179,7 @@ pub enum Op {
TexDepBar(Box<OpTexDepBar>),
CS2R(Box<OpCS2R>),
Isberd(Box<OpIsberd>),
Isbewr(Box<OpIsbewr>),
ViLd(Box<OpViLd>),
Kill(Box<OpKill>),
Nop(OpNop),
@ -8279,6 +8365,7 @@ impl Op {
| Op::TexDepBar(_)
| Op::CS2R(_)
| Op::Isberd(_)
| Op::Isbewr(_)
| Op::ViLd(_)
| Op::Kill(_)
| Op::PixLd(_)
@ -8461,6 +8548,7 @@ impl Op {
| Op::TexDepBar(_)
| Op::CS2R(_)
| Op::Isberd(_)
| Op::Isbewr(_)
| Op::ViLd(_)
| Op::Kill(_)
| Op::PixLd(_)
@ -8856,6 +8944,7 @@ impl Instr {
| Op::RegOut(_)
| Op::Out(_)
| Op::OutFinal(_)
| Op::Isbewr(_)
| Op::Annotate(_) => false,
Op::BMov(op) => !op.clear,
_ => true,
@ -9662,8 +9751,14 @@ impl IsbeSpaceSharingStateTracker {
}
pub fn visit_instr(&mut self, instr: &Instr) {
// Track attribute store. (XXX: ISBEWR)
self.has_attribute_store |= matches!(instr.op, Op::ASt(_));
// Track attribute store.
match &instr.op {
Op::ASt(_) => self.has_attribute_store = true,
Op::Isbewr(op) if op.access_type == IsbeAccessType::Attribute => {
self.has_attribute_store = true;
}
_ => {}
}
// Track attribute load.
if matches!(instr.op, Op::ALd(_) | Op::Isberd(_)) {

View file

@ -882,3 +882,130 @@ pub fn test_plop3() {
c.check(sm);
}
}
#[test]
pub fn test_isberd() {
    // Checks the ISBERD encoding against the expected disassembly for every
    // modifier combination that a given shader model is exercised with.
    let r1 = RegRef::new(RegFile::GPR, 1, 1);
    let r2 = RegRef::new(RegFile::GPR, 2, 1);

    // Each table pairs a field value with the suffix expected in the
    // disassembly.
    let output_type = [(false, ""), (true, ".o")];
    let access_type_list = [
        (IsbeAccessType::Map, ""),
        (IsbeAccessType::Patch, ".patch"),
        (IsbeAccessType::Primitive, ".prim"),
        (IsbeAccessType::Attribute, ".attr"),
    ];
    let skew_type = [(false, ""), (true, ".skew")];
    let mem_types = [
        (MemType::U8, ""),
        (MemType::U16, ".u16"),
        (MemType::B32, ".32"),
    ];

    for &sm in sm_list() {
        if sm < 50 {
            continue;
        }

        // Below SM75 only the map / non-skew / U8 form is tested; immediate
        // offsets are only tested from SM86 on.
        let extended_modifiers = sm >= 75;
        let has_imm_offset = sm >= 86;

        let mut c = DisasmCheck::new();

        for (output, output_str) in output_type {
            for (access_type, access_type_str) in access_type_list {
                if !extended_modifiers && access_type != IsbeAccessType::Map {
                    continue;
                }

                for (skew, skew_str) in skew_type {
                    if !extended_modifiers && skew {
                        continue;
                    }

                    for (mem_type, mem_type_str) in mem_types {
                        if !extended_modifiers && mem_type != MemType::U8 {
                            continue;
                        }

                        for imm_offset in [0, 0x42] {
                            if !has_imm_offset && imm_offset != 0 {
                                continue;
                            }

                            let instr = OpIsberd {
                                dst: Dst::Reg(r1),
                                offset: SrcRef::Reg(r2).into(),
                                imm_offset,
                                mem_type,
                                access_type,
                                output,
                                skew,
                            };

                            let disasm = if imm_offset == 0 {
                                format!("isberd{output_str}{access_type_str}{skew_str}{mem_type_str} r1, [r2];")
                            } else {
                                format!("isberd{output_str}{access_type_str}{skew_str}{mem_type_str} r1, [r2+0x{imm_offset:x}];")
                            };

                            c.push(instr, disasm);
                        }
                    }
                }
            }
        }

        c.check(sm);
    }
}
#[test]
pub fn test_isbewr() {
    // Checks the ISBEWR encoding against the expected disassembly. Only the
    // `.o` form is exercised, on SM75 and later.
    let r1 = RegRef::new(RegFile::GPR, 1, 1);
    let r2 = RegRef::new(RegFile::GPR, 2, 1);

    // Each table pairs a field value with the suffix expected in the
    // disassembly.
    let access_type_list = [
        (IsbeAccessType::Map, ""),
        (IsbeAccessType::Attribute, ".attr"),
    ];
    let skew_type = [(false, ""), (true, ".skew")];
    let mem_types = [
        (MemType::U8, ""),
        (MemType::U16, ".u16"),
        (MemType::B32, ".32"),
    ];

    for &sm in sm_list() {
        if sm < 75 {
            continue;
        }

        // Immediate offsets are only tested from SM86 on.
        let has_imm_offset = sm >= 86;

        let mut c = DisasmCheck::new();

        for (access_type, access_type_str) in access_type_list {
            for (skew, skew_str) in skew_type {
                for (mem_type, mem_type_str) in mem_types {
                    for imm_offset in [0, 0x42] {
                        if !has_imm_offset && imm_offset != 0 {
                            continue;
                        }

                        let instr = OpIsbewr {
                            offset: SrcRef::Reg(r2).into(),
                            data: SrcRef::Reg(r1).into(),
                            imm_offset,
                            mem_type,
                            access_type,
                            output: true,
                            skew,
                        };

                        let disasm = if imm_offset == 0 {
                            format!("isbewr.o{access_type_str}{skew_str}{mem_type_str} [r2], r1;")
                        } else {
                            format!("isbewr.o{access_type_str}{skew_str}{mem_type_str} [r2+0x{imm_offset:x}], r1;")
                        };

                        c.push(instr, disasm);
                    }
                }
            }
        }

        c.check(sm);
    }
}

View file

@ -213,6 +213,7 @@ pub fn side_effect_type(op: &Op) -> SideEffect {
| Op::TexDepBar(_)
| Op::CS2R(_)
| Op::Isberd(_)
| Op::Isbewr(_)
| Op::ViLd(_)
| Op::Kill(_)
| Op::S2R(_) => SideEffect::Barrier,
@ -329,6 +330,7 @@ pub fn estimate_variable_latency(sm: &ShaderModelInfo, op: &Op) -> u32 {
| Op::TexDepBar(_)
| Op::CS2R(_)
| Op::Isberd(_)
| Op::Isbewr(_)
| Op::ViLd(_)
| Op::Kill(_)
| Op::PixLd(_)

View file

@ -159,6 +159,7 @@ fn op_reg_latency(op: &Op, reader: bool, op_reg_idx: usize) -> RegLatencySM100 {
Op::SuAtom(_) => Decoupled,
Op::PixLd(_) => DecoupledAgu,
Op::Isberd(_) => DecoupledAgu,
Op::Isbewr(_) => DecoupledAgu,
Op::LdTram(_) => DecoupledAgu,
Op::Shfl(_) => DecoupledAgu,
Op::Ldsm(_) => DecoupledAgu,

View file

@ -3153,9 +3153,13 @@ impl SM50Op for OpIsberd {
}
/// Encodes ISBERD for SM50.
///
/// # Panics
///
/// Panics unless the access type is `Map` and `imm_offset` is zero; no field
/// is written for either of them here.
fn encode(&self, e: &mut SM50Encoder<'_>) {
    assert!(
        self.access_type == IsbeAccessType::Map && self.imm_offset == 0
    );

    e.set_opcode(0xefd0);
    e.set_dst(&self.dst);
    // Bits 8..16 hold the register offset. They are written exactly once;
    // an earlier write of `idx` to the same bits was dead and is dropped.
    e.set_reg_src(8..16, &self.offset);
}
}

View file

@ -3751,14 +3751,76 @@ impl SM70Op for OpCS2R {
}
impl SM70Op for OpIsberd {
fn legalize(&mut self, _b: &mut LegalizeBuilder) {
// Nothing to do
fn legalize(&mut self, b: &mut LegalizeBuilder) {
b.copy_src_if_uniform(&mut self.offset);
}
fn encode(&self, e: &mut SM70Encoder<'_>) {
// The immediate offset is only supported on SM86+
assert!(e.sm >= 86 || self.imm_offset == 0);
e.set_opcode(0x923);
e.set_dst(&self.dst);
e.set_reg_src(24..32, &self.idx);
e.set_reg_src(24..32, &self.offset);
e.set_field(40..56, self.imm_offset);
e.set_field(
74..76,
match self.mem_type {
MemType::U8 => 0_u8,
MemType::U16 => 1_u8,
MemType::B32 => 2_u8,
_ => panic!("Invalid ISBERD mem type"),
},
);
e.set_field(
76..78,
match self.access_type {
IsbeAccessType::Map => 0_u8,
IsbeAccessType::Patch => 1_u8,
IsbeAccessType::Primitive => 2_u8,
IsbeAccessType::Attribute => 3_u8,
},
);
e.set_bit(78, self.skew);
e.set_bit(79, self.output);
}
}
impl SM70Op for OpIsbewr {
    /// Runs `copy_src_if_uniform` on both sources, offset first.
    fn legalize(&mut self, b: &mut LegalizeBuilder) {
        for src in [&mut self.offset, &mut self.data] {
            b.copy_src_if_uniform(src);
        }
    }

    /// Encodes ISBEWR for SM75+.
    ///
    /// # Panics
    ///
    /// Panics before SM75, if `imm_offset` is non-zero before SM86, if
    /// `mem_type` is not `U8`/`U16`/`B32`, or if `access_type` is neither
    /// `Map` nor `Attribute`.
    fn encode(&self, e: &mut SM70Encoder<'_>) {
        assert!(e.sm >= 75);
        // The immediate offset is only supported on SM86+
        assert!(e.sm >= 86 || self.imm_offset == 0);

        e.set_opcode(0x927);

        // Address and data operands.
        e.set_reg_src(24..32, &self.offset);
        e.set_reg_src(32..40, &self.data);
        e.set_field(40..56, self.imm_offset);

        let mem_type_bits: u8 = match self.mem_type {
            MemType::U8 => 0,
            MemType::U16 => 1,
            MemType::B32 => 2,
            _ => panic!("Invalid ISBEWR mem type"),
        };
        e.set_field(74..76, mem_type_bits);

        let access_bits: u8 = match self.access_type {
            IsbeAccessType::Map => 0,
            IsbeAccessType::Attribute => 3,
            _ => panic!("Invalid ISBEWR access type"),
        };
        e.set_field(76..78, access_bits);

        e.set_bit(78, self.skew);
        e.set_bit(79, self.output);
    }
}
@ -4156,6 +4218,7 @@ macro_rules! sm70_op_match {
Op::Bar($x) => $y,
Op::CS2R($x) => $y,
Op::Isberd($x) => $y,
Op::Isbewr($x) => $y,
Op::Kill($x) => $y,
Op::Nop($x) => $y,
Op::PixLd($x) => $y,

View file

@ -194,6 +194,7 @@ impl RegLatencySM75 {
Op::SuAtom(_) => Decoupled,
Op::PixLd(_) => Decoupled,
Op::Isberd(_) => Decoupled,
Op::Isbewr(_) => Decoupled,
Op::LdTram(_) => Decoupled,
Op::Shfl(_) => Decoupled,
Op::Ldsm(_) => Decoupled,

View file

@ -243,6 +243,7 @@ impl RegLatencySM80 {
Op::SuAtom(_) => Decoupled,
Op::PixLd(_) => DecoupledAgu,
Op::Isberd(_) => DecoupledAgu,
Op::Isbewr(_) => DecoupledAgu,
Op::LdTram(_) => DecoupledAgu,
Op::Shfl(_) => DecoupledAgu,
Op::Ldsm(_) => DecoupledAgu,

View file

@ -1148,8 +1148,19 @@ nak_nir_opt_offset_shift_nv(nir_shader *nir, const struct nak_compiler *nak)
);
}
const static struct nir_opt_offsets_options nak_offset_options = {
};
/* max_offset_cb for nir_opt_offsets: returns the largest immediate offset
 * that may be folded into the given intrinsic.  `data` is the nak_compiler.
 *
 * Only isberd_nv/isbewr_nv get a non-zero limit, and only on SM86+ where
 * the full 16-bit range is allowed; everything else returns 0.
 */
static uint32_t
nak_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data)
{
   const struct nak_compiler *nak = data;

   const bool is_isbe_access =
      intrin->intrinsic == nir_intrinsic_isberd_nv ||
      intrin->intrinsic == nir_intrinsic_isbewr_nv;

   if (is_isbe_access && nak->sm >= 86)
      return UINT16_MAX;

   return 0;
}
void
nak_postprocess_nir(nir_shader *nir,
@ -1289,6 +1300,11 @@ nak_postprocess_nir(nir_shader *nir,
}
OPT(nir, nak_nir_lower_load_store, nak);
struct nir_opt_offsets_options nak_offset_options = {
.max_offset_cb = nak_nir_max_imm_offset,
.cb_data = nak,
};
OPT(nir, nir_opt_offsets, &nak_offset_options);
/* Should run after nir_opt_offsets, because nir_opt_algebraic will move

View file

@ -144,7 +144,17 @@ lower_vtg_io_intrin(nir_builder *b,
nir_def *lo = nir_extract_u8_imm(b, info, 0);
nir_def *hi = nir_extract_u8_imm(b, info, 2);
nir_def *idx = nir_iadd(b, nir_imul(b, lo, hi), vtx);
vtx = nir_isberd_nv(b, idx);
const struct nak_nir_isbe_flags flags = {
.access = NAK_ISBE_ACCESS_MAP,
.output = false,
.skew = false,
.per_primitive = false,
};
vtx = nir_isberd_nv(b, 8, idx,
.flags = NAK_AS_U32(flags),
.access = ACCESS_CAN_REORDER);
} else {
vtx = nir_vild_nv(b, vtx);
}

View file

@ -257,6 +257,21 @@ struct nak_nir_imadsp_flags {
bool nak_nir_lower_vtg_io(nir_shader *nir, const struct nak_compiler *nak);
/* Access type of an isberd_nv/isbewr_nv intrinsic, stored in the 2-bit
 * `access` field of struct nak_nir_isbe_flags.  NAK maps each value to the
 * matching IsbeAccessType when translating from NIR; its translation of
 * isbewr_nv panics on PATCH and PRIM.
 */
enum nak_isbe_access {
NAK_ISBE_ACCESS_MAP,
NAK_ISBE_ACCESS_PATCH,
NAK_ISBE_ACCESS_PRIM,
NAK_ISBE_ACCESS_ATTR,
};
/* Flags carried in the FLAGS index of the isberd_nv/isbewr_nv intrinsics.
 * The bitfields add up to 32 bits; the struct is converted with
 * NAK_AS_U32() when an intrinsic is built.
 */
struct nak_nir_isbe_flags {
/* Access type selector. */
enum nak_isbe_access access : 2;
/* Becomes the `output` field (".o" modifier) of the NAK op. */
bool output : 1;
/* Becomes the `skew` field (".skew" modifier) of the NAK op. */
bool skew : 1;
/* When set, NAK does not record an ATTR access in the shader's
 * attribute read/written tracking.
 */
bool per_primitive : 1;
/* Padding up to 32 bits. */
uint32_t pad : 27;
};
enum nak_interp_mode {
NAK_INTERP_MODE_PERSPECTIVE,
NAK_INTERP_MODE_SCREEN_LINEAR,