mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-11 11:20:41 +01:00
Merge branch 'nak-isbe-instrs' into 'main'
nak: Implement ISBEWR and extend ISBERD implementation. See merge request mesa/mesa!39716.
This commit is contained in:
commit
413f82cb85
15 changed files with 453 additions and 15 deletions
|
|
@ -994,6 +994,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_dpas_intel:
|
||||
case nir_intrinsic_convert_cmat_intel:
|
||||
case nir_intrinsic_isberd_nv:
|
||||
case nir_intrinsic_isbewr_nv:
|
||||
case nir_intrinsic_vild_nv:
|
||||
case nir_intrinsic_al2p_nv:
|
||||
case nir_intrinsic_ald_nv:
|
||||
|
|
|
|||
|
|
@ -2768,8 +2768,15 @@ intrinsic("ldcx_nv", dest_comp=0, src_comp=[1, 1],
|
|||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64],
          indices=[ACCESS, BASE, DIVERGENT], flags=[CAN_ELIMINATE])
# src[] = { offset }.
# FLAGS is struct nak_nir_isbe_flags
# BASE is the immediate offset added to the offset source; RANGE_BASE and
# RANGE bound the attribute range the access may touch.
intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[8, 16, 32],
          indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS],
          flags=[CAN_ELIMINATE])
# src[] = { data, offset }.
# FLAGS is struct nak_nir_isbe_flags
# BASE, RANGE_BASE and RANGE have the same meaning as for isberd_nv.
intrinsic("isbewr_nv", src_comp=[0, 1],
          indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS], flags=[])
intrinsic("vild_nv", dest_comp=1, src_comp=[1], bit_sizes=[32],
          flags=[CAN_ELIMINATE, CAN_REORDER],
          indices=[BASE])
|
||||
|
|
|
|||
|
|
@ -383,6 +383,10 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
|
|||
/* Always signed offset */
|
||||
case nir_intrinsic_cmat_load_shared_nv:
|
||||
return try_fold_load_store(b, intrin, state, 0, -8388608, 0x7fffff, false);
|
||||
case nir_intrinsic_isberd_nv:
|
||||
return try_fold_load_store(b, intrin, state, 0, 0, get_max(state, intrin, 0), false);
|
||||
case nir_intrinsic_isbewr_nv:
|
||||
return try_fold_load_store(b, intrin, state, 1, 0, get_max(state, intrin, 0), false);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3033,13 +3033,104 @@ impl<'a> ShaderFromNir<'a> {
|
|||
self.set_dst(&intrin.def, dst.into());
|
||||
}
|
||||
nir_intrinsic_isberd_nv => {
    // RANGE_BASE/RANGE bound the attribute range this read may touch.
    // The range is only used for the I/O bookkeeping below, not for
    // the instruction encoding.
    let base = u16::try_from(intrin.range_base()).unwrap();
    let range = u16::try_from(intrin.range()).unwrap();
    // Checked add: a range end past u16::MAX must fail loudly instead
    // of wrapping into a bogus (possibly empty) range in release builds.
    let end = base
        .checked_add(range)
        .expect("ISBERD attribute range end overflows u16");
    let range = base..end;

    let flags = intrin.flags();
    // SAFETY: transmute_copy reads size_of::<nak_nir_isbe_flags>() bytes
    // from `flags`.  The C struct is a 32-bit bitfield (2+1+1+1+27 bits)
    // which the NIR side packs into the FLAGS index with NAK_AS_U32.
    // NOTE(review): assumes intrin.flags() returns that same 32-bit
    // value -- confirm against the bindings.
    let flags: nak_nir_isbe_flags =
        unsafe { std::mem::transmute_copy(&flags) };

    assert!(intrin.def.num_components() == 1);

    let size_B = intrin.def.bit_size() / 8;

    let access_type = match flags.access() {
        NAK_ISBE_ACCESS_MAP => IsbeAccessType::Map,
        NAK_ISBE_ACCESS_PATCH => IsbeAccessType::Patch,
        NAK_ISBE_ACCESS_PRIM => IsbeAccessType::Primitive,
        NAK_ISBE_ACCESS_ATTR => IsbeAccessType::Attribute,
        _ => panic!("Invalid ISBE access {}", flags.access()),
    };

    // Only non-per-primitive attribute accesses with a non-empty range
    // are recorded in the shader's VTG I/O info.
    if matches!(access_type, IsbeAccessType::Attribute)
        && !flags.per_primitive()
        && !range.is_empty()
    {
        if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
            if flags.output() {
                io.mark_attrs_written(range);
            } else {
                io.mark_attrs_read(range);
            }
        } else {
            panic!("Must be a VTG stage");
        }
    }

    let dst = b.alloc_ssa(RegFile::GPR);
    b.push_op(OpIsberd {
        dst: dst.into(),
        offset: self.get_src(&srcs[0]),
        imm_offset: u16::try_from(intrin.base()).unwrap(),
        output: flags.output(),
        skew: flags.skew(),
        mem_type: MemType::from_size(size_B, false),
        access_type,
    });
    self.set_dst(&intrin.def, dst.into());
}
|
||||
nir_intrinsic_isbewr_nv => {
    // RANGE_BASE/RANGE bound the attribute range this write may touch.
    // The range is only used for the I/O bookkeeping below, not for
    // the instruction encoding.
    let base = u16::try_from(intrin.range_base()).unwrap();
    let range = u16::try_from(intrin.range()).unwrap();
    // Checked add: a range end past u16::MAX must fail loudly instead
    // of wrapping into a bogus (possibly empty) range in release builds.
    let end = base
        .checked_add(range)
        .expect("ISBEWR attribute range end overflows u16");
    let range = base..end;

    let flags = intrin.flags();
    // SAFETY: transmute_copy reads size_of::<nak_nir_isbe_flags>() bytes
    // from `flags`.  The C struct is a 32-bit bitfield (2+1+1+1+27 bits)
    // which the NIR side packs into the FLAGS index with NAK_AS_U32.
    // NOTE(review): assumes intrin.flags() returns that same 32-bit
    // value -- confirm against the bindings.
    let flags: nak_nir_isbe_flags =
        unsafe { std::mem::transmute_copy(&flags) };

    // src[0] is the data to store, src[1] is the offset.
    assert!(srcs[0].num_components() == 1);

    let size_B = srcs[0].bit_size() / 8;

    // Only the MAP and ATTR spaces are accepted for writes.
    let access_type = match flags.access() {
        NAK_ISBE_ACCESS_MAP => IsbeAccessType::Map,
        NAK_ISBE_ACCESS_PATCH => {
            panic!("PATCH access is invalid in ISBEWR")
        }
        NAK_ISBE_ACCESS_PRIM => {
            panic!("PRIM access is invalid in ISBEWR")
        }
        NAK_ISBE_ACCESS_ATTR => IsbeAccessType::Attribute,
        _ => panic!("Invalid ISBE access {}", flags.access()),
    };

    // Only non-per-primitive attribute accesses with a non-empty range
    // are recorded in the shader's VTG I/O info.
    if matches!(access_type, IsbeAccessType::Attribute)
        && !flags.per_primitive()
        && !range.is_empty()
    {
        if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
            if flags.output() {
                io.mark_store_req(range.clone());
                io.mark_attrs_written(range);
            } else {
                // NOTE(review): a write without the output flag is
                // recorded as an attribute *read* -- confirm this is
                // intended and not carried over from the ISBERD path.
                io.mark_attrs_read(range);
            }
        } else {
            panic!("Must be a VTG stage");
        }
    }

    b.push_op(OpIsbewr {
        offset: self.get_src(&srcs[1]),
        imm_offset: u16::try_from(intrin.base()).unwrap(),
        data: self.get_src(&srcs[0]),
        output: flags.output(),
        skew: flags.skew(),
        mem_type: MemType::from_size(size_B, false),
        access_type,
    });
}
|
||||
nir_intrinsic_vild_nv => {
|
||||
let dst = b.alloc_ssa(RegFile::GPR);
|
||||
let idx = self.get_src(&srcs[0]);
|
||||
|
|
|
|||
|
|
@ -7233,23 +7233,108 @@ impl DisplayOp for OpCS2R {
|
|||
}
|
||||
impl_display_for_op!(OpCS2R);
|
||||
|
||||
/// Access space selected by an ISBERD/ISBEWR instruction.
///
/// The variants mirror the `NAK_ISBE_ACCESS_*` values carried in the NIR
/// intrinsic flags.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum IsbeAccessType {
    Map,
    Patch,
    Primitive,
    Attribute,
}

impl fmt::Display for IsbeAccessType {
    /// Writes the short lowercase name used when printing the op
    /// (`map`, `patch`, `prim` or `attr`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            IsbeAccessType::Map => "map",
            IsbeAccessType::Patch => "patch",
            IsbeAccessType::Primitive => "prim",
            IsbeAccessType::Attribute => "attr",
        })
    }
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIsberd {
|
||||
#[dst_type(GPR)]
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(SSA)]
|
||||
pub idx: Src,
|
||||
#[src_type(GPR)]
|
||||
pub offset: Src,
|
||||
|
||||
pub imm_offset: u16,
|
||||
pub mem_type: MemType,
|
||||
pub access_type: IsbeAccessType,
|
||||
pub output: bool,
|
||||
pub skew: bool,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpIsberd {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "isberd [{}]", self.idx)
|
||||
write!(f, "isberd")?;
|
||||
|
||||
if self.output {
|
||||
write!(f, ".o")?;
|
||||
}
|
||||
|
||||
write!(f, ".{}", self.access_type)?;
|
||||
|
||||
if self.skew {
|
||||
write!(f, ".skew")?;
|
||||
}
|
||||
|
||||
write!(f, "{} {}", self.mem_type, self.dst)?;
|
||||
|
||||
if self.imm_offset != 0 {
|
||||
write!(f, " [{}+0x{:x}]", self.offset, self.imm_offset)
|
||||
} else {
|
||||
write!(f, " [{}]", self.offset)
|
||||
}
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpIsberd);
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIsbewr {
|
||||
#[src_type(GPR)]
|
||||
pub offset: Src,
|
||||
|
||||
#[src_type(GPR)]
|
||||
pub data: Src,
|
||||
|
||||
pub imm_offset: u16,
|
||||
pub mem_type: MemType,
|
||||
pub access_type: IsbeAccessType,
|
||||
pub output: bool,
|
||||
pub skew: bool,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpIsbewr {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "isbewr")?;
|
||||
|
||||
if self.output {
|
||||
write!(f, ".o")?;
|
||||
}
|
||||
|
||||
write!(f, ".{}", self.access_type)?;
|
||||
|
||||
if self.skew {
|
||||
write!(f, ".skew")?;
|
||||
}
|
||||
|
||||
write!(f, "{}", self.mem_type)?;
|
||||
if self.imm_offset != 0 {
|
||||
write!(f, " [{}+0x{:x}]", self.offset, self.imm_offset)?;
|
||||
} else {
|
||||
write!(f, " [{}]", self.offset)?;
|
||||
}
|
||||
|
||||
write!(f, " {}", self.data)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpIsbewr);
|
||||
|
||||
/// Vertex Index Load
|
||||
/// (Only available in Kepler)
|
||||
///
|
||||
|
|
@ -8094,6 +8179,7 @@ pub enum Op {
|
|||
TexDepBar(Box<OpTexDepBar>),
|
||||
CS2R(Box<OpCS2R>),
|
||||
Isberd(Box<OpIsberd>),
|
||||
Isbewr(Box<OpIsbewr>),
|
||||
ViLd(Box<OpViLd>),
|
||||
Kill(Box<OpKill>),
|
||||
Nop(OpNop),
|
||||
|
|
@ -8279,6 +8365,7 @@ impl Op {
|
|||
| Op::TexDepBar(_)
|
||||
| Op::CS2R(_)
|
||||
| Op::Isberd(_)
|
||||
| Op::Isbewr(_)
|
||||
| Op::ViLd(_)
|
||||
| Op::Kill(_)
|
||||
| Op::PixLd(_)
|
||||
|
|
@ -8461,6 +8548,7 @@ impl Op {
|
|||
| Op::TexDepBar(_)
|
||||
| Op::CS2R(_)
|
||||
| Op::Isberd(_)
|
||||
| Op::Isbewr(_)
|
||||
| Op::ViLd(_)
|
||||
| Op::Kill(_)
|
||||
| Op::PixLd(_)
|
||||
|
|
@ -8856,6 +8944,7 @@ impl Instr {
|
|||
| Op::RegOut(_)
|
||||
| Op::Out(_)
|
||||
| Op::OutFinal(_)
|
||||
| Op::Isbewr(_)
|
||||
| Op::Annotate(_) => false,
|
||||
Op::BMov(op) => !op.clear,
|
||||
_ => true,
|
||||
|
|
@ -9662,8 +9751,14 @@ impl IsbeSpaceSharingStateTracker {
|
|||
}
|
||||
|
||||
pub fn visit_instr(&mut self, instr: &Instr) {
|
||||
// Track attribute store. (XXX: ISBEWR)
|
||||
self.has_attribute_store |= matches!(instr.op, Op::ASt(_));
|
||||
// Track attribute store.
|
||||
match &instr.op {
|
||||
Op::ASt(_) => self.has_attribute_store = true,
|
||||
Op::Isbewr(op) if op.access_type == IsbeAccessType::Attribute => {
|
||||
self.has_attribute_store = true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Track attribute load.
|
||||
if matches!(instr.op, Op::ALd(_) | Op::Isberd(_)) {
|
||||
|
|
|
|||
|
|
@ -882,3 +882,130 @@ pub fn test_plop3() {
|
|||
c.check(sm);
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the ISBERD encoding against the disassembler for every
/// combination of modifiers each shader model supports.
#[test]
pub fn test_isberd() {
    let dst_reg = RegRef::new(RegFile::GPR, 1, 1);
    let offset_reg = RegRef::new(RegFile::GPR, 2, 1);

    // Each table pairs a field value with the suffix expected in the
    // disassembly.
    let mem_types = [
        (MemType::U8, ""),
        (MemType::U16, ".u16"),
        (MemType::B32, ".32"),
    ];
    let output_type = [(false, ""), (true, ".o")];
    let skew_type = [(false, ""), (true, ".skew")];
    let access_type_list = [
        (IsbeAccessType::Map, ""),
        (IsbeAccessType::Patch, ".patch"),
        (IsbeAccessType::Primitive, ".prim"),
        (IsbeAccessType::Attribute, ".attr"),
    ];

    for &sm in sm_list() {
        if sm < 50 {
            continue;
        }

        // Below SM75 only the plain map/U8/no-skew form is exercised;
        // immediate offsets are only exercised from SM86 on.
        let pre_sm75 = sm < 75;
        let pre_sm86 = sm < 86;

        let mut c = DisasmCheck::new();
        for (output, output_str) in output_type {
            for (access_type, access_type_str) in access_type_list {
                if pre_sm75 && access_type != IsbeAccessType::Map {
                    continue;
                }

                for (skew, skew_str) in skew_type {
                    if pre_sm75 && skew {
                        continue;
                    }

                    for (mem_type, mem_type_str) in mem_types {
                        if pre_sm75 && mem_type != MemType::U8 {
                            continue;
                        }

                        for imm_offset in [0, 0x42] {
                            if pre_sm86 && imm_offset != 0 {
                                continue;
                            }

                            let instr = OpIsberd {
                                dst: Dst::Reg(dst_reg),
                                offset: SrcRef::Reg(offset_reg).into(),
                                imm_offset,
                                mem_type,
                                access_type,
                                output,
                                skew,
                            };

                            let addr = if imm_offset != 0 {
                                format!("[r2+0x{imm_offset:x}]")
                            } else {
                                String::from("[r2]")
                            };
                            let disasm = format!(
                                "isberd{output_str}{access_type_str}{skew_str}{mem_type_str} r1, {addr};"
                            );
                            c.push(instr, disasm);
                        }
                    }
                }
            }
        }

        c.check(sm);
    }
}
|
||||
|
||||
/// Checks the ISBEWR encoding against the disassembler on SM75+ for
/// every supported combination of access type, skew, width and
/// immediate offset.
#[test]
pub fn test_isbewr() {
    let data_reg = RegRef::new(RegFile::GPR, 1, 1);
    let offset_reg = RegRef::new(RegFile::GPR, 2, 1);

    // Each table pairs a field value with the suffix expected in the
    // disassembly.
    let mem_types = [
        (MemType::U8, ""),
        (MemType::U16, ".u16"),
        (MemType::B32, ".32"),
    ];
    let skew_type = [(false, ""), (true, ".skew")];
    let access_type_list = [
        (IsbeAccessType::Map, ""),
        (IsbeAccessType::Attribute, ".attr"),
    ];

    for &sm in sm_list() {
        if sm < 75 {
            continue;
        }

        // Immediate offsets are only exercised from SM86 on.
        let pre_sm86 = sm < 86;

        let mut c = DisasmCheck::new();
        for (access_type, access_type_str) in access_type_list {
            for (skew, skew_str) in skew_type {
                for (mem_type, mem_type_str) in mem_types {
                    for imm_offset in [0, 0x42] {
                        if pre_sm86 && imm_offset != 0 {
                            continue;
                        }

                        let instr = OpIsbewr {
                            offset: SrcRef::Reg(offset_reg).into(),
                            data: SrcRef::Reg(data_reg).into(),
                            imm_offset,
                            mem_type,
                            access_type,
                            output: true,
                            skew,
                        };

                        let addr = if imm_offset != 0 {
                            format!("[r2+0x{imm_offset:x}]")
                        } else {
                            String::from("[r2]")
                        };
                        let disasm = format!(
                            "isbewr.o{access_type_str}{skew_str}{mem_type_str} {addr}, r1;"
                        );
                        c.push(instr, disasm);
                    }
                }
            }
        }

        c.check(sm);
    }
}
|
||||
|
|
|
|||
|
|
@ -213,6 +213,7 @@ pub fn side_effect_type(op: &Op) -> SideEffect {
|
|||
| Op::TexDepBar(_)
|
||||
| Op::CS2R(_)
|
||||
| Op::Isberd(_)
|
||||
| Op::Isbewr(_)
|
||||
| Op::ViLd(_)
|
||||
| Op::Kill(_)
|
||||
| Op::S2R(_) => SideEffect::Barrier,
|
||||
|
|
@ -329,6 +330,7 @@ pub fn estimate_variable_latency(sm: &ShaderModelInfo, op: &Op) -> u32 {
|
|||
| Op::TexDepBar(_)
|
||||
| Op::CS2R(_)
|
||||
| Op::Isberd(_)
|
||||
| Op::Isbewr(_)
|
||||
| Op::ViLd(_)
|
||||
| Op::Kill(_)
|
||||
| Op::PixLd(_)
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ fn op_reg_latency(op: &Op, reader: bool, op_reg_idx: usize) -> RegLatencySM100 {
|
|||
Op::SuAtom(_) => Decoupled,
|
||||
Op::PixLd(_) => DecoupledAgu,
|
||||
Op::Isberd(_) => DecoupledAgu,
|
||||
Op::Isbewr(_) => DecoupledAgu,
|
||||
Op::LdTram(_) => DecoupledAgu,
|
||||
Op::Shfl(_) => DecoupledAgu,
|
||||
Op::Ldsm(_) => DecoupledAgu,
|
||||
|
|
|
|||
|
|
@ -3153,9 +3153,13 @@ impl SM50Op for OpIsberd {
|
|||
}
|
||||
|
||||
fn encode(&self, e: &mut SM50Encoder<'_>) {
|
||||
assert!(
|
||||
self.access_type == IsbeAccessType::Map && self.imm_offset == 0
|
||||
);
|
||||
|
||||
e.set_opcode(0xefd0);
|
||||
e.set_dst(&self.dst);
|
||||
e.set_reg_src(8..16, &self.idx);
|
||||
e.set_reg_src(8..16, &self.offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3751,14 +3751,76 @@ impl SM70Op for OpCS2R {
|
|||
}
|
||||
|
||||
impl SM70Op for OpIsberd {
|
||||
fn legalize(&mut self, _b: &mut LegalizeBuilder) {
|
||||
// Nothing to do
|
||||
fn legalize(&mut self, b: &mut LegalizeBuilder) {
|
||||
b.copy_src_if_uniform(&mut self.offset);
|
||||
}
|
||||
|
||||
fn encode(&self, e: &mut SM70Encoder<'_>) {
|
||||
// The immediate offset is only supported on SM86+
|
||||
assert!(e.sm >= 86 || self.imm_offset == 0);
|
||||
|
||||
e.set_opcode(0x923);
|
||||
e.set_dst(&self.dst);
|
||||
e.set_reg_src(24..32, &self.idx);
|
||||
e.set_reg_src(24..32, &self.offset);
|
||||
e.set_field(40..56, self.imm_offset);
|
||||
e.set_field(
|
||||
74..76,
|
||||
match self.mem_type {
|
||||
MemType::U8 => 0_u8,
|
||||
MemType::U16 => 1_u8,
|
||||
MemType::B32 => 2_u8,
|
||||
_ => panic!("Invalid ISBERD mem type"),
|
||||
},
|
||||
);
|
||||
e.set_field(
|
||||
76..78,
|
||||
match self.access_type {
|
||||
IsbeAccessType::Map => 0_u8,
|
||||
IsbeAccessType::Patch => 1_u8,
|
||||
IsbeAccessType::Primitive => 2_u8,
|
||||
IsbeAccessType::Attribute => 3_u8,
|
||||
},
|
||||
);
|
||||
e.set_bit(78, self.skew);
|
||||
e.set_bit(79, self.output);
|
||||
}
|
||||
}
|
||||
|
||||
impl SM70Op for OpIsbewr {
|
||||
fn legalize(&mut self, b: &mut LegalizeBuilder) {
|
||||
b.copy_src_if_uniform(&mut self.offset);
|
||||
b.copy_src_if_uniform(&mut self.data);
|
||||
}
|
||||
|
||||
fn encode(&self, e: &mut SM70Encoder<'_>) {
|
||||
assert!(e.sm >= 75);
|
||||
|
||||
// The immediate offset is only supported on SM86+
|
||||
assert!(e.sm >= 86 || self.imm_offset == 0);
|
||||
|
||||
e.set_opcode(0x927);
|
||||
e.set_reg_src(24..32, &self.offset);
|
||||
e.set_reg_src(32..40, &self.data);
|
||||
e.set_field(40..56, self.imm_offset);
|
||||
e.set_field(
|
||||
74..76,
|
||||
match self.mem_type {
|
||||
MemType::U8 => 0_u8,
|
||||
MemType::U16 => 1_u8,
|
||||
MemType::B32 => 2_u8,
|
||||
_ => panic!("Invalid ISBEWR mem type"),
|
||||
},
|
||||
);
|
||||
e.set_field(
|
||||
76..78,
|
||||
match self.access_type {
|
||||
IsbeAccessType::Map => 0_u8,
|
||||
IsbeAccessType::Attribute => 3_u8,
|
||||
_ => panic!("Invalid ISBEWR access type"),
|
||||
},
|
||||
);
|
||||
e.set_bit(78, self.skew);
|
||||
e.set_bit(79, self.output);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4156,6 +4218,7 @@ macro_rules! sm70_op_match {
|
|||
Op::Bar($x) => $y,
|
||||
Op::CS2R($x) => $y,
|
||||
Op::Isberd($x) => $y,
|
||||
Op::Isbewr($x) => $y,
|
||||
Op::Kill($x) => $y,
|
||||
Op::Nop($x) => $y,
|
||||
Op::PixLd($x) => $y,
|
||||
|
|
|
|||
|
|
@ -194,6 +194,7 @@ impl RegLatencySM75 {
|
|||
Op::SuAtom(_) => Decoupled,
|
||||
Op::PixLd(_) => Decoupled,
|
||||
Op::Isberd(_) => Decoupled,
|
||||
Op::Isbewr(_) => Decoupled,
|
||||
Op::LdTram(_) => Decoupled,
|
||||
Op::Shfl(_) => Decoupled,
|
||||
Op::Ldsm(_) => Decoupled,
|
||||
|
|
|
|||
|
|
@ -243,6 +243,7 @@ impl RegLatencySM80 {
|
|||
Op::SuAtom(_) => Decoupled,
|
||||
Op::PixLd(_) => DecoupledAgu,
|
||||
Op::Isberd(_) => DecoupledAgu,
|
||||
Op::Isbewr(_) => DecoupledAgu,
|
||||
Op::LdTram(_) => DecoupledAgu,
|
||||
Op::Shfl(_) => DecoupledAgu,
|
||||
Op::Ldsm(_) => DecoupledAgu,
|
||||
|
|
|
|||
|
|
@ -1148,8 +1148,19 @@ nak_nir_opt_offset_shift_nv(nir_shader *nir, const struct nak_compiler *nak)
|
|||
);
|
||||
}
|
||||
|
||||
const static struct nir_opt_offsets_options nak_offset_options = {
|
||||
};
|
||||
static uint32_t
|
||||
nak_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data)
|
||||
{
|
||||
const struct nak_compiler *nak = data;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_isberd_nv:
|
||||
case nir_intrinsic_isbewr_nv:
|
||||
return nak->sm >= 86 ? UINT16_MAX : 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nak_postprocess_nir(nir_shader *nir,
|
||||
|
|
@ -1289,6 +1300,11 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
}
|
||||
|
||||
OPT(nir, nak_nir_lower_load_store, nak);
|
||||
|
||||
struct nir_opt_offsets_options nak_offset_options = {
|
||||
.max_offset_cb = nak_nir_max_imm_offset,
|
||||
.cb_data = nak,
|
||||
};
|
||||
OPT(nir, nir_opt_offsets, &nak_offset_options);
|
||||
|
||||
/* Should run after nir_opt_offsets, because nir_opt_algebraic will move
|
||||
|
|
|
|||
|
|
@ -144,7 +144,17 @@ lower_vtg_io_intrin(nir_builder *b,
|
|||
nir_def *lo = nir_extract_u8_imm(b, info, 0);
|
||||
nir_def *hi = nir_extract_u8_imm(b, info, 2);
|
||||
nir_def *idx = nir_iadd(b, nir_imul(b, lo, hi), vtx);
|
||||
vtx = nir_isberd_nv(b, idx);
|
||||
|
||||
const struct nak_nir_isbe_flags flags = {
|
||||
.access = NAK_ISBE_ACCESS_MAP,
|
||||
.output = false,
|
||||
.skew = false,
|
||||
.per_primitive = false,
|
||||
};
|
||||
|
||||
vtx = nir_isberd_nv(b, 8, idx,
|
||||
.flags = NAK_AS_U32(flags),
|
||||
.access = ACCESS_CAN_REORDER);
|
||||
} else {
|
||||
vtx = nir_vild_nv(b, vtx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -257,6 +257,21 @@ struct nak_nir_imadsp_flags {
|
|||
|
||||
bool nak_nir_lower_vtg_io(nir_shader *nir, const struct nak_compiler *nak);
|
||||
|
||||
/* Access space of an isberd_nv/isbewr_nv intrinsic.  The value is stored
 * in the 2-bit nak_nir_isbe_flags::access field.
 */
enum nak_isbe_access {
   NAK_ISBE_ACCESS_MAP = 0,
   NAK_ISBE_ACCESS_PATCH = 1,
   NAK_ISBE_ACCESS_PRIM = 2,
   NAK_ISBE_ACCESS_ATTR = 3,
};

/* Contents of the FLAGS index of isberd_nv/isbewr_nv, 32 bits in total. */
struct nak_nir_isbe_flags {
   /* Which space is accessed */
   enum nak_isbe_access access : 2;
   /* Emitted as the .o modifier */
   bool output : 1;
   /* Emitted as the .skew modifier */
   bool skew : 1;
   /* When set, the access is left out of the attribute read/write
    * bookkeeping.
    */
   bool per_primitive : 1;
   uint32_t pad : 27;
};
|
||||
|
||||
enum nak_interp_mode {
|
||||
NAK_INTERP_MODE_PERSPECTIVE,
|
||||
NAK_INTERP_MODE_SCREEN_LINEAR,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue