nak: Use nak_nir_lower_vtg_io
This massively simplifies nak_from_nir.rs because it lets us do all the annoying NIR fiddling in NIR and not in the back-end.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
parent 7ece220f96
commit f85069ed57

4 changed files with 184 additions and 201 deletions
@@ -1478,6 +1478,18 @@ impl SM75Instr {
         }
     }
 
+    fn encode_al2p(&mut self, op: &OpAL2P) {
+        self.set_opcode(0x920);
+
+        self.set_dst(op.dst);
+        self.set_reg_src(24..32, op.offset);
+
+        self.set_field(40..50, op.access.addr);
+        self.set_field(74..76, 0_u8); // comps
+        assert!(!op.access.patch);
+        self.set_bit(79, op.access.output);
+    }
+
     fn encode_ald(&mut self, op: &OpALd) {
         self.set_opcode(0x321);
 
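For readers unfamiliar with the encoder helpers: set_field() and set_bit() are assumed here to pack a value into an absolute bit range of the 128-bit instruction word, stored as a [u32; 4]. A minimal sketch of that bit arithmetic under that assumption (the real NAK helpers are more general):

    // Illustrative only: pack `val` into bits `range` of a 128-bit
    // instruction word stored as four u32s, one bit at a time.
    fn set_field(inst: &mut [u32; 4], range: std::ops::Range<usize>, val: u64) {
        let width = range.end - range.start;
        debug_assert!(width == 64 || val < (1u64 << width));
        for (i, bit) in range.enumerate() {
            let v = ((val >> i) & 1) as u32;
            inst[bit / 32] = (inst[bit / 32] & !(1 << (bit % 32))) | (v << (bit % 32));
        }
    }

    // e.g. encode_al2p() above would do: set_field(&mut inst, 40..50, addr.into());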
@@ -1774,6 +1786,7 @@ impl SM75Instr {
             Op::St(op) => si.encode_st(&op),
             Op::Atom(op) => si.encode_atom(&op),
             Op::AtomCas(op) => si.encode_atom_cas(&op),
+            Op::AL2P(op) => si.encode_al2p(&op),
             Op::ALd(op) => si.encode_ald(&op),
             Op::ASt(op) => si.encode_ast(&op),
             Op::Ipa(op) => si.encode_ipa(&op),
@@ -1200,6 +1200,105 @@ impl<'a> ShaderFromNir<'a> {
     ) {
         let srcs = intrin.srcs_as_slice();
         match intrin.intrinsic {
+            nir_intrinsic_al2p_nv => {
+                let offset = self.get_src(&srcs[0]);
+                let addr = u16::try_from(intrin.base()).unwrap();
+
+                let flags = intrin.flags();
+                let flags: nak_nir_attr_io_flags =
+                    unsafe { std::mem::transmute_copy(&flags) };
+
+                let access = AttrAccess {
+                    addr: addr,
+                    comps: 1,
+                    patch: flags.patch(),
+                    output: flags.output(),
+                    flags: 0,
+                };
+
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpAL2P {
+                    dst: dst.into(),
+                    offset: offset,
+                    access: access,
+                });
+                self.set_dst(&intrin.def, dst);
+            }
+            nir_intrinsic_ald_nv | nir_intrinsic_ast_nv => {
+                let addr = u16::try_from(intrin.base()).unwrap();
+                let base = u16::try_from(intrin.range_base()).unwrap();
+                let range = u16::try_from(intrin.range()).unwrap();
+                let range = base..(base + range);
+
+                let flags = intrin.flags();
+                let flags: nak_nir_attr_io_flags =
+                    unsafe { std::mem::transmute_copy(&flags) };
+                assert!(!flags.patch() || !flags.phys());
+
+                if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
+                    if flags.patch() {
+                        match &mut self.info.stage {
+                            ShaderStageInfo::TessellationInit(stage) => {
+                                assert!(flags.output());
+                                stage.per_patch_attribute_count = max(
+                                    stage.per_patch_attribute_count,
+                                    (range.end / 4).try_into().unwrap(),
+                                );
+                            }
+                            ShaderStageInfo::Tessellation => (),
+                            _ => panic!("Patch I/O not supported"),
+                        }
+                    } else {
+                        if flags.output() {
+                            if intrin.intrinsic == nir_intrinsic_ast_nv {
+                                io.mark_store_req(range.clone());
+                            }
+                            io.mark_attrs_written(range);
+                        } else {
+                            io.mark_attrs_read(range);
+                        }
+                    }
+                } else {
+                    panic!("Must be a VTG stage");
+                }
+
+                let access = AttrAccess {
+                    addr: addr,
+                    comps: intrin.num_components,
+                    patch: flags.patch(),
+                    output: flags.output(),
+                    flags: flags.phys().into(),
+                };
+
+                if intrin.intrinsic == nir_intrinsic_ald_nv {
+                    let vtx = self.get_src(&srcs[0]);
+                    let offset = self.get_src(&srcs[1]);
+
+                    assert!(intrin.def.bit_size() == 32);
+                    let dst = b.alloc_ssa(RegFile::GPR, access.comps);
+                    b.push_op(OpALd {
+                        dst: dst.into(),
+                        vtx: vtx,
+                        offset: offset,
+                        access: access,
+                    });
+                    self.set_dst(&intrin.def, dst);
+                } else if intrin.intrinsic == nir_intrinsic_ast_nv {
+                    assert!(srcs[0].bit_size() == 32);
+                    let data = self.get_src(&srcs[0]);
+                    let vtx = self.get_src(&srcs[1]);
+                    let offset = self.get_src(&srcs[2]);
+
+                    b.push_op(OpASt {
+                        data: data,
+                        vtx: vtx,
+                        offset: offset,
+                        access: access,
+                    });
+                } else {
+                    panic!("Invalid VTG I/O intrinsic");
+                }
+            }
             nir_intrinsic_bindless_image_atomic => {
                 let handle = self.get_src(&srcs[0]);
                 let dim = self.get_image_dim(intrin);
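The transmute_copy dance above reinterprets the intrinsic's raw flags value as nak_nir_attr_io_flags, presumably the bindgen view of the C-side bitfield struct that nak_nir_lower_vtg_io packs on the NIR side. A minimal sketch of that pattern, assuming the raw flags fit in a 32-bit word; the field layout here is illustrative, not the real one:

    // Illustrative stand-in for the bindgen'd nak_nir_attr_io_flags.
    #[repr(C)]
    #[derive(Clone, Copy)]
    struct AttrIoFlags(u32);

    impl AttrIoFlags {
        fn patch(&self) -> bool { self.0 & (1 << 0) != 0 }
        fn output(&self) -> bool { self.0 & (1 << 1) != 0 }
        fn phys(&self) -> bool { self.0 & (1 << 2) != 0 }
    }

    fn main() {
        let raw: u32 = 0b010; // hypothetical value packed on the NIR side
        // transmute_copy reinterprets the bits without consuming `raw`,
        // mirroring the unsafe cast in the handler above.
        let flags: AttrIoFlags = unsafe { std::mem::transmute_copy(&raw) };
        assert!(flags.output() && !flags.patch() && !flags.phys());
    }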
@@ -1366,173 +1465,33 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
-            nir_intrinsic_load_input
-            | nir_intrinsic_load_output
-            | nir_intrinsic_load_per_vertex_input
-            | nir_intrinsic_load_per_vertex_output
-            | nir_intrinsic_store_output
-            | nir_intrinsic_store_per_vertex_output => {
-                let comps = intrin.num_components;
-
-                let store_data = match intrin.intrinsic {
-                    nir_intrinsic_load_input
-                    | nir_intrinsic_load_output
-                    | nir_intrinsic_load_per_vertex_input
-                    | nir_intrinsic_load_per_vertex_output => {
-                        assert!(intrin.def.bit_size() == 32);
-                        assert!(intrin.def.num_components() == comps);
-                        None
-                    }
-                    nir_intrinsic_store_output
-                    | nir_intrinsic_store_per_vertex_output => {
-                        assert!(srcs[0].bit_size() == 32);
-                        assert!(srcs[0].num_components() == comps);
-                        Some(self.get_src(&srcs[0]))
-                    }
-                    _ => panic!("Unhandled intrinsic"),
-                };
-
-                let (vtx, offset, offset_as_u32) = match intrin.intrinsic {
-                    nir_intrinsic_load_input | nir_intrinsic_load_output => (
-                        Src::new_zero(),
-                        self.get_src(&srcs[0]),
-                        srcs[0].as_uint(),
-                    ),
-                    nir_intrinsic_load_per_vertex_input
-                    | nir_intrinsic_load_per_vertex_output => (
-                        self.get_src(&srcs[0]),
-                        self.get_src(&srcs[1]),
-                        srcs[1].as_uint(),
-                    ),
-                    nir_intrinsic_store_output => (
-                        Src::new_zero(),
-                        self.get_src(&srcs[1]),
-                        srcs[1].as_uint(),
-                    ),
-                    nir_intrinsic_store_per_vertex_output => (
-                        self.get_src(&srcs[1]),
-                        self.get_src(&srcs[2]),
-                        srcs[2].as_uint(),
-                    ),
-                    _ => panic!("Unhandled intrinsic"),
-                };
-
-                let base = u16::try_from(intrin.base()).unwrap();
-                let range = u16::try_from(intrin.range()).unwrap();
-                let comp = u16::try_from(intrin.component()).unwrap();
-
-                let (range, addr, offset) = match offset_as_u32 {
-                    Some(imm) => {
-                        let imm = u16::try_from(imm).unwrap();
-                        let addr = base + imm + 4 * comp;
-                        let range = addr..(addr + 4 * u16::from(comps));
-                        (range, addr, Src::new_zero())
-                    }
-                    None => {
-                        let range = base..(base + range);
-                        (range, base + 4 * comp, offset)
-                    }
-                };
-
-                let stage = self.nir.info.stage();
-                let (output, patch) = match intrin.intrinsic {
-                    nir_intrinsic_load_input => {
-                        (false, stage == MESA_SHADER_TESS_EVAL)
-                    }
-                    nir_intrinsic_load_output | nir_intrinsic_store_output => {
-                        (true, stage == MESA_SHADER_TESS_CTRL)
-                    }
-                    nir_intrinsic_load_per_vertex_input => (false, false),
-                    nir_intrinsic_load_per_vertex_output
-                    | nir_intrinsic_store_per_vertex_output => (true, false),
-                    _ => panic!("Unhandled intrinsic"),
-                };
-
-                match &mut self.info.io {
-                    ShaderIoInfo::None => {
-                        panic!("Stage does not support load_input")
-                    }
-                    ShaderIoInfo::Fragment(io) => {
-                        if let Some(data) = store_data {
-                            // We assume these only ever happen in the
-                            // last block. This is ensured by
-                            // nir_lower_io_to_temporaries()
-                            assert!(offset_as_u32 == Some(0));
-                            assert!(addr % 4 == 0);
-                            let data = data.as_ssa().unwrap();
-                            for c in 0..usize::from(comps) {
-                                let idx =
-                                    usize::from(addr / 4) + usize::from(c);
-                                self.fs_out_regs[idx] = data[c];
-                            }
-                        } else {
-                            let dst = b.alloc_ssa(RegFile::GPR, comps);
-                            for c in 0..comps {
-                                let c_addr = addr + 4 * u16::from(c);
-
-                                io.mark_attr_read(c_addr, PixelImap::Constant);
-
-                                b.push_op(OpIpa {
-                                    dst: dst[usize::from(c)].into(),
-                                    addr: c_addr,
-                                    freq: InterpFreq::Constant,
-                                    loc: InterpLoc::Default,
-                                    offset: SrcRef::Zero.into(),
-                                });
-                            }
-                            self.set_dst(&intrin.def, dst);
-                        }
-                    }
-                    ShaderIoInfo::Vtg(io) => {
-                        if patch {
-                            match &mut self.info.stage {
-                                ShaderStageInfo::TessellationInit(stage) => {
-                                    stage.per_patch_attribute_count = max(
-                                        stage.per_patch_attribute_count,
-                                        (range.end / 4).try_into().unwrap(),
-                                    );
-                                }
-                                ShaderStageInfo::Tessellation => (),
-                                _ => panic!("Patch I/O not supported"),
-                            }
-                        } else {
-                            if output {
-                                if store_data.is_none() {
-                                    io.mark_store_req(range.clone());
-                                }
-                                io.mark_attrs_written(range);
-                            } else {
-                                io.mark_attrs_read(range);
-                            }
-                        }
-
-                        let access = AttrAccess {
-                            addr: addr,
-                            comps: comps,
-                            patch: patch,
-                            output: output,
-                            flags: 0,
-                        };
-
-                        if let Some(data) = store_data {
-                            b.push_op(OpASt {
-                                vtx: vtx,
-                                offset: offset,
-                                data: data,
-                                access: access,
-                            });
-                        } else {
-                            let dst = b.alloc_ssa(RegFile::GPR, comps);
-                            b.push_op(OpALd {
-                                dst: dst.into(),
-                                vtx: vtx,
-                                offset: offset,
-                                access: access,
-                            });
-                            self.set_dst(&intrin.def, dst);
-                        }
-                    }
-                }
-            }
+            nir_intrinsic_load_input => {
+                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
+                    panic!("load_input is only used for fragment shaders");
+                };
+
+                assert!(intrin.def.bit_size() == 32);
+                let comps = intrin.def.num_components;
+
+                let addr = u16::try_from(intrin.base()).unwrap()
+                    + u16::try_from(srcs[0].as_uint().unwrap()).unwrap()
+                    + 4 * u16::try_from(intrin.component()).unwrap();
+
+                let dst = b.alloc_ssa(RegFile::GPR, comps);
+                for c in 0..comps {
+                    let c_addr = addr + 4 * u16::from(c);
+
+                    io.mark_attr_read(c_addr, PixelImap::Constant);
+
+                    b.push_op(OpIpa {
+                        dst: dst[usize::from(c)].into(),
+                        addr: c_addr,
+                        freq: InterpFreq::Constant,
+                        loc: InterpLoc::Default,
+                        offset: SrcRef::Zero.into(),
+                    });
+                }
+                self.set_dst(&intrin.def, dst);
+            }
             nir_intrinsic_load_interpolated_input => {
                 let bary =
@@ -1851,6 +1810,22 @@ impl<'a> ShaderFromNir<'a> {
                     access: access,
                 });
             }
+            nir_intrinsic_store_output => {
+                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
+                    panic!("load_input is only used for fragment shaders");
+                };
+                let data = self.get_src(&srcs[0]);
+
+                let addr = u16::try_from(intrin.base()).unwrap()
+                    + u16::try_from(srcs[1].as_uint().unwrap()).unwrap()
+                    + 4 * u16::try_from(intrin.component()).unwrap();
+                assert!(addr % 4 == 0);
+
+                for c in 0..usize::from(intrin.num_components) {
+                    let idx = usize::from(addr / 4) + usize::from(c);
+                    self.fs_out_regs[idx] = data.as_ssa().unwrap()[c];
+                }
+            }
             nir_intrinsic_store_scratch => {
                 let data = self.get_src(&srcs[0]);
                 let size_B =
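The fragment-output path above bypasses OpASt entirely: it computes a byte address from base + constant offset + 4 × component and records each 32-bit component in fs_out_regs for the shader epilogue. A small sketch of that index math, with made-up numbers:

    fn main() {
        // Hypothetical store_output: base = 0x0c, const offset = 0, component = 1
        let (base, offset, component): (u16, u16, u16) = (0x0c, 0, 1);
        let addr = base + offset + 4 * component; // 0x10
        assert!(addr % 4 == 0);
        // One fs_out_regs slot per 32-bit component, starting at addr / 4.
        let first_slot = usize::from(addr / 4);
        assert_eq!(first_slot, 4);
    }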
@@ -3313,6 +3313,34 @@ impl fmt::Display for OpAtomCas {
     }
 }
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpAL2P {
+    pub dst: Dst,
+
+    #[src_type(GPR)]
+    pub offset: Src,
+
+    pub access: AttrAccess,
+}
+
+impl fmt::Display for OpAL2P {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "AL2P")?;
+        if self.access.output {
+            write!(f, ".O")?;
+        }
+        if self.access.patch {
+            write!(f, ".P")?;
+        }
+        write!(f, " {} a[{:#x}", self.dst, self.access.addr)?;
+        if !self.offset.is_zero() {
+            write!(f, "+{}", self.offset)?;
+        }
+        write!(f, "]")
+    }
+}
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpALd {
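Following the fmt code above, an AL2P of an output attribute at address 0x84 with a non-zero register offset would print along these lines (the register names are hypothetical, as the Dst/Src Display impls are not shown here):

    AL2P.O r4 a[0x84+r2]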
@@ -4019,6 +4047,7 @@ pub enum Op {
     St(OpSt),
     Atom(OpAtom),
     AtomCas(OpAtomCas),
+    AL2P(OpAL2P),
     ALd(OpALd),
     ASt(OpASt),
     Ipa(OpIpa),
@@ -4446,6 +4475,7 @@ impl Instr {
             | Op::St(_)
             | Op::Atom(_)
             | Op::AtomCas(_)
+            | Op::AL2P(_)
             | Op::ALd(_)
             | Op::ASt(_)
             | Op::Ipa(_)
@@ -304,18 +304,6 @@ nak_sysval_sysval_idx(gl_system_value sysval)
    }
 }
 
-static nir_def *
-nak_nir_isberd(nir_builder *b, nir_def *vertex)
-{
-   nir_def *info = nir_load_sysval_nv(b, 32, .base = NAK_SV_INVOCATION_INFO,
-                                      .access = ACCESS_CAN_REORDER);
-   nir_def *lo = nir_extract_u8_imm(b, info, 0);
-   nir_def *hi = nir_extract_u8_imm(b, info, 2);
-   nir_def *idx = nir_iadd(b, nir_imul(b, lo, hi), vertex);
-
-   return nir_isberd_nv(b, idx);
-}
-
 static bool
 nak_nir_lower_system_value_instr(nir_builder *b, nir_instr *instr, void *data)
 {
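For context on the deletion: nak_nir_isberd() built an ISBERD index from the packed invocation-info system value. A sketch of the arithmetic it emitted, in plain Rust rather than NIR builder calls:

    // Illustrative only: bytes 0 and 2 of the 32-bit invocation info,
    // multiplied and offset by the vertex index, mirroring the removed
    // nir_extract_u8_imm / nir_imul / nir_iadd chain.
    fn isberd_index(invocation_info: u32, vertex: u32) -> u32 {
        let lo = invocation_info & 0xff;         // byte 0
        let hi = (invocation_info >> 16) & 0xff; // byte 2
        lo * hi + vertex
    }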
@@ -338,8 +326,8 @@ nak_nir_lower_system_value_instr(nir_builder *b, nir_instr *instr, void *data)
    case nir_intrinsic_load_primitive_id: {
       assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
              b->shader->info.stage == MESA_SHADER_TESS_EVAL);
-      nir_def *idx = nak_nir_isberd(b, nir_imm_int(b, 0));
-      val = nir_load_per_vertex_input(b, 1, 32, idx, nir_imm_int(b, 0),
+      val = nir_load_per_vertex_input(b, 1, 32, nir_imm_int(b, 0),
+                                      nir_imm_int(b, 0),
                                       .base = NAK_ATTR_PRIMITIVE_ID,
                                       .dest_type = nir_type_int32);
       break;
@@ -423,23 +411,6 @@ nak_nir_lower_system_values(nir_shader *nir)
                                        NULL);
 }
 
-static bool
-lower_per_vertex_io_intrin(nir_builder *b,
-                           nir_intrinsic_instr *intrin,
-                           void *data)
-{
-   if (intrin->intrinsic != nir_intrinsic_load_per_vertex_input)
-      return false;
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   nir_src *vertex = &intrin->src[0];
-   nir_def *idx = nak_nir_isberd(b, vertex->ssa);
-   nir_src_rewrite(vertex, idx);
-
-   return true;
-}
-
 static bool
 nak_nir_lower_varyings(nir_shader *nir, nir_variable_mode modes)
 {
@@ -452,16 +423,6 @@ nak_nir_lower_varyings(nir_shader *nir, nir_variable_mode modes)
 
    OPT(nir, nir_lower_io, modes, type_size_vec4_bytes, 0);
 
-   switch (nir->info.stage) {
-   case MESA_SHADER_TESS_CTRL:
-   case MESA_SHADER_TESS_EVAL:
-   case MESA_SHADER_GEOMETRY:
-      OPT(nir, nir_shader_intrinsics_pass, lower_per_vertex_io_intrin,
-          nir_metadata_block_index | nir_metadata_dominance, NULL);
-   default:
-      break;
-   }
-
    return progress;
 }
 
@@ -782,12 +743,16 @@ nak_postprocess_nir(nir_shader *nir,
    case MESA_SHADER_VERTEX:
       OPT(nir, nak_nir_lower_vs_inputs);
       OPT(nir, nak_nir_lower_varyings, nir_var_shader_out);
+      OPT(nir, nir_opt_constant_folding);
+      OPT(nir, nak_nir_lower_vtg_io, nak);
       break;
 
    case MESA_SHADER_TESS_CTRL:
    case MESA_SHADER_TESS_EVAL:
    case MESA_SHADER_GEOMETRY:
       OPT(nir, nak_nir_lower_varyings, nir_var_shader_in | nir_var_shader_out);
+      OPT(nir, nir_opt_constant_folding);
+      OPT(nir, nak_nir_lower_vtg_io, nak);
       break;
 
    case MESA_SHADER_FRAGMENT: