nak: Implement more attribute I/O

This commit implements load/store_per_vertex* and load_output, which are
required for tessellation shaders.  Because things are getting a bit
complicated, it's easier to combine all the attribute load/store ops into
a single case in the match.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand 2023-09-29 17:27:08 -05:00 committed by Marge Bot
parent 091410c708
commit c2768736c9
2 changed files with 149 additions and 115 deletions

View file

@ -16,7 +16,7 @@ use crate::util::DivCeil;
use nak_bindings::*;
use std::cmp::min;
use std::cmp::{max, min};
use std::collections::{HashMap, HashSet};
fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
@ -61,7 +61,11 @@ fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
})
}
MESA_SHADER_TESS_CTRL => {
ShaderStageInfo::TessellationInit(Default::default())
let info_tess = unsafe { &nir.info.__bindgen_anon_1.tess };
ShaderStageInfo::TessellationInit(TessellationInitShaderInfo {
per_patch_attribute_count: 6,
threads_per_patch: info_tess.tcs_vertices_out,
})
}
MESA_SHADER_TESS_EVAL => ShaderStageInfo::Tessellation,
_ => panic!("Unknown shader stage"),
@ -91,7 +95,7 @@ fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
attr_out: [0; 4],
// TODO: figure out how to fill this.
store_req_start: 0xff,
store_req_start: u8::MAX,
store_req_end: 0,
}),
_ => panic!("Unknown shader stage"),
@ -1362,22 +1366,85 @@ impl<'a> ShaderFromNir<'a> {
});
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_load_input | nir_intrinsic_load_per_vertex_input => {
assert!(intrin.def.bit_size() == 32);
let comps = intrin.def.num_components();
let dst = b.alloc_ssa(RegFile::GPR, comps);
nir_intrinsic_load_input
| nir_intrinsic_load_output
| nir_intrinsic_load_per_vertex_input
| nir_intrinsic_load_per_vertex_output
| nir_intrinsic_store_output
| nir_intrinsic_store_per_vertex_output => {
let comps = intrin.num_components;
let store_data = match intrin.intrinsic {
nir_intrinsic_load_input
| nir_intrinsic_load_output
| nir_intrinsic_load_per_vertex_input
| nir_intrinsic_load_per_vertex_output => {
assert!(intrin.def.bit_size() == 32);
assert!(intrin.def.num_components() == comps);
None
}
nir_intrinsic_store_output
| nir_intrinsic_store_per_vertex_output => {
assert!(srcs[0].bit_size() == 32);
assert!(srcs[0].num_components() == comps);
Some(self.get_src(&srcs[0]))
}
_ => panic!("Unhandled intrinsic"),
};
let (vtx, offset, offset_as_u32) = match intrin.intrinsic {
nir_intrinsic_load_input => (
nir_intrinsic_load_input | nir_intrinsic_load_output => (
Src::new_zero(),
self.get_src(&srcs[0]),
srcs[0].as_uint(),
),
nir_intrinsic_load_per_vertex_input => (
nir_intrinsic_load_per_vertex_input
| nir_intrinsic_load_per_vertex_output => (
self.get_src(&srcs[0]),
self.get_src(&srcs[1]),
srcs[1].as_uint(),
),
nir_intrinsic_store_output => (
Src::new_zero(),
self.get_src(&srcs[1]),
srcs[1].as_uint(),
),
nir_intrinsic_store_per_vertex_output => (
self.get_src(&srcs[1]),
self.get_src(&srcs[2]),
srcs[2].as_uint(),
),
_ => panic!("Unhandled intrinsic"),
};
let base = u16::try_from(intrin.base()).unwrap();
let range = u16::try_from(intrin.range()).unwrap();
let comp = u16::try_from(intrin.component()).unwrap();
let (range, addr, offset) = match offset_as_u32 {
Some(imm) => {
let imm = u16::try_from(imm).unwrap();
let addr = base + imm + 4 * comp;
let range = addr..(addr + 4 * u16::from(comps));
(range, addr, Src::new_zero())
}
None => {
let range = base..(base + range);
(range, base + 4 * comp, offset)
}
};
let stage = self.nir.info.stage();
let (output, patch) = match intrin.intrinsic {
nir_intrinsic_load_input => {
(false, stage == MESA_SHADER_TESS_EVAL)
}
nir_intrinsic_load_output | nir_intrinsic_store_output => {
(true, stage == MESA_SHADER_TESS_CTRL)
}
nir_intrinsic_load_per_vertex_input => (false, false),
nir_intrinsic_load_per_vertex_output
| nir_intrinsic_store_per_vertex_output => (true, false),
_ => panic!("Unhandled intrinsic"),
};
@ -1386,63 +1453,86 @@ impl<'a> ShaderFromNir<'a> {
panic!("Stage does not support load_input")
}
ShaderIoInfo::Fragment(io) => {
assert!(intrin.intrinsic == nir_intrinsic_load_input);
let addr = u16::try_from(intrin.base()).unwrap()
+ u16::try_from(offset_as_u32.unwrap()).unwrap()
+ u16::try_from(intrin.component()).unwrap() * 4;
if let Some(data) = store_data {
// We assume these only ever happen in the
// last block. This is ensured by
// nir_lower_io_to_temporaries()
assert!(offset_as_u32 == Some(0));
assert!(addr % 4 == 0);
let data = data.as_ssa().unwrap();
for c in 0..usize::from(comps) {
let idx =
usize::from(addr / 4) + usize::from(c);
self.fs_out_regs[idx] = data[c];
}
} else {
let dst = b.alloc_ssa(RegFile::GPR, comps);
for c in 0..comps {
let c_addr = addr + 4 * u16::from(c);
for c in 0..comps {
let c_addr = addr + 4 * u16::from(c);
io.mark_attr_read(c_addr, PixelImap::Constant);
io.mark_attr_read(c_addr, PixelImap::Constant);
b.push_op(OpIpa {
dst: dst[usize::from(c)].into(),
addr: c_addr,
freq: InterpFreq::Constant,
loc: InterpLoc::Default,
offset: SrcRef::Zero.into(),
});
b.push_op(OpIpa {
dst: dst[usize::from(c)].into(),
addr: c_addr,
freq: InterpFreq::Constant,
loc: InterpLoc::Default,
offset: SrcRef::Zero.into(),
});
}
self.set_dst(&intrin.def, dst);
}
}
ShaderIoInfo::Vtg(io) => {
let base = u16::try_from(intrin.base()).unwrap();
let range = u16::try_from(intrin.base()).unwrap();
let comp = u16::try_from(intrin.component()).unwrap();
let (addr, offset) = match offset_as_u32 {
Some(imm) => {
let imm = u16::try_from(imm).unwrap();
let addr = base + imm + 4 * comp;
io.mark_attrs_read(
addr..(addr + 4 * u16::from(comps)),
);
(addr, Src::new_zero())
if patch {
match &mut self.info.stage {
ShaderStageInfo::TessellationInit(stage) => {
stage.per_patch_attribute_count = max(
stage.per_patch_attribute_count,
(range.end / 4).try_into().unwrap(),
);
}
ShaderStageInfo::Tessellation => (),
_ => panic!("Patch I/O not supported"),
}
None => {
io.mark_attrs_read(base..(base + range));
(base + 4 * comp, offset)
} else {
if output {
if store_data.is_none() {
io.mark_store_req(range.clone());
}
io.mark_attrs_written(range);
} else {
io.mark_attrs_read(range);
}
};
}
let access = AttrAccess {
addr: addr,
comps: comps,
patch: false,
output: false,
patch: patch,
output: output,
flags: 0,
};
b.push_op(OpALd {
dst: dst.into(),
vtx: vtx,
offset: offset,
access: access,
});
if let Some(data) = store_data {
b.push_op(OpASt {
vtx: vtx,
offset: offset,
data: data,
access: access,
});
} else {
let dst = b.alloc_ssa(RegFile::GPR, comps);
b.push_op(OpALd {
dst: dst.into(),
vtx: vtx,
offset: offset,
access: access,
});
self.set_dst(&intrin.def, dst);
}
}
}
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_load_interpolated_input => {
let bary =
@ -1717,70 +1807,6 @@ impl<'a> ShaderFromNir<'a> {
access: access,
});
}
nir_intrinsic_store_output => {
assert!(intrin.get_src(0).bit_size() == 32);
let comps = intrin.num_components;
let data = self.get_src(&srcs[0]);
let vtx = Src::new_zero();
let offset = self.get_src(&srcs[1]);
let offset_as_u32 = srcs[1].as_uint();
match &mut self.info.io {
ShaderIoInfo::None => {
panic!("Stage does not support load_input")
}
ShaderIoInfo::Fragment(io) => {
/* We assume these only ever happen in the last block.
* This is ensured by nir_lower_io_to_temporaries()
*/
let data = *self.get_src(&srcs[0]).as_ssa().unwrap();
assert!(offset_as_u32 == Some(0));
let base: u8 = intrin.base().try_into().unwrap();
let comp: u8 = intrin.component().try_into().unwrap();
assert!(base % 4 == 0);
let out_idx = usize::from((base / 4) + comp);
for c in 0..usize::from(comps) {
self.fs_out_regs[out_idx + c] = data[c];
}
}
ShaderIoInfo::Vtg(io) => {
let base = u16::try_from(intrin.base()).unwrap();
let range = u16::try_from(intrin.base()).unwrap();
let comp = u16::try_from(intrin.component()).unwrap();
let (addr, offset) = match offset_as_u32 {
Some(imm) => {
let imm = u16::try_from(imm).unwrap();
let addr = base + imm + 4 * comp;
io.mark_attrs_written(
addr..(addr + 4 * u16::from(comps)),
);
(addr, Src::new_zero())
}
None => {
io.mark_attrs_written(base..(base + range));
(base + 4 * comp, offset)
}
};
let access = AttrAccess {
addr: addr,
comps: comps,
patch: false,
output: true,
flags: 0,
};
b.push_op(OpASt {
vtx: vtx,
offset: offset,
data: data,
access: access,
});
}
}
}
nir_intrinsic_store_scratch => {
let data = self.get_src(&srcs[0]);
let size_B =

View file

@ -13,6 +13,7 @@ use crate::nak_cfg::CFG;
use crate::nak_sph::{OutputTopology, PixelImap};
use crate::{GetDebugFlags, DEBUG};
use nak_ir_proc::*;
use std::cmp::{max, min};
use std::fmt;
use std::iter::Zip;
use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
@ -4735,7 +4736,7 @@ impl Default for GeometryShaderInfo {
}
}
#[derive(Debug, Default)]
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
pub per_patch_attribute_count: u8,
pub threads_per_patch: u8,
@ -4804,6 +4805,13 @@ impl VtgIoInfo {
pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
self.mark_attrs(addrs, true);
}
pub fn mark_store_req(&mut self, addrs: Range<u16>) {
let start = (addrs.start / 4).try_into().unwrap();
let end = ((addrs.end - 1) / 4).try_into().unwrap();
self.store_req_start = min(self.store_req_start, start);
self.store_req_end = max(self.store_req_end, end);
}
}
#[derive(Debug)]