nak: Implement more attribute I/O
This commit implements load/store_per_vertex* and load_output, which are required for tessellation shaders. Because things are getting a bit complicated, it's easier to combine all the attribute load/store ops into a single case in the match.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
parent 091410c708
commit c2768736c9

2 changed files with 149 additions and 115 deletions
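The shape of that combined match arm can be summarized as follows. This is a minimal, hypothetical sketch (the AttrIntrin enum, the classify() helper, and the boolean tuple are illustrative names, not the actual NAK code); the real arm in the diff below additionally computes attribute addresses, tracks read/write ranges, and emits OpALd/OpASt:

    // Sketch only: classify an attribute I/O intrinsic the way the new
    // combined match arm does -- store vs. load, output vs. input, and
    // per-patch vs. per-vertex for the tessellation stages.
    #[allow(dead_code)]
    #[derive(Clone, Copy)]
    enum AttrIntrin {
        LoadInput,
        LoadOutput,
        LoadPerVertexInput,
        LoadPerVertexOutput,
        StoreOutput,
        StorePerVertexOutput,
    }

    fn classify(
        intrin: AttrIntrin,
        is_tess_ctrl: bool,
        is_tess_eval: bool,
    ) -> (bool, bool, bool) {
        use AttrIntrin::*;
        // Stores carry their data in a source; loads produce a def.
        let is_store = matches!(intrin, StoreOutput | StorePerVertexOutput);
        // Output-space accesses address the output attribute space.
        let is_output = matches!(
            intrin,
            LoadOutput | LoadPerVertexOutput | StoreOutput | StorePerVertexOutput
        );
        // Non-per-vertex access to the far side of the stage boundary is
        // per-patch in the tessellation stages.
        let is_patch = match intrin {
            LoadInput => is_tess_eval,
            LoadOutput | StoreOutput => is_tess_ctrl,
            _ => false,
        };
        (is_store, is_output, is_patch)
    }

    fn main() {
        // load_output in a tessellation control shader reads back a
        // per-patch output.
        assert_eq!(classify(AttrIntrin::LoadOutput, true, false), (false, true, true));
        // store_per_vertex_output writes a per-vertex TCS output.
        assert_eq!(classify(AttrIntrin::StorePerVertexOutput, true, false), (true, true, false));
    }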
@@ -16,7 +16,7 @@ use crate::util::DivCeil;
 use nak_bindings::*;
 
-use std::cmp::min;
+use std::cmp::{max, min};
 use std::collections::{HashMap, HashSet};
 
 fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
@@ -61,7 +61,11 @@ fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
             })
         }
         MESA_SHADER_TESS_CTRL => {
-            ShaderStageInfo::TessellationInit(Default::default())
+            let info_tess = unsafe { &nir.info.__bindgen_anon_1.tess };
+            ShaderStageInfo::TessellationInit(TessellationInitShaderInfo {
+                per_patch_attribute_count: 6,
+                threads_per_patch: info_tess.tcs_vertices_out,
+            })
         }
         MESA_SHADER_TESS_EVAL => ShaderStageInfo::Tessellation,
         _ => panic!("Unknown shader stage"),
@@ -91,7 +95,7 @@ fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
             attr_out: [0; 4],
 
             // TODO: figure out how to fill this.
-            store_req_start: 0xff,
+            store_req_start: u8::MAX,
             store_req_end: 0,
         }),
         _ => panic!("Unknown shader stage"),
@@ -1362,22 +1366,85 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
-            nir_intrinsic_load_input | nir_intrinsic_load_per_vertex_input => {
-                assert!(intrin.def.bit_size() == 32);
-                let comps = intrin.def.num_components();
-                let dst = b.alloc_ssa(RegFile::GPR, comps);
+            nir_intrinsic_load_input
+            | nir_intrinsic_load_output
+            | nir_intrinsic_load_per_vertex_input
+            | nir_intrinsic_load_per_vertex_output
+            | nir_intrinsic_store_output
+            | nir_intrinsic_store_per_vertex_output => {
+                let comps = intrin.num_components;
+
+                let store_data = match intrin.intrinsic {
+                    nir_intrinsic_load_input
+                    | nir_intrinsic_load_output
+                    | nir_intrinsic_load_per_vertex_input
+                    | nir_intrinsic_load_per_vertex_output => {
+                        assert!(intrin.def.bit_size() == 32);
+                        assert!(intrin.def.num_components() == comps);
+                        None
+                    }
+                    nir_intrinsic_store_output
+                    | nir_intrinsic_store_per_vertex_output => {
+                        assert!(srcs[0].bit_size() == 32);
+                        assert!(srcs[0].num_components() == comps);
+                        Some(self.get_src(&srcs[0]))
+                    }
+                    _ => panic!("Unhandled intrinsic"),
+                };
 
                 let (vtx, offset, offset_as_u32) = match intrin.intrinsic {
-                    nir_intrinsic_load_input => (
+                    nir_intrinsic_load_input | nir_intrinsic_load_output => (
                         Src::new_zero(),
                         self.get_src(&srcs[0]),
                         srcs[0].as_uint(),
                     ),
-                    nir_intrinsic_load_per_vertex_input => (
+                    nir_intrinsic_load_per_vertex_input
+                    | nir_intrinsic_load_per_vertex_output => (
                         self.get_src(&srcs[0]),
                         self.get_src(&srcs[1]),
                         srcs[1].as_uint(),
                     ),
+                    nir_intrinsic_store_output => (
+                        Src::new_zero(),
+                        self.get_src(&srcs[1]),
+                        srcs[1].as_uint(),
+                    ),
+                    nir_intrinsic_store_per_vertex_output => (
+                        self.get_src(&srcs[1]),
+                        self.get_src(&srcs[2]),
+                        srcs[2].as_uint(),
+                    ),
                     _ => panic!("Unhandled intrinsic"),
                 };
+
+                let base = u16::try_from(intrin.base()).unwrap();
+                let range = u16::try_from(intrin.range()).unwrap();
+                let comp = u16::try_from(intrin.component()).unwrap();
+
+                let (range, addr, offset) = match offset_as_u32 {
+                    Some(imm) => {
+                        let imm = u16::try_from(imm).unwrap();
+                        let addr = base + imm + 4 * comp;
+                        let range = addr..(addr + 4 * u16::from(comps));
+                        (range, addr, Src::new_zero())
+                    }
+                    None => {
+                        let range = base..(base + range);
+                        (range, base + 4 * comp, offset)
+                    }
+                };
+
+                let stage = self.nir.info.stage();
+                let (output, patch) = match intrin.intrinsic {
+                    nir_intrinsic_load_input => {
+                        (false, stage == MESA_SHADER_TESS_EVAL)
+                    }
+                    nir_intrinsic_load_output | nir_intrinsic_store_output => {
+                        (true, stage == MESA_SHADER_TESS_CTRL)
+                    }
+                    nir_intrinsic_load_per_vertex_input => (false, false),
+                    nir_intrinsic_load_per_vertex_output
+                    | nir_intrinsic_store_per_vertex_output => (true, false),
+                    _ => panic!("Unhandled intrinsic"),
+                };
 
@@ -1386,63 +1453,86 @@ impl<'a> ShaderFromNir<'a> {
                         panic!("Stage does not support load_input")
                     }
                     ShaderIoInfo::Fragment(io) => {
-                        assert!(intrin.intrinsic == nir_intrinsic_load_input);
                         let addr = u16::try_from(intrin.base()).unwrap()
                             + u16::try_from(offset_as_u32.unwrap()).unwrap()
                             + u16::try_from(intrin.component()).unwrap() * 4;
-
-                        for c in 0..comps {
-                            let c_addr = addr + 4 * u16::from(c);
-                            io.mark_attr_read(c_addr, PixelImap::Constant);
-
-                            b.push_op(OpIpa {
-                                dst: dst[usize::from(c)].into(),
-                                addr: c_addr,
-                                freq: InterpFreq::Constant,
-                                loc: InterpLoc::Default,
-                                offset: SrcRef::Zero.into(),
-                            });
+                        if let Some(data) = store_data {
+                            // We assume these only ever happen in the
+                            // last block. This is ensured by
+                            // nir_lower_io_to_temporaries()
+                            assert!(offset_as_u32 == Some(0));
+                            assert!(addr % 4 == 0);
+                            let data = data.as_ssa().unwrap();
+                            for c in 0..usize::from(comps) {
+                                let idx =
+                                    usize::from(addr / 4) + usize::from(c);
+                                self.fs_out_regs[idx] = data[c];
+                            }
+                        } else {
+                            let dst = b.alloc_ssa(RegFile::GPR, comps);
+                            for c in 0..comps {
+                                let c_addr = addr + 4 * u16::from(c);
+                                io.mark_attr_read(c_addr, PixelImap::Constant);
+
+                                b.push_op(OpIpa {
+                                    dst: dst[usize::from(c)].into(),
+                                    addr: c_addr,
+                                    freq: InterpFreq::Constant,
+                                    loc: InterpLoc::Default,
+                                    offset: SrcRef::Zero.into(),
+                                });
+                            }
+                            self.set_dst(&intrin.def, dst);
                         }
                     }
                     ShaderIoInfo::Vtg(io) => {
-                        let base = u16::try_from(intrin.base()).unwrap();
-                        let range = u16::try_from(intrin.base()).unwrap();
-                        let comp = u16::try_from(intrin.component()).unwrap();
-
-                        let (addr, offset) = match offset_as_u32 {
-                            Some(imm) => {
-                                let imm = u16::try_from(imm).unwrap();
-                                let addr = base + imm + 4 * comp;
-                                io.mark_attrs_read(
-                                    addr..(addr + 4 * u16::from(comps)),
-                                );
-                                (addr, Src::new_zero())
+                        if patch {
+                            match &mut self.info.stage {
+                                ShaderStageInfo::TessellationInit(stage) => {
+                                    stage.per_patch_attribute_count = max(
+                                        stage.per_patch_attribute_count,
+                                        (range.end / 4).try_into().unwrap(),
+                                    );
+                                }
+                                ShaderStageInfo::Tessellation => (),
+                                _ => panic!("Patch I/O not supported"),
                             }
-                            None => {
-                                io.mark_attrs_read(base..(base + range));
-                                (base + 4 * comp, offset)
+                        } else {
+                            if output {
+                                if store_data.is_none() {
+                                    io.mark_store_req(range.clone());
+                                }
+                                io.mark_attrs_written(range);
+                            } else {
+                                io.mark_attrs_read(range);
                             }
-                        };
+                        }
 
                         let access = AttrAccess {
                             addr: addr,
                             comps: comps,
-                            patch: false,
-                            output: false,
+                            patch: patch,
+                            output: output,
                             flags: 0,
                         };
 
-                        b.push_op(OpALd {
-                            dst: dst.into(),
-                            vtx: vtx,
-                            offset: offset,
-                            access: access,
-                        });
+                        if let Some(data) = store_data {
+                            b.push_op(OpASt {
+                                vtx: vtx,
+                                offset: offset,
+                                data: data,
+                                access: access,
+                            });
+                        } else {
+                            let dst = b.alloc_ssa(RegFile::GPR, comps);
+                            b.push_op(OpALd {
+                                dst: dst.into(),
+                                vtx: vtx,
+                                offset: offset,
+                                access: access,
+                            });
+                            self.set_dst(&intrin.def, dst);
+                        }
                     }
                 }
-
-                self.set_dst(&intrin.def, dst);
             }
             nir_intrinsic_load_interpolated_input => {
                 let bary =
@@ -1717,70 +1807,6 @@ impl<'a> ShaderFromNir<'a> {
                     access: access,
                 });
             }
-            nir_intrinsic_store_output => {
-                assert!(intrin.get_src(0).bit_size() == 32);
-                let comps = intrin.num_components;
-
-                let data = self.get_src(&srcs[0]);
-                let vtx = Src::new_zero();
-                let offset = self.get_src(&srcs[1]);
-                let offset_as_u32 = srcs[1].as_uint();
-
-                match &mut self.info.io {
-                    ShaderIoInfo::None => {
-                        panic!("Stage does not support load_input")
-                    }
-                    ShaderIoInfo::Fragment(io) => {
-                        /* We assume these only ever happen in the last block.
-                         * This is ensured by nir_lower_io_to_temporaries()
-                         */
-                        let data = *self.get_src(&srcs[0]).as_ssa().unwrap();
-                        assert!(offset_as_u32 == Some(0));
-                        let base: u8 = intrin.base().try_into().unwrap();
-                        let comp: u8 = intrin.component().try_into().unwrap();
-                        assert!(base % 4 == 0);
-                        let out_idx = usize::from((base / 4) + comp);
-                        for c in 0..usize::from(comps) {
-                            self.fs_out_regs[out_idx + c] = data[c];
-                        }
-                    }
-                    ShaderIoInfo::Vtg(io) => {
-                        let base = u16::try_from(intrin.base()).unwrap();
-                        let range = u16::try_from(intrin.base()).unwrap();
-                        let comp = u16::try_from(intrin.component()).unwrap();
-
-                        let (addr, offset) = match offset_as_u32 {
-                            Some(imm) => {
-                                let imm = u16::try_from(imm).unwrap();
-                                let addr = base + imm + 4 * comp;
-                                io.mark_attrs_written(
-                                    addr..(addr + 4 * u16::from(comps)),
-                                );
-                                (addr, Src::new_zero())
-                            }
-                            None => {
-                                io.mark_attrs_written(base..(base + range));
-                                (base + 4 * comp, offset)
-                            }
-                        };
-
-                        let access = AttrAccess {
-                            addr: addr,
-                            comps: comps,
-                            patch: false,
-                            output: true,
-                            flags: 0,
-                        };
-
-                        b.push_op(OpASt {
-                            vtx: vtx,
-                            offset: offset,
-                            data: data,
-                            access: access,
-                        });
-                    }
-                }
-            }
             nir_intrinsic_store_scratch => {
                 let data = self.get_src(&srcs[0]);
                 let size_B =
@@ -13,6 +13,7 @@ use crate::nak_cfg::CFG;
 use crate::nak_sph::{OutputTopology, PixelImap};
 use crate::{GetDebugFlags, DEBUG};
 use nak_ir_proc::*;
+use std::cmp::{max, min};
 use std::fmt;
 use std::iter::Zip;
 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
@@ -4735,7 +4736,7 @@ impl Default for GeometryShaderInfo {
     }
 }
 
-#[derive(Debug, Default)]
+#[derive(Debug)]
pub struct TessellationInitShaderInfo {
     pub per_patch_attribute_count: u8,
     pub threads_per_patch: u8,
@@ -4804,6 +4805,13 @@ impl VtgIoInfo {
     pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
         self.mark_attrs(addrs, true);
     }
+
+    pub fn mark_store_req(&mut self, addrs: Range<u16>) {
+        let start = (addrs.start / 4).try_into().unwrap();
+        let end = ((addrs.end - 1) / 4).try_into().unwrap();
+        self.store_req_start = min(self.store_req_start, start);
+        self.store_req_end = max(self.store_req_end, end);
+    }
 }
 
 #[derive(Debug)]
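For reference, the new store-req bookkeeping can be exercised on its own. A standalone sketch (the struct below is a stripped-down stand-in for the relevant VtgIoInfo fields, not the real type) of why store_req_start is initialized to u8::MAX and store_req_end to 0 in init_info_from_nir: the min/max folding in mark_store_req then establishes the range on the first call and only widens it afterwards.

    use std::cmp::{max, min};
    use std::ops::Range;

    // Illustrative stand-in for VtgIoInfo's store_req_start/store_req_end.
    struct StoreReq {
        start: u8, // lowest 32-bit attribute slot recorded so far
        end: u8,   // highest 32-bit attribute slot recorded so far
    }

    impl StoreReq {
        fn new() -> Self {
            // Start high / end low so the first mark() call establishes
            // the range instead of merely widening it.
            StoreReq { start: u8::MAX, end: 0 }
        }

        // Same arithmetic as VtgIoInfo::mark_store_req: a byte range is
        // folded into inclusive 32-bit (dword) slot indices.
        fn mark(&mut self, addrs: Range<u16>) {
            let start = (addrs.start / 4).try_into().unwrap();
            let end = ((addrs.end - 1) / 4).try_into().unwrap();
            self.start = min(self.start, start);
            self.end = max(self.end, end);
        }
    }

    fn main() {
        let mut req = StoreReq::new();
        req.mark(0x80..0x90); // bytes 0x80..0x90 -> dword slots 32..=35
        req.mark(0x70..0x74); // widens the low end to slot 28
        assert_eq!((req.start, req.end), (28, 35));
    }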