mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-02 13:50:09 +01:00
nak: Make barriers SSA-friendly
The NIR intrinsics now take and return a barrier whenever one is modified instead of modifying in-place. In NAK, we give the internal instructions the same treatment and convert everything to use barrier SSA values and RegRefs. In nak_from_nir, we move all barriers to/from GPRs. We'll clean up the massive pile of OpBMov later. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26463>
This commit is contained in:
parent
71813afa7a
commit
eda940c855
5 changed files with 77 additions and 133 deletions
|
|
@ -2036,8 +2036,9 @@ intrinsic("end_primitive_nv", dest_comp=1, src_comp=[1], indices=[STREAM_ID])
|
|||
intrinsic("final_primitive_nv", src_comp=[1])
|
||||
|
||||
intrinsic("bar_set_nv", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
intrinsic("bar_break_nv", src_comp=[1])
|
||||
intrinsic("bar_sync_nv", src_comp=[1])
|
||||
intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
|
||||
# src[] = { bar, bar_set }
|
||||
intrinsic("bar_sync_nv", src_comp=[1, 1])
|
||||
|
||||
# In order to deal with flipped render targets, gl_PointCoord may be flipped
|
||||
# in the shader requiring a shader key or extra instructions or it may be
|
||||
|
|
|
|||
|
|
@ -1692,7 +1692,7 @@ impl SM70Instr {
|
|||
self.set_opcode(0x355);
|
||||
|
||||
self.set_dst(Dst::None);
|
||||
self.set_field(24..28, op.dst.idx());
|
||||
self.set_bar_dst(24..28, op.dst);
|
||||
|
||||
self.set_bit(84, true); // .CLEAR
|
||||
}
|
||||
|
|
@ -1717,7 +1717,8 @@ impl SM70Instr {
|
|||
|
||||
fn encode_break(&mut self, op: &OpBreak) {
|
||||
self.set_opcode(0x942);
|
||||
self.set_field(16..20, op.bar.idx());
|
||||
assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
|
||||
self.set_bar_dst(16..20, op.bar_out);
|
||||
self.set_pred_src(87..90, 90, op.cond);
|
||||
}
|
||||
|
||||
|
|
@ -1728,14 +1729,15 @@ impl SM70Instr {
|
|||
labels: &HashMap<Label, usize>,
|
||||
) {
|
||||
self.set_opcode(0x945);
|
||||
self.set_field(16..20, op.bar.idx());
|
||||
assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
|
||||
self.set_bar_dst(16..20, op.bar_out);
|
||||
self.set_rel_offset(34..64, &op.target, ip, labels);
|
||||
self.set_pred_src(87..90, 90, op.cond);
|
||||
}
|
||||
|
||||
fn encode_bsync(&mut self, op: &OpBSync) {
|
||||
self.set_opcode(0x941);
|
||||
self.set_field(16..20, op.bar.idx());
|
||||
self.set_bar_src(16..20, op.bar);
|
||||
self.set_pred_src(87..90, 90, op.cond);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -144,53 +144,13 @@ impl<'a> PhiAllocMap<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
struct BarAlloc {
|
||||
used: u16,
|
||||
num_bars: u8,
|
||||
}
|
||||
|
||||
impl BarAlloc {
|
||||
pub fn new() -> BarAlloc {
|
||||
BarAlloc {
|
||||
used: 0,
|
||||
num_bars: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn num_bars(&self) -> u8 {
|
||||
self.num_bars
|
||||
}
|
||||
|
||||
pub fn reserve(&mut self, idx: u8) {
|
||||
self.num_bars = max(self.num_bars, idx + 1);
|
||||
let bit = 1 << idx;
|
||||
assert!(self.used & bit == 0);
|
||||
self.used |= bit;
|
||||
}
|
||||
|
||||
pub fn alloc(&mut self) -> BarRef {
|
||||
let idx = self.used.trailing_ones();
|
||||
assert!(idx < 16);
|
||||
let idx = idx as u8;
|
||||
self.reserve(idx);
|
||||
BarRef::new(idx)
|
||||
}
|
||||
|
||||
pub fn free(&mut self, bar: BarRef) {
|
||||
let bit = 1 << bar.idx();
|
||||
assert!(self.used & bit != 0);
|
||||
self.used &= !bit;
|
||||
}
|
||||
}
|
||||
|
||||
struct ShaderFromNir<'a> {
|
||||
nir: &'a nir_shader,
|
||||
info: ShaderInfo,
|
||||
cfg: CFGBuilder<u32, BasicBlock>,
|
||||
label_alloc: LabelAllocator,
|
||||
block_label: HashMap<u32, Label>,
|
||||
bar_alloc: BarAlloc,
|
||||
bar_ref_label: HashMap<u32, (BarRef, Label)>,
|
||||
bar_label: HashMap<u32, Label>,
|
||||
fs_out_regs: [SSAValue; 34],
|
||||
end_block_id: u32,
|
||||
ssa_map: HashMap<u32, Vec<SSAValue>>,
|
||||
|
|
@ -205,8 +165,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
cfg: CFGBuilder::new(),
|
||||
label_alloc: LabelAllocator::new(),
|
||||
block_label: HashMap::new(),
|
||||
bar_alloc: BarAlloc::new(),
|
||||
bar_ref_label: HashMap::new(),
|
||||
bar_label: HashMap::new(),
|
||||
fs_out_regs: [SSAValue::NONE; 34],
|
||||
end_block_id: 0,
|
||||
ssa_map: HashMap::new(),
|
||||
|
|
@ -1614,50 +1573,53 @@ impl<'a> ShaderFromNir<'a> {
|
|||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
nir_intrinsic_bar_break_nv => {
|
||||
let idx = &srcs[0].as_def().index;
|
||||
let (bar, _) = self.bar_ref_label.get(idx).unwrap();
|
||||
let src = self.get_src(&srcs[0]);
|
||||
let bar_in = b.bmov_to_bar(src);
|
||||
|
||||
let brk = b.push_op(OpBreak {
|
||||
bar: *bar,
|
||||
let bar_out = b.alloc_ssa(RegFile::Bar, 1);
|
||||
b.push_op(OpBreak {
|
||||
bar_out: bar_out.into(),
|
||||
bar_in: bar_in.into(),
|
||||
cond: SrcRef::True.into(),
|
||||
});
|
||||
brk.deps.yld = true;
|
||||
|
||||
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
|
||||
}
|
||||
nir_intrinsic_bar_set_nv => {
|
||||
let label = self.label_alloc.alloc();
|
||||
let bar = self.bar_alloc.alloc();
|
||||
let old = self.bar_label.insert(intrin.def.index, label);
|
||||
assert!(old.is_none());
|
||||
|
||||
let bmov = b.push_op(OpBClear {
|
||||
dst: bar,
|
||||
let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
|
||||
b.push_op(OpBClear {
|
||||
dst: bar_clear.into(),
|
||||
});
|
||||
bmov.deps.yld = true;
|
||||
|
||||
let bssy = b.push_op(OpBSSy {
|
||||
bar: bar,
|
||||
let bar_out = b.alloc_ssa(RegFile::Bar, 1);
|
||||
b.push_op(OpBSSy {
|
||||
bar_out: bar_out.into(),
|
||||
bar_in: bar_clear.into(),
|
||||
cond: SrcRef::True.into(),
|
||||
target: label,
|
||||
});
|
||||
bssy.deps.yld = true;
|
||||
|
||||
let old =
|
||||
self.bar_ref_label.insert(intrin.def.index, (bar, label));
|
||||
assert!(old.is_none());
|
||||
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
|
||||
}
|
||||
nir_intrinsic_bar_sync_nv => {
|
||||
let idx = &srcs[0].as_def().index;
|
||||
let (bar, label) = self.bar_ref_label.get(idx).unwrap();
|
||||
let src = self.get_src(&srcs[0]);
|
||||
|
||||
let bsync = b.push_op(OpBSync {
|
||||
bar: *bar,
|
||||
let bar = b.bmov_to_bar(src);
|
||||
b.push_op(OpBSync {
|
||||
bar: bar.into(),
|
||||
cond: SrcRef::True.into(),
|
||||
});
|
||||
bsync.deps.yld = true;
|
||||
|
||||
self.bar_alloc.free(*bar);
|
||||
|
||||
b.push_op(OpNop {
|
||||
label: Some(*label),
|
||||
});
|
||||
let bar_set_idx = &srcs[1].as_def().index;
|
||||
if let Some(label) = self.bar_label.get(bar_set_idx) {
|
||||
b.push_op(OpNop {
|
||||
label: Some(*label),
|
||||
});
|
||||
}
|
||||
}
|
||||
nir_intrinsic_bindless_image_atomic
|
||||
| nir_intrinsic_bindless_image_atomic_swap => {
|
||||
|
|
@ -2126,6 +2088,9 @@ impl<'a> ShaderFromNir<'a> {
|
|||
SCOPE_NONE => (),
|
||||
SCOPE_WORKGROUP => {
|
||||
if self.nir.info.stage() == MESA_SHADER_COMPUTE {
|
||||
// OpBar needs num_barriers > 0 but, as far as we
|
||||
// know, it doesn't actually use a barrier.
|
||||
self.info.num_barriers = 1;
|
||||
b.push_op(OpBar {});
|
||||
b.push_op(OpNop { label: None });
|
||||
}
|
||||
|
|
@ -2538,27 +2503,24 @@ impl<'a> ShaderFromNir<'a> {
|
|||
// memory. Perhaps this is to ensure that our allocation is
|
||||
// actually available and not in use by another thread?
|
||||
let label = self.label_alloc.alloc();
|
||||
let bar = self.bar_alloc.alloc();
|
||||
let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
|
||||
|
||||
let bmov = b.push_op(OpBClear {
|
||||
dst: bar,
|
||||
b.push_op(OpBClear {
|
||||
dst: bar_clear.into(),
|
||||
});
|
||||
bmov.deps.yld = true;
|
||||
|
||||
let bssy = b.push_op(OpBSSy {
|
||||
bar: bar,
|
||||
let bar = b.alloc_ssa(RegFile::Bar, 1);
|
||||
b.push_op(OpBSSy {
|
||||
bar_out: bar.into(),
|
||||
bar_in: bar_clear.into(),
|
||||
cond: SrcRef::True.into(),
|
||||
target: label,
|
||||
});
|
||||
bssy.deps.yld = true;
|
||||
|
||||
let bsync = b.push_op(OpBSync {
|
||||
bar: bar,
|
||||
b.push_op(OpBSync {
|
||||
bar: bar.into(),
|
||||
cond: SrcRef::True.into(),
|
||||
});
|
||||
bsync.deps.yld = true;
|
||||
|
||||
self.bar_alloc.free(bar);
|
||||
|
||||
b.push_op(OpNop { label: Some(label) });
|
||||
}
|
||||
|
|
@ -2743,15 +2705,6 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
|
||||
pub fn parse_shader(mut self) -> Shader {
|
||||
if self.nir.info.stage() == MESA_SHADER_COMPUTE
|
||||
&& self.nir.info.uses_control_barrier()
|
||||
{
|
||||
// We know OpBar uses a barrier but we don't know which one. Assume
|
||||
// it implicitly uses B0 and reserve it so it doesn't stomp any
|
||||
// other barriers
|
||||
self.bar_alloc.reserve(0);
|
||||
}
|
||||
|
||||
let mut functions = Vec::new();
|
||||
for nf in self.nir.iter_functions() {
|
||||
if let Some(nfi) = nf.get_impl() {
|
||||
|
|
@ -2760,8 +2713,6 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
self.info.num_barriers = self.bar_alloc.num_bars();
|
||||
|
||||
// Tessellation evaluation shaders MUST claim to read gl_TessCoord or
|
||||
// the hardware will throw an SPH error.
|
||||
match &self.info.stage {
|
||||
|
|
|
|||
|
|
@ -642,28 +642,6 @@ impl fmt::Display for RegRef {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||
pub struct BarRef {
|
||||
idx: u8,
|
||||
}
|
||||
|
||||
impl BarRef {
|
||||
pub fn new(idx: u8) -> BarRef {
|
||||
assert!(idx < 16);
|
||||
BarRef { idx: idx }
|
||||
}
|
||||
|
||||
pub fn idx(&self) -> u8 {
|
||||
self.idx
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for BarRef {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "B{}", self.idx())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum Dst {
|
||||
None,
|
||||
|
|
@ -3633,7 +3611,7 @@ impl_display_for_op!(OpMemBar);
|
|||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBClear {
|
||||
pub dst: BarRef,
|
||||
pub dst: Dst,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpBClear {
|
||||
|
|
@ -3665,7 +3643,10 @@ impl_display_for_op!(OpBMov);
|
|||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBreak {
|
||||
pub bar: BarRef,
|
||||
pub bar_out: Dst,
|
||||
|
||||
#[src_type(Bar)]
|
||||
pub bar_in: Src,
|
||||
|
||||
#[src_type(Pred)]
|
||||
pub cond: Src,
|
||||
|
|
@ -3673,7 +3654,7 @@ pub struct OpBreak {
|
|||
|
||||
impl DisplayOp for OpBreak {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "break {} {}", self.cond, self.bar)
|
||||
write!(f, "break {} {}", self.bar_in, self.cond)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpBreak);
|
||||
|
|
@ -3681,7 +3662,10 @@ impl_display_for_op!(OpBreak);
|
|||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBSSy {
|
||||
pub bar: BarRef,
|
||||
pub bar_out: Dst,
|
||||
|
||||
#[src_type(Pred)]
|
||||
pub bar_in: Src,
|
||||
|
||||
#[src_type(Pred)]
|
||||
pub cond: Src,
|
||||
|
|
@ -3691,7 +3675,7 @@ pub struct OpBSSy {
|
|||
|
||||
impl DisplayOp for OpBSSy {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "bssy {} {} {}", self.cond, self.bar, self.target)
|
||||
write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpBSSy);
|
||||
|
|
@ -3699,7 +3683,8 @@ impl_display_for_op!(OpBSSy);
|
|||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBSync {
|
||||
pub bar: BarRef,
|
||||
#[src_type(Bar)]
|
||||
pub bar: Src,
|
||||
|
||||
#[src_type(Pred)]
|
||||
pub cond: Src,
|
||||
|
|
@ -3707,7 +3692,7 @@ pub struct OpBSync {
|
|||
|
||||
impl DisplayOp for OpBSync {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "bsync {} {}", self.cond, self.bar)
|
||||
write!(f, "bsync {} {}", self.bar, self.cond)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpBSync);
|
||||
|
|
@ -4704,9 +4689,6 @@ impl Instr {
|
|||
| Op::MemBar(_)
|
||||
| Op::Kill(_)
|
||||
| Op::Nop(_)
|
||||
| Op::BClear(_)
|
||||
| Op::Break(_)
|
||||
| Op::BSSy(_)
|
||||
| Op::BSync(_)
|
||||
| Op::Bra(_)
|
||||
| Op::Exit(_)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@
|
|||
|
||||
struct barrier {
|
||||
nir_cf_node *node;
|
||||
nir_def *bar;
|
||||
nir_def *bar_set;
|
||||
nir_def *bar_reg;
|
||||
};
|
||||
|
||||
struct add_barriers_state {
|
||||
|
|
@ -30,13 +31,16 @@ add_bar_cf_node(nir_cf_node *node, struct add_barriers_state *state)
|
|||
|
||||
b->cursor = nir_after_block(before);
|
||||
nir_def *bar = nir_bar_set_nv(b);
|
||||
nir_def *bar_reg = nir_decl_reg(b, 1, 32, 0);
|
||||
nir_store_reg(b, bar, bar_reg);
|
||||
|
||||
b->cursor = nir_before_block_after_phis(after);
|
||||
nir_bar_sync_nv(b, bar);
|
||||
nir_bar_sync_nv(b, nir_load_reg(b, bar_reg), bar);
|
||||
|
||||
struct barrier barrier = {
|
||||
.node = node,
|
||||
.bar = bar,
|
||||
.bar_set = bar,
|
||||
.bar_reg = bar_reg,
|
||||
};
|
||||
util_dynarray_append(&state->barriers, struct barrier, barrier);
|
||||
|
||||
|
|
@ -72,7 +76,9 @@ break_loop_bars(nir_block *block, struct add_barriers_state *state)
|
|||
const struct barrier *bar =
|
||||
util_dynarray_element(&state->barriers, struct barrier, idx);
|
||||
if (bar->node == p) {
|
||||
nir_bar_break_nv(b, bar->bar);
|
||||
nir_def *bar_val = nir_load_reg(b, bar->bar_reg);
|
||||
bar_val = nir_bar_break_nv(b, bar_val);
|
||||
nir_store_reg(b, bar_val, bar->bar_reg);
|
||||
idx--;
|
||||
}
|
||||
}
|
||||
|
|
@ -281,6 +287,8 @@ nak_nir_add_barriers_impl(nir_function_impl *impl,
|
|||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance |
|
||||
nir_metadata_loop_analysis);
|
||||
|
||||
nir_lower_reg_intrinsics_to_ssa_impl(impl);
|
||||
} else {
|
||||
nir_metadata_preserve(impl, nir_metadata_all);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue