nak: Make barriers SSA-friendly

The NIR intrinsics now take and return a barrier whenever one is
modified instead of modifying in-place.  In NAK, we give the internal
instructions the same treatment and convert everything to use barrier
SSA values and RegRefs.  In nak_from_nir, we move all barriers to/from
GPRs.  We'll clean up the massive pile of OpBMov later.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26463>
This commit is contained in:
Faith Ekstrand 2023-12-04 09:03:14 -06:00 committed by Marge Bot
parent 71813afa7a
commit eda940c855
5 changed files with 77 additions and 133 deletions

View file

@ -2036,8 +2036,9 @@ intrinsic("end_primitive_nv", dest_comp=1, src_comp=[1], indices=[STREAM_ID])
intrinsic("final_primitive_nv", src_comp=[1])
intrinsic("bar_set_nv", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("bar_break_nv", src_comp=[1])
intrinsic("bar_sync_nv", src_comp=[1])
intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
# src[] = { bar, bar_set }
intrinsic("bar_sync_nv", src_comp=[1, 1])
# In order to deal with flipped render targets, gl_PointCoord may be flipped
# in the shader requiring a shader key or extra instructions or it may be

View file

@ -1692,7 +1692,7 @@ impl SM70Instr {
self.set_opcode(0x355);
self.set_dst(Dst::None);
self.set_field(24..28, op.dst.idx());
self.set_bar_dst(24..28, op.dst);
self.set_bit(84, true); // .CLEAR
}
@ -1717,7 +1717,8 @@ impl SM70Instr {
fn encode_break(&mut self, op: &OpBreak) {
self.set_opcode(0x942);
self.set_field(16..20, op.bar.idx());
assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
self.set_bar_dst(16..20, op.bar_out);
self.set_pred_src(87..90, 90, op.cond);
}
@ -1728,14 +1729,15 @@ impl SM70Instr {
labels: &HashMap<Label, usize>,
) {
self.set_opcode(0x945);
self.set_field(16..20, op.bar.idx());
assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
self.set_bar_dst(16..20, op.bar_out);
self.set_rel_offset(34..64, &op.target, ip, labels);
self.set_pred_src(87..90, 90, op.cond);
}
fn encode_bsync(&mut self, op: &OpBSync) {
self.set_opcode(0x941);
self.set_field(16..20, op.bar.idx());
self.set_bar_src(16..20, op.bar);
self.set_pred_src(87..90, 90, op.cond);
}

View file

@ -144,53 +144,13 @@ impl<'a> PhiAllocMap<'a> {
}
}
struct BarAlloc {
used: u16,
num_bars: u8,
}
impl BarAlloc {
pub fn new() -> BarAlloc {
BarAlloc {
used: 0,
num_bars: 0,
}
}
pub fn num_bars(&self) -> u8 {
self.num_bars
}
pub fn reserve(&mut self, idx: u8) {
self.num_bars = max(self.num_bars, idx + 1);
let bit = 1 << idx;
assert!(self.used & bit == 0);
self.used |= bit;
}
pub fn alloc(&mut self) -> BarRef {
let idx = self.used.trailing_ones();
assert!(idx < 16);
let idx = idx as u8;
self.reserve(idx);
BarRef::new(idx)
}
pub fn free(&mut self, bar: BarRef) {
let bit = 1 << bar.idx();
assert!(self.used & bit != 0);
self.used &= !bit;
}
}
struct ShaderFromNir<'a> {
nir: &'a nir_shader,
info: ShaderInfo,
cfg: CFGBuilder<u32, BasicBlock>,
label_alloc: LabelAllocator,
block_label: HashMap<u32, Label>,
bar_alloc: BarAlloc,
bar_ref_label: HashMap<u32, (BarRef, Label)>,
bar_label: HashMap<u32, Label>,
fs_out_regs: [SSAValue; 34],
end_block_id: u32,
ssa_map: HashMap<u32, Vec<SSAValue>>,
@ -205,8 +165,7 @@ impl<'a> ShaderFromNir<'a> {
cfg: CFGBuilder::new(),
label_alloc: LabelAllocator::new(),
block_label: HashMap::new(),
bar_alloc: BarAlloc::new(),
bar_ref_label: HashMap::new(),
bar_label: HashMap::new(),
fs_out_regs: [SSAValue::NONE; 34],
end_block_id: 0,
ssa_map: HashMap::new(),
@ -1614,50 +1573,53 @@ impl<'a> ShaderFromNir<'a> {
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_bar_break_nv => {
let idx = &srcs[0].as_def().index;
let (bar, _) = self.bar_ref_label.get(idx).unwrap();
let src = self.get_src(&srcs[0]);
let bar_in = b.bmov_to_bar(src);
let brk = b.push_op(OpBreak {
bar: *bar,
let bar_out = b.alloc_ssa(RegFile::Bar, 1);
b.push_op(OpBreak {
bar_out: bar_out.into(),
bar_in: bar_in.into(),
cond: SrcRef::True.into(),
});
brk.deps.yld = true;
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
}
nir_intrinsic_bar_set_nv => {
let label = self.label_alloc.alloc();
let bar = self.bar_alloc.alloc();
let old = self.bar_label.insert(intrin.def.index, label);
assert!(old.is_none());
let bmov = b.push_op(OpBClear {
dst: bar,
let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
b.push_op(OpBClear {
dst: bar_clear.into(),
});
bmov.deps.yld = true;
let bssy = b.push_op(OpBSSy {
bar: bar,
let bar_out = b.alloc_ssa(RegFile::Bar, 1);
b.push_op(OpBSSy {
bar_out: bar_out.into(),
bar_in: bar_clear.into(),
cond: SrcRef::True.into(),
target: label,
});
bssy.deps.yld = true;
let old =
self.bar_ref_label.insert(intrin.def.index, (bar, label));
assert!(old.is_none());
self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
}
nir_intrinsic_bar_sync_nv => {
let idx = &srcs[0].as_def().index;
let (bar, label) = self.bar_ref_label.get(idx).unwrap();
let src = self.get_src(&srcs[0]);
let bsync = b.push_op(OpBSync {
bar: *bar,
let bar = b.bmov_to_bar(src);
b.push_op(OpBSync {
bar: bar.into(),
cond: SrcRef::True.into(),
});
bsync.deps.yld = true;
self.bar_alloc.free(*bar);
b.push_op(OpNop {
label: Some(*label),
});
let bar_set_idx = &srcs[1].as_def().index;
if let Some(label) = self.bar_label.get(bar_set_idx) {
b.push_op(OpNop {
label: Some(*label),
});
}
}
nir_intrinsic_bindless_image_atomic
| nir_intrinsic_bindless_image_atomic_swap => {
@ -2126,6 +2088,9 @@ impl<'a> ShaderFromNir<'a> {
SCOPE_NONE => (),
SCOPE_WORKGROUP => {
if self.nir.info.stage() == MESA_SHADER_COMPUTE {
// OpBar needs num_barriers > 0 but, as far as we
// know, it doesn't actually use a barrier.
self.info.num_barriers = 1;
b.push_op(OpBar {});
b.push_op(OpNop { label: None });
}
@ -2538,27 +2503,24 @@ impl<'a> ShaderFromNir<'a> {
// memory. Perhaps this is to ensure that our allocation is
// actually available and not in use by another thread?
let label = self.label_alloc.alloc();
let bar = self.bar_alloc.alloc();
let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
let bmov = b.push_op(OpBClear {
dst: bar,
b.push_op(OpBClear {
dst: bar_clear.into(),
});
bmov.deps.yld = true;
let bssy = b.push_op(OpBSSy {
bar: bar,
let bar = b.alloc_ssa(RegFile::Bar, 1);
b.push_op(OpBSSy {
bar_out: bar.into(),
bar_in: bar_clear.into(),
cond: SrcRef::True.into(),
target: label,
});
bssy.deps.yld = true;
let bsync = b.push_op(OpBSync {
bar: bar,
b.push_op(OpBSync {
bar: bar.into(),
cond: SrcRef::True.into(),
});
bsync.deps.yld = true;
self.bar_alloc.free(bar);
b.push_op(OpNop { label: Some(label) });
}
@ -2743,15 +2705,6 @@ impl<'a> ShaderFromNir<'a> {
}
pub fn parse_shader(mut self) -> Shader {
if self.nir.info.stage() == MESA_SHADER_COMPUTE
&& self.nir.info.uses_control_barrier()
{
// We know OpBar uses a barrier but we don't know which one. Assume
// it implicitly uses B0 and reserve it so it doesn't stomp any
// other barriers
self.bar_alloc.reserve(0);
}
let mut functions = Vec::new();
for nf in self.nir.iter_functions() {
if let Some(nfi) = nf.get_impl() {
@ -2760,8 +2713,6 @@ impl<'a> ShaderFromNir<'a> {
}
}
self.info.num_barriers = self.bar_alloc.num_bars();
// Tessellation evaluation shaders MUST claim to read gl_TessCoord or
// the hardware will throw an SPH error.
match &self.info.stage {

View file

@ -642,28 +642,6 @@ impl fmt::Display for RegRef {
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct BarRef {
idx: u8,
}
impl BarRef {
pub fn new(idx: u8) -> BarRef {
assert!(idx < 16);
BarRef { idx: idx }
}
pub fn idx(&self) -> u8 {
self.idx
}
}
impl fmt::Display for BarRef {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "B{}", self.idx())
}
}
#[derive(Clone, Copy)]
pub enum Dst {
None,
@ -3633,7 +3611,7 @@ impl_display_for_op!(OpMemBar);
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpBClear {
pub dst: BarRef,
pub dst: Dst,
}
impl DisplayOp for OpBClear {
@ -3665,7 +3643,10 @@ impl_display_for_op!(OpBMov);
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpBreak {
pub bar: BarRef,
pub bar_out: Dst,
#[src_type(Bar)]
pub bar_in: Src,
#[src_type(Pred)]
pub cond: Src,
@ -3673,7 +3654,7 @@ pub struct OpBreak {
impl DisplayOp for OpBreak {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "break {} {}", self.cond, self.bar)
write!(f, "break {} {}", self.bar_in, self.cond)
}
}
impl_display_for_op!(OpBreak);
@ -3681,7 +3662,10 @@ impl_display_for_op!(OpBreak);
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpBSSy {
pub bar: BarRef,
pub bar_out: Dst,
#[src_type(Pred)]
pub bar_in: Src,
#[src_type(Pred)]
pub cond: Src,
@ -3691,7 +3675,7 @@ pub struct OpBSSy {
impl DisplayOp for OpBSSy {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "bssy {} {} {}", self.cond, self.bar, self.target)
write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
}
}
impl_display_for_op!(OpBSSy);
@ -3699,7 +3683,8 @@ impl_display_for_op!(OpBSSy);
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpBSync {
pub bar: BarRef,
#[src_type(Bar)]
pub bar: Src,
#[src_type(Pred)]
pub cond: Src,
@ -3707,7 +3692,7 @@ pub struct OpBSync {
impl DisplayOp for OpBSync {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "bsync {} {}", self.cond, self.bar)
write!(f, "bsync {} {}", self.bar, self.cond)
}
}
impl_display_for_op!(OpBSync);
@ -4704,9 +4689,6 @@ impl Instr {
| Op::MemBar(_)
| Op::Kill(_)
| Op::Nop(_)
| Op::BClear(_)
| Op::Break(_)
| Op::BSSy(_)
| Op::BSync(_)
| Op::Bra(_)
| Op::Exit(_)

View file

@ -10,7 +10,8 @@
struct barrier {
nir_cf_node *node;
nir_def *bar;
nir_def *bar_set;
nir_def *bar_reg;
};
struct add_barriers_state {
@ -30,13 +31,16 @@ add_bar_cf_node(nir_cf_node *node, struct add_barriers_state *state)
b->cursor = nir_after_block(before);
nir_def *bar = nir_bar_set_nv(b);
nir_def *bar_reg = nir_decl_reg(b, 1, 32, 0);
nir_store_reg(b, bar, bar_reg);
b->cursor = nir_before_block_after_phis(after);
nir_bar_sync_nv(b, bar);
nir_bar_sync_nv(b, nir_load_reg(b, bar_reg), bar);
struct barrier barrier = {
.node = node,
.bar = bar,
.bar_set = bar,
.bar_reg = bar_reg,
};
util_dynarray_append(&state->barriers, struct barrier, barrier);
@ -72,7 +76,9 @@ break_loop_bars(nir_block *block, struct add_barriers_state *state)
const struct barrier *bar =
util_dynarray_element(&state->barriers, struct barrier, idx);
if (bar->node == p) {
nir_bar_break_nv(b, bar->bar);
nir_def *bar_val = nir_load_reg(b, bar->bar_reg);
bar_val = nir_bar_break_nv(b, bar_val);
nir_store_reg(b, bar_val, bar->bar_reg);
idx--;
}
}
@ -281,6 +287,8 @@ nak_nir_add_barriers_impl(nir_function_impl *impl,
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance |
nir_metadata_loop_analysis);
nir_lower_reg_intrinsics_to_ssa_impl(impl);
} else {
nir_metadata_preserve(impl, nir_metadata_all);
}