nak: Rework FS outputs again

This time we map them to a consistent output address space like we do
for all other I/O and system values and do the remap in nak_from_nir.
This lets us know very precise usage information and more robustly build
the OMask in the shader header.  We also handle location_frac now.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand 2023-09-14 12:37:41 -05:00 committed by Marge Bot
parent 7ce8470bd4
commit 9eeda9dd6d
5 changed files with 113 additions and 63 deletions

View file

@ -25,7 +25,7 @@ mod nak_to_cssa;
mod nir;
mod util;
use crate::nak_ir::ShaderInfo;
use crate::nak_ir::{ShaderInfo, ShaderStageInfo};
use crate::nir::NirShader;
use bitview::*;
@ -234,8 +234,8 @@ fn encode_hdr_for_nir(
_ => panic!("Unknown shader stage"),
};
cw0.set_field(10..14, shader_type);
if nir.info.stage() == MESA_SHADER_FRAGMENT {
cw0.set_bit(14, nir.num_outputs > 1);
if let ShaderStageInfo::Fragment(fs_info) = &shader_info.stage {
cw0.set_bit(14, fs_info.writes_color > 0xf);
let info_fs = unsafe { &nir.info.__bindgen_anon_1.fs };
let zs_self_dep = fs_key.map_or(false, |key| key.zs_self_dep);
cw0.set_bit(15, info_fs.uses_discard() || zs_self_dep);
@ -401,25 +401,18 @@ fn encode_hdr_for_nir(
/* [480, 559]: ImapFixedFncTexture[10] */
/* [560, 575]: ImapReserved */
/* [576, 607]: OmapTarget[8] */
let mut omap_color = hdr_view.subset_mut(576..608);
let ShaderStageInfo::Fragment(fs_info) = &shader_info.stage else {
panic!("Not a fragment shader");
};
let nir_ow = BitView::new(&nir.info.outputs_written);
let output0 = usize::try_from(FRAG_RESULT_DATA0).unwrap();
for i in 0..8 {
if nir_ow.get_bit(output0 + i) {
omap_color.set_field((i * 4)..(i * 4 + 4), 0xf_u32);
}
}
/* [576, 607]: OmapTarget[8] */
hdr_view.set_field(576..608, fs_info.writes_color);
/* [608]: OmapSampleMask */
let has_sample_mask =
nir_ow.get_bit(FRAG_RESULT_SAMPLE_MASK.try_into().unwrap());
hdr_view.set_bit(608, has_sample_mask);
hdr_view.set_bit(608, fs_info.writes_sample_mask);
/* [609]: OmapDepth */
let has_depth = nir_ow.get_bit(FRAG_RESULT_DEPTH.try_into().unwrap());
hdr_view.set_bit(609, has_depth);
hdr_view.set_bit(609, fs_info.writes_depth);
/* [610, 639]: Reserved */
}

View file

@ -21,6 +21,13 @@ fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
sm: sm,
num_gprs: 0,
tls_size: nir.scratch_size,
stage: match nir.info.stage() {
MESA_SHADER_COMPUTE => ShaderStageInfo::Compute,
MESA_SHADER_FRAGMENT => {
ShaderStageInfo::Fragment(Default::default())
}
_ => panic!("Unknown shader stage"),
},
}
}
@ -64,7 +71,7 @@ struct ShaderFromNir<'a> {
nir: &'a nir_shader,
info: ShaderInfo,
cfg: CFGBuilder<u32, BasicBlock>,
fs_out_regs: Vec<Src>,
fs_out_regs: [SSAValue; 34],
end_block_id: u32,
ssa_map: HashMap<u32, Vec<SSAValue>>,
saturated: HashSet<*const nir_def>,
@ -72,17 +79,11 @@ struct ShaderFromNir<'a> {
impl<'a> ShaderFromNir<'a> {
fn new(nir: &'a nir_shader, sm: u8) -> Self {
let mut fs_out_regs = Vec::new();
if nir.info.stage() == MESA_SHADER_FRAGMENT {
fs_out_regs
.resize(nir.num_outputs.try_into().unwrap(), Src::new_zero());
}
Self {
nir: nir,
info: init_info_from_nir(nir, sm),
cfg: CFGBuilder::new(),
fs_out_regs: fs_out_regs,
fs_out_regs: [SSAValue::NONE; 34],
end_block_id: 0,
ssa_map: HashMap::new(),
saturated: HashSet::new(),
@ -1483,10 +1484,10 @@ impl<'a> ShaderFromNir<'a> {
*/
let data = *self.get_src(&srcs[0]).as_ssa().unwrap();
assert!(srcs[1].is_zero());
let base: u8 = intrin.base().try_into().unwrap();
for c in 0..intrin.num_components {
self.fs_out_regs[usize::from(base + c)] =
data[usize::from(c)].into();
let base: usize = intrin.base().try_into().unwrap();
assert!(base % 4 == 0);
for c in 0..usize::from(intrin.num_components) {
self.fs_out_regs[(base / 4) + c] = data[c];
}
} else {
let data = self.get_src(&srcs[0]);
@ -1603,6 +1604,48 @@ impl<'a> ShaderFromNir<'a> {
self.set_ssa(&undef.def, dst);
}
fn store_fs_outputs(&mut self, b: &mut impl SSABuilder) {
let ShaderStageInfo::Fragment(info) = &mut self.info.stage else {
return;
};
for i in 0..32 {
// Assume that colors have to come a vec4 at a time
if !self.fs_out_regs[i].is_none() {
info.writes_color |= 0xf << (i & !3)
}
}
let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize;
info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none();
let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize;
info.writes_depth = !self.fs_out_regs[depth_idx].is_none();
let mut srcs = Vec::new();
for i in 0..32 {
if info.writes_color & (1 << i) != 0 {
if self.fs_out_regs[i].is_none() {
srcs.push(Src::new_zero());
} else {
srcs.push(self.fs_out_regs[i].into());
}
}
}
// These always come together for some reason
if info.writes_sample_mask || info.writes_depth {
if info.writes_sample_mask {
srcs.push(self.fs_out_regs[mask_idx].into());
} else {
srcs.push(Src::new_zero());
}
if info.writes_depth {
srcs.push(self.fs_out_regs[depth_idx].into());
}
}
b.push_op(OpFSOut { srcs: srcs });
}
fn parse_block<'b>(
&mut self,
ssa_alloc: &mut SSAValueAllocator,
@ -1707,14 +1750,7 @@ impl<'a> ShaderFromNir<'a> {
assert!(succ[1].is_none());
let s0 = succ[0].unwrap();
if s0.index == self.end_block_id {
if self.nir.info.stage() == MESA_SHADER_FRAGMENT {
b.push_op(OpFSOut {
srcs: std::mem::replace(
&mut self.fs_out_regs,
Vec::new(),
),
});
}
self.store_fs_outputs(&mut b);
b.push_op(OpExit {});
} else {
self.cfg.add_edge(nb.index, s0.index);

View file

@ -4330,11 +4330,25 @@ impl fmt::Display for Function {
}
}
/// Fragment-shader output usage.
///
/// The fields are filled in by `store_fs_outputs` while translating from NIR
/// and read back by `encode_hdr_for_nir` when building the shader header.
#[derive(Debug, Default)]
pub struct FragmentShaderInfo {
/// Bitmask of written color output components. Bit `i` corresponds to slot
/// `i` of the FS output registers; `store_fs_outputs` sets bits four at a
/// time (one vec4 per color target). Written verbatim into header bits
/// 576..608 (`OmapTarget[8]`).
pub writes_color: u32,
/// True when the sample-mask output slot holds a value. Written to header
/// bit 608 (`OmapSampleMask`).
pub writes_sample_mask: bool,
/// True when the depth output slot holds a value. Written to header bit
/// 609 (`OmapDepth`).
pub writes_depth: bool,
}
/// Per-stage information attached to a `ShaderInfo`.
///
/// `init_info_from_nir` only produces these two variants and panics with
/// "Unknown shader stage" for any other NIR stage.
#[derive(Debug)]
pub enum ShaderStageInfo {
/// Compute shader; carries no extra data.
Compute,
/// Fragment shader, with its output usage information.
Fragment(FragmentShaderInfo),
}
/// Shader-wide information gathered during compilation.
///
/// Created by `init_info_from_nir`.
#[derive(Debug)]
pub struct ShaderInfo {
/// Copied from the `sm` argument of `init_info_from_nir`; presumably the
/// target SM version -- TODO confirm.
pub sm: u8,
/// Initialized to 0 by `init_info_from_nir`; presumably the number of
/// general-purpose registers used, filled in later -- TODO confirm.
pub num_gprs: u8,
/// Initialized from the NIR shader's `scratch_size`.
pub tls_size: u32,
/// Stage-specific information (compute or fragment).
pub stage: ShaderStageInfo,
}
pub struct Shader {

View file

@ -369,7 +369,7 @@ static int
fs_out_size(const struct glsl_type *type, bool bindless)
{
assert(glsl_type_is_vector_or_scalar(type));
return 4;
return 16;
}
static bool
@ -380,43 +380,35 @@ nak_nir_lower_fs_outputs(nir_shader *nir)
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
nir_foreach_shader_out_variable(var, nir) {
if (var->data.index > 0) {
assert(var->data.location == FRAG_RESULT_DATA0);
assert(!(nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA1)));
var->data.location = FRAG_RESULT_DATA1;
nir->info.outputs_written |= BITFIELD_BIT(FRAG_RESULT_DATA1);
}
}
const uint32_t color_targets =
(nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) ?
1 : (nir->info.outputs_written >> FRAG_RESULT_DATA0);
const bool writes_depth =
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DEPTH);
const bool writes_sample_mask =
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_SAMPLE_MASK);
nir->num_outputs = util_bitcount(color_targets) * 4 +
(writes_depth || writes_sample_mask) * 2;
nir->num_outputs = 0;
nir_foreach_shader_out_variable(var, nir) {
assert(nir->info.outputs_written & BITFIELD_BIT(var->data.location));
switch (var->data.location) {
case FRAG_RESULT_DEPTH:
var->data.driver_location = util_bitcount(color_targets) * 4 + 1;
assert(var->data.index == 0);
assert(var->data.location_frac == 0);
var->data.driver_location = NAK_FS_OUT_DEPTH;
break;
case FRAG_RESULT_STENCIL:
unreachable("EXT_shader_stencil_export not supported");
break;
case FRAG_RESULT_COLOR:
var->data.driver_location = 0;
assert(var->data.index == 0);
var->data.driver_location =
NAK_FS_OUT_COLOR0 + var->data.location_frac * 4;
break;
case FRAG_RESULT_SAMPLE_MASK:
var->data.driver_location = util_bitcount(color_targets) * 4;
assert(var->data.index == 0);
assert(var->data.location_frac == 0);
var->data.driver_location = NAK_FS_OUT_SAMPLE_MASK;
break;
default: {
assert(var->data.location >= FRAG_RESULT_DATA0);
const unsigned out = var->data.location - FRAG_RESULT_DATA0;
assert(var->data.index < 2);
const unsigned out =
(var->data.location - FRAG_RESULT_DATA0) + var->data.index;
var->data.driver_location =
util_bitcount(color_targets & BITFIELD_MASK(out)) * 4;
NAK_FS_OUT_COLOR(out) + var->data.location_frac * 4;
break;
}
}

View file

@ -54,6 +54,21 @@ struct nak_nir_tex_flags {
bool nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak);
/* Fixed output address space for fragment shader outputs.
 *
 * nak_nir_lower_fs_outputs() assigns these values (plus location_frac * 4
 * for colors) to each output variable's driver_location.  The Rust side
 * divides the address by 4 to index its per-dword output register array,
 * so the values here are byte addresses of 4-byte components.
 */
enum nak_fs_out {
/* Color targets: 0x10 apart, i.e. room for one vec4 each. */
NAK_FS_OUT_COLOR0 = 0x00,
NAK_FS_OUT_COLOR1 = 0x10,
NAK_FS_OUT_COLOR2 = 0x20,
NAK_FS_OUT_COLOR3 = 0x30,
NAK_FS_OUT_COLOR4 = 0x40,
NAK_FS_OUT_COLOR5 = 0x50,
NAK_FS_OUT_COLOR6 = 0x60,
NAK_FS_OUT_COLOR7 = 0x70,
/* Sample mask and depth directly follow the eight color targets. */
NAK_FS_OUT_SAMPLE_MASK = 0x80,
NAK_FS_OUT_DEPTH = 0x84,
};
/* Address of color target n; matches NAK_FS_OUT_COLOR<n> for n in [0, 7]. */
#define NAK_FS_OUT_COLOR(n) (NAK_FS_OUT_COLOR0 + (n) * 16)
#ifdef __cplusplus
}
#endif