ac/nir: add ac_nir_export_position

Used by last VGT stage to export position related outputs.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>
This commit is contained in:
Qiang Yu 2022-12-21 15:04:47 +08:00 committed by Marge Bot
parent e9268b2c06
commit f823581dd4
2 changed files with 166 additions and 0 deletions

View file

@ -56,6 +56,164 @@ ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim)
.write_mask = write_mask);
}
static nir_ssa_def *
get_export_output(nir_builder *b, nir_ssa_def **output)
{
nir_ssa_def *vec[4];
for (int i = 0; i < 4; i++) {
if (output[i])
vec[i] = nir_u2uN(b, output[i], 32);
else
vec[i] = nir_ssa_undef(b, 1, 32);
}
return nir_vec(b, vec, 4);
}
void
ac_nir_export_position(nir_builder *b,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool no_param_export,
uint64_t outputs_written,
nir_ssa_def *(*outputs)[4])
{
nir_intrinsic_instr *exp[4];
unsigned exp_num = 0;
nir_ssa_def *pos;
if (outputs_written & VARYING_BIT_POS) {
pos = get_export_output(b, outputs[VARYING_SLOT_POS]);
} else {
nir_ssa_def *zero = nir_imm_float(b, 0);
nir_ssa_def *one = nir_imm_float(b, 1);
pos = nir_vec4(b, zero, zero, zero, one);
}
/* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
* Setting valid_mask=1 prevents it and has no other effect.
*/
unsigned pos_flags = gfx_level == GFX10 ? AC_EXP_FLAG_VALID_MASK : 0;
exp[exp_num] = nir_export_amd(
b, pos, .base = V_008DFC_SQ_EXP_POS + exp_num,
.flags = pos_flags, .write_mask = 0xf);
exp_num++;
uint64_t mask =
VARYING_BIT_PSIZ |
VARYING_BIT_EDGE |
VARYING_BIT_LAYER |
VARYING_BIT_VIEWPORT |
VARYING_BIT_PRIMITIVE_SHADING_RATE;
/* clear output mask if no one written */
if (!outputs[VARYING_SLOT_PSIZ][0])
outputs_written &= ~VARYING_BIT_PSIZ;
if (!outputs[VARYING_SLOT_EDGE][0])
outputs_written &= ~VARYING_BIT_EDGE;
if (!outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0])
outputs_written &= ~VARYING_BIT_PRIMITIVE_SHADING_RATE;
if (!outputs[VARYING_SLOT_LAYER][0])
outputs_written &= ~VARYING_BIT_LAYER;
if (!outputs[VARYING_SLOT_VIEWPORT][0])
outputs_written &= ~VARYING_BIT_VIEWPORT;
if (outputs_written & mask) {
nir_ssa_def *zero = nir_imm_float(b, 0);
nir_ssa_def *vec[4] = { zero, zero, zero, zero };
unsigned flags = 0;
unsigned write_mask = 0;
if (outputs_written & VARYING_BIT_PSIZ) {
vec[0] = outputs[VARYING_SLOT_PSIZ][0];
write_mask |= BITFIELD_BIT(0);
}
if (outputs_written & VARYING_BIT_EDGE) {
vec[1] = nir_umin(b, outputs[VARYING_SLOT_EDGE][0], nir_imm_int(b, 1));
write_mask |= BITFIELD_BIT(1);
}
if (outputs_written & VARYING_BIT_PRIMITIVE_SHADING_RATE) {
vec[1] = nir_ior(b, vec[1], outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]);
write_mask |= BITFIELD_BIT(1);
}
if (outputs_written & VARYING_BIT_LAYER) {
vec[2] = outputs[VARYING_SLOT_LAYER][0];
write_mask |= BITFIELD_BIT(2);
}
if (outputs_written & VARYING_BIT_VIEWPORT) {
if (gfx_level >= GFX9) {
/* GFX9 has the layer in [10:0] and the viewport index in [19:16]. */
nir_ssa_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16);
vec[2] = nir_ior(b, vec[2], v);
write_mask |= BITFIELD_BIT(2);
} else {
vec[3] = outputs[VARYING_SLOT_VIEWPORT][0];
write_mask |= BITFIELD_BIT(3);
}
}
exp[exp_num] = nir_export_amd(
b, nir_vec(b, vec, 4),
.base = V_008DFC_SQ_EXP_POS + exp_num,
.flags = flags,
.write_mask = write_mask);
exp_num++;
}
for (int i = 0; i < 2; i++) {
if ((outputs_written & (VARYING_BIT_CLIP_DIST0 << i)) &&
(clip_cull_mask & BITFIELD_RANGE(i * 4, 4))) {
exp[exp_num] = nir_export_amd(
b, get_export_output(b, outputs[VARYING_SLOT_CLIP_DIST0 + i]),
.base = V_008DFC_SQ_EXP_POS + exp_num,
.write_mask = (clip_cull_mask >> (i * 4)) & 0xf);
exp_num++;
}
}
if (outputs_written & VARYING_BIT_CLIP_VERTEX) {
nir_ssa_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]);
/* Clip distance for clip vertex to each user clip plane. */
nir_ssa_def *clip_dist[8] = {0};
u_foreach_bit (i, clip_cull_mask) {
nir_ssa_def *ucp = nir_load_user_clip_plane(b, .ucp_id = i);
clip_dist[i] = nir_fdot4(b, vtx, ucp);
}
for (int i = 0; i < 2; i++) {
if (clip_cull_mask & BITFIELD_RANGE(i * 4, 4)) {
exp[exp_num] = nir_export_amd(
b, get_export_output(b, clip_dist + i * 4),
.base = V_008DFC_SQ_EXP_POS + exp_num,
.write_mask = (clip_cull_mask >> (i * 4)) & 0xf);
exp_num++;
}
}
}
/* Specify that this is the last export */
nir_intrinsic_instr *final_exp = exp[exp_num - 1];
unsigned final_exp_flags = nir_intrinsic_flags(final_exp);
nir_intrinsic_set_flags(final_exp, final_exp_flags | AC_EXP_FLAG_DONE);
/* If a shader has no param exports, rasterization can start before
* the shader finishes and thus memory stores might not finish before
* the pixel shader starts.
*/
if (gfx_level >= GFX10 && no_param_export && b->shader->info.writes_memory) {
nir_intrinsic_instr *wait_instr =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier_buffer);
nir_instr_insert_before(&final_exp->instr, &wait_instr->instr);
}
}
/**
* This function takes an I/O intrinsic like load/store_input,
* and emits a sequence that calculates the full offset of that instruction,

View file

@ -76,6 +76,14 @@ ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct a
void
ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim);
void
ac_nir_export_position(nir_builder *b,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
bool no_param_export,
uint64_t outputs_written,
nir_ssa_def *(*outputs)[4]);
nir_ssa_def *
ac_nir_calc_io_offset(nir_builder *b,
nir_intrinsic_instr *intrin,