mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 05:40:13 +01:00
ac/nir: add ac_nir_export_position
Used by the last VGT stage to export position-related outputs. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>
This commit is contained in:
parent
e9268b2c06
commit
f823581dd4
2 changed files with 166 additions and 0 deletions
|
|
@ -56,6 +56,164 @@ ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim)
|
|||
.write_mask = write_mask);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
get_export_output(nir_builder *b, nir_ssa_def **output)
|
||||
{
|
||||
nir_ssa_def *vec[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (output[i])
|
||||
vec[i] = nir_u2uN(b, output[i], 32);
|
||||
else
|
||||
vec[i] = nir_ssa_undef(b, 1, 32);
|
||||
}
|
||||
|
||||
return nir_vec(b, vec, 4);
|
||||
}
|
||||
|
||||
void
|
||||
ac_nir_export_position(nir_builder *b,
|
||||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool no_param_export,
|
||||
uint64_t outputs_written,
|
||||
nir_ssa_def *(*outputs)[4])
|
||||
{
|
||||
nir_intrinsic_instr *exp[4];
|
||||
unsigned exp_num = 0;
|
||||
|
||||
nir_ssa_def *pos;
|
||||
if (outputs_written & VARYING_BIT_POS) {
|
||||
pos = get_export_output(b, outputs[VARYING_SLOT_POS]);
|
||||
} else {
|
||||
nir_ssa_def *zero = nir_imm_float(b, 0);
|
||||
nir_ssa_def *one = nir_imm_float(b, 1);
|
||||
pos = nir_vec4(b, zero, zero, zero, one);
|
||||
}
|
||||
|
||||
/* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
|
||||
* Setting valid_mask=1 prevents it and has no other effect.
|
||||
*/
|
||||
unsigned pos_flags = gfx_level == GFX10 ? AC_EXP_FLAG_VALID_MASK : 0;
|
||||
|
||||
exp[exp_num] = nir_export_amd(
|
||||
b, pos, .base = V_008DFC_SQ_EXP_POS + exp_num,
|
||||
.flags = pos_flags, .write_mask = 0xf);
|
||||
exp_num++;
|
||||
|
||||
uint64_t mask =
|
||||
VARYING_BIT_PSIZ |
|
||||
VARYING_BIT_EDGE |
|
||||
VARYING_BIT_LAYER |
|
||||
VARYING_BIT_VIEWPORT |
|
||||
VARYING_BIT_PRIMITIVE_SHADING_RATE;
|
||||
|
||||
/* clear output mask if no one written */
|
||||
if (!outputs[VARYING_SLOT_PSIZ][0])
|
||||
outputs_written &= ~VARYING_BIT_PSIZ;
|
||||
if (!outputs[VARYING_SLOT_EDGE][0])
|
||||
outputs_written &= ~VARYING_BIT_EDGE;
|
||||
if (!outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0])
|
||||
outputs_written &= ~VARYING_BIT_PRIMITIVE_SHADING_RATE;
|
||||
if (!outputs[VARYING_SLOT_LAYER][0])
|
||||
outputs_written &= ~VARYING_BIT_LAYER;
|
||||
if (!outputs[VARYING_SLOT_VIEWPORT][0])
|
||||
outputs_written &= ~VARYING_BIT_VIEWPORT;
|
||||
|
||||
if (outputs_written & mask) {
|
||||
nir_ssa_def *zero = nir_imm_float(b, 0);
|
||||
nir_ssa_def *vec[4] = { zero, zero, zero, zero };
|
||||
unsigned flags = 0;
|
||||
unsigned write_mask = 0;
|
||||
|
||||
if (outputs_written & VARYING_BIT_PSIZ) {
|
||||
vec[0] = outputs[VARYING_SLOT_PSIZ][0];
|
||||
write_mask |= BITFIELD_BIT(0);
|
||||
}
|
||||
|
||||
if (outputs_written & VARYING_BIT_EDGE) {
|
||||
vec[1] = nir_umin(b, outputs[VARYING_SLOT_EDGE][0], nir_imm_int(b, 1));
|
||||
write_mask |= BITFIELD_BIT(1);
|
||||
}
|
||||
|
||||
if (outputs_written & VARYING_BIT_PRIMITIVE_SHADING_RATE) {
|
||||
vec[1] = nir_ior(b, vec[1], outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]);
|
||||
write_mask |= BITFIELD_BIT(1);
|
||||
}
|
||||
|
||||
if (outputs_written & VARYING_BIT_LAYER) {
|
||||
vec[2] = outputs[VARYING_SLOT_LAYER][0];
|
||||
write_mask |= BITFIELD_BIT(2);
|
||||
}
|
||||
|
||||
if (outputs_written & VARYING_BIT_VIEWPORT) {
|
||||
if (gfx_level >= GFX9) {
|
||||
/* GFX9 has the layer in [10:0] and the viewport index in [19:16]. */
|
||||
nir_ssa_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16);
|
||||
vec[2] = nir_ior(b, vec[2], v);
|
||||
write_mask |= BITFIELD_BIT(2);
|
||||
} else {
|
||||
vec[3] = outputs[VARYING_SLOT_VIEWPORT][0];
|
||||
write_mask |= BITFIELD_BIT(3);
|
||||
}
|
||||
}
|
||||
|
||||
exp[exp_num] = nir_export_amd(
|
||||
b, nir_vec(b, vec, 4),
|
||||
.base = V_008DFC_SQ_EXP_POS + exp_num,
|
||||
.flags = flags,
|
||||
.write_mask = write_mask);
|
||||
exp_num++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if ((outputs_written & (VARYING_BIT_CLIP_DIST0 << i)) &&
|
||||
(clip_cull_mask & BITFIELD_RANGE(i * 4, 4))) {
|
||||
exp[exp_num] = nir_export_amd(
|
||||
b, get_export_output(b, outputs[VARYING_SLOT_CLIP_DIST0 + i]),
|
||||
.base = V_008DFC_SQ_EXP_POS + exp_num,
|
||||
.write_mask = (clip_cull_mask >> (i * 4)) & 0xf);
|
||||
exp_num++;
|
||||
}
|
||||
}
|
||||
|
||||
if (outputs_written & VARYING_BIT_CLIP_VERTEX) {
|
||||
nir_ssa_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]);
|
||||
|
||||
/* Clip distance for clip vertex to each user clip plane. */
|
||||
nir_ssa_def *clip_dist[8] = {0};
|
||||
u_foreach_bit (i, clip_cull_mask) {
|
||||
nir_ssa_def *ucp = nir_load_user_clip_plane(b, .ucp_id = i);
|
||||
clip_dist[i] = nir_fdot4(b, vtx, ucp);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (clip_cull_mask & BITFIELD_RANGE(i * 4, 4)) {
|
||||
exp[exp_num] = nir_export_amd(
|
||||
b, get_export_output(b, clip_dist + i * 4),
|
||||
.base = V_008DFC_SQ_EXP_POS + exp_num,
|
||||
.write_mask = (clip_cull_mask >> (i * 4)) & 0xf);
|
||||
exp_num++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Specify that this is the last export */
|
||||
nir_intrinsic_instr *final_exp = exp[exp_num - 1];
|
||||
unsigned final_exp_flags = nir_intrinsic_flags(final_exp);
|
||||
nir_intrinsic_set_flags(final_exp, final_exp_flags | AC_EXP_FLAG_DONE);
|
||||
|
||||
/* If a shader has no param exports, rasterization can start before
|
||||
* the shader finishes and thus memory stores might not finish before
|
||||
* the pixel shader starts.
|
||||
*/
|
||||
if (gfx_level >= GFX10 && no_param_export && b->shader->info.writes_memory) {
|
||||
nir_intrinsic_instr *wait_instr =
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_memory_barrier_buffer);
|
||||
|
||||
nir_instr_insert_before(&final_exp->instr, &wait_instr->instr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This function takes an I/O intrinsic like load/store_input,
|
||||
* and emits a sequence that calculates the full offset of that instruction,
|
||||
|
|
|
|||
|
|
@ -76,6 +76,14 @@ ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct a
|
|||
void
|
||||
ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim);
|
||||
|
||||
/**
 * Emit the position-related exports of a shader: the position itself, an
 * optional combined export for point size / edge flag / primitive shading
 * rate / layer / viewport index, and the clip distance exports. The last
 * export emitted is flagged as done.
 *
 * b:               builder used to emit the instructions
 * gfx_level:       hardware generation the code is emitted for
 * clip_cull_mask:  bits 0-3 / 4-7 are the write masks of the first / second
 *                  clip distance export
 * no_param_export: true if the shader has no param exports
 * outputs_written: VARYING_BIT_* mask of the outputs to consider
 * outputs:         table indexed by VARYING_SLOT_*, four component values
 *                  per slot; NULL marks a missing component
 */
void
|
||||
ac_nir_export_position(nir_builder *b,
|
||||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool no_param_export,
|
||||
uint64_t outputs_written,
|
||||
nir_ssa_def *(*outputs)[4]);
|
||||
|
||||
nir_ssa_def *
|
||||
ac_nir_calc_io_offset(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue