ac/nir/ngg: Remove dead code for attribute ring stores.

These are replaced by the new helpers added in previous commits.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32640>
This commit is contained in:
Timur Kristóf 2024-12-20 12:40:25 -06:00
parent f528de896e
commit 9acc2f2435

View file

@ -2499,122 +2499,6 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
}
}
/**
 * Collect the written shader outputs that have a parameter export offset
 * into a compact array.
 *
 * \param b              NIR builder; used to read the shader's written-output
 *                       masks and to emit the 16-bit packing instructions.
 * \param outputs        Destination array, filled from index 0 upwards.
 * \param param_offsets  Per-slot parameter offset; slots whose offset is
 *                       greater than AC_EXP_PARAM_OFFSET_31 are ignored.
 * \param data           Per-slot channel values of the regular outputs.
 * \param data_16bit_lo  Per-slot low halves of the 16-bit outputs.
 * \param data_16bit_hi  Per-slot high halves of the 16-bit outputs.
 *
 * \return Number of entries written to \p outputs.
 */
static unsigned
gather_vs_outputs(nir_builder *b, vs_output *outputs,
                  const uint8_t *param_offsets,
                  nir_def *(*data)[4],
                  nir_def *(*data_16bit_lo)[4],
                  nir_def *(*data_16bit_hi)[4])
{
   unsigned count = 0;

   /* Regular outputs: the channel values are copied over unchanged. */
   u_foreach_bit64 (slot, b->shader->info.outputs_written) {
      if (param_offsets[slot] > AC_EXP_PARAM_OFFSET_31)
         continue;

      nir_def **chans = data[slot];

      /* Nothing was ever stored to this slot, so there is nothing to gather. */
      if (!(chans[0] || chans[1] || chans[2] || chans[3]))
         continue;

      vs_output *dst = &outputs[count];
      count++;

      dst->slot = slot;
      for (unsigned c = 0; c < 4; c++)
         dst->chan[c] = chans[c];
   }

   /* 16-bit outputs: each channel is a lo/hi pair packed into one 32-bit value. */
   u_foreach_bit (idx, b->shader->info.outputs_written_16bit) {
      unsigned slot = VARYING_SLOT_VAR0_16BIT + idx;
      if (param_offsets[slot] > AC_EXP_PARAM_OFFSET_31)
         continue;

      nir_def **lo_chans = data_16bit_lo[idx];
      nir_def **hi_chans = data_16bit_hi[idx];

      /* Neither half of any channel was ever stored: skip the slot. */
      if (!(lo_chans[0] || lo_chans[1] || lo_chans[2] || lo_chans[3] ||
            hi_chans[0] || hi_chans[1] || hi_chans[2] || hi_chans[3]))
         continue;

      vs_output *dst = &outputs[count];
      count++;
      dst->slot = slot;

      /* Stand-in for whichever half of a channel is missing. */
      nir_def *undef16 = nir_undef(b, 1, 16);

      for (unsigned c = 0; c < 4; c++) {
         if (!lo_chans[c] && !hi_chans[c]) {
            /* Channel is entirely unwritten. */
            dst->chan[c] = NULL;
            continue;
         }

         nir_def *lo = lo_chans[c] ? lo_chans[c] : undef16;
         nir_def *hi = hi_chans[c] ? hi_chans[c] : undef16;
         dst->chan[c] = nir_pack_32_2x16_split(b, lo, hi);
      }
   }

   return count;
}
/**
 * Replace every non-NULL channel of the first \p num_outputs entries of
 * \p outputs with the result of nir_if_phi() on that channel and a 32-bit
 * undef. NULL channels are left untouched.
 */
static void
create_vertex_param_phis(nir_builder *b, unsigned num_outputs, vs_output *outputs)
{
   /* Shared "else" value for all phis; inserted at the start of the shader. */
   nir_def *undef32 = nir_undef(b, 1, 32);

   for (unsigned out_idx = 0; out_idx < num_outputs; out_idx++) {
      vs_output *out = &outputs[out_idx];

      for (unsigned c = 0; c < 4; c++) {
         nir_def *value = out->chan[c];
         if (!value)
            continue;

         out->chan[c] = nir_if_phi(b, value, undef32);
      }
   }
}
/**
 * Emit attribute ring stores for the gathered vertex parameters.
 *
 * \param b                       NIR builder to emit the code with.
 * \param export_tid              Thread index compared against the export
 *                                thread count; when NULL, the subgroup
 *                                invocation index is tested instead.
 * \param num_export_threads      Number of exporting threads (rounded up to a
 *                                multiple of 8 here).
 * \param num_outputs             Number of valid entries in \p outputs.
 * \param outputs                 Outputs to store; NULL channels are stored
 *                                as undef.
 * \param vs_output_param_offset  Maps a varying slot to its parameter index.
 *
 * NOTE(review): BITFIELD_BIT(offset) presumably requires offset < 32.
 * gather_vs_outputs() drops slots above AC_EXP_PARAM_OFFSET_31, but this
 * function does not check it - confirm for any other producer of outputs[].
 */
static void
export_vertex_params_gfx11(nir_builder *b, nir_def *export_tid, nir_def *num_export_threads,
                           unsigned num_outputs, vs_output *outputs,
                           const uint8_t *vs_output_param_offset)
{
   nir_def *attr_rsrc = nir_load_ring_attr_amd(b);

   /* For the best performance, full vec4s should always be stored in groups
    * of 8 lanes, even when some lanes hold garbage or some components are
    * unused. Hence the export thread count is aligned up to 8.
    */
   nir_def *padded_threads = nir_iadd_imm(b, num_export_threads, 7);
   num_export_threads = nir_iand_imm(b, padded_threads, ~7);

   nir_def *is_export_thread =
      export_tid ? nir_ult(b, export_tid, num_export_threads)
                 : nir_is_subgroup_invocation_lt_amd(b, num_export_threads);
   nir_push_if(b, is_export_thread);

   nir_def *attr_offset = nir_load_ring_attr_offset_amd(b);
   nir_def *vindex = nir_load_local_invocation_index(b);
   nir_def *voffset = nir_imm_int(b, 0);
   nir_def *undef = nir_undef(b, 1, 32);

   /* One bit per parameter index that already has a store emitted. */
   uint32_t emitted_mask = 0;

   for (unsigned i = 0; i < num_outputs; i++) {
      vs_output *out = &outputs[i];
      gl_varying_slot slot = out->slot;
      unsigned offset = vs_output_param_offset[slot];
      uint32_t param_bit = BITFIELD_BIT(offset);

      /* vs_output_param_offset[] may map several varying slots to the same
       * param export index (radeonsi-specific behavior); only the first of
       * them gets a store so that no duplicated exports are emitted.
       */
      if (emitted_mask & param_bit)
         continue;

      /* Fill the holes so that a complete vec4 can be built. */
      nir_def *comp[4];
      for (unsigned c = 0; c < 4; c++) {
         nir_def *value = out->chan[c];
         comp[c] = value ? value : undef;
      }

      nir_def *vec4 = nir_vec(b, comp, 4);
      nir_store_buffer_amd(b, vec4, attr_rsrc, voffset, attr_offset, vindex,
                           .base = offset * 16,
                           .memory_modes = nir_var_shader_out,
                           .access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD);

      emitted_mask |= param_bit;
   }

   nir_pop_if(b, NULL);
}
static void
create_output_phis(nir_builder *b, const uint64_t outputs_written, const uint64_t outputs_written_16bit, ac_nir_prerast_out *out)
{