r600/sfn: Lower all GS indirect input loads after lowering IO

Extend the lowering of indirect GS array inputs to include
all indirect access. This replaces
   r600_gs_load_deref_io_to_indirect_per_vertex_input
which was restricted to vec4 POS inputs. The new lowering
pass works for all varyings and all types, so we can also
drop the call to nir_lower_indirect_derefs.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36488>
Gert Wollny, 2025-07-31 17:17:15 +02:00, committed by Marge Bot
parent 356bc41797
commit eccd178f0b


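At a high level, the pass turns each per-vertex input load into a load
from an explicitly computed address: the vertex base plus the attribute
offset shifted left by two, presumably converting slot offsets to a
four-word granularity. A minimal sketch of that address math in plain
C++ (gs_input_address is a hypothetical name, not part of this patch):

   #include <cstdint>

   /* Mirrors nir_iadd(vbase, nir_ishl(offset, 2)) in the lowering
    * below: the slot offset is scaled by four. */
   static inline uint32_t
   gs_input_address(uint32_t vertex_base, uint32_t attrib_offset)
   {
      return vertex_base + (attrib_offset << 2);
   }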
@@ -282,16 +282,33 @@ class LowerGSArrayInput : public NirLowerInstruction {
          return false;
       auto intr = nir_instr_as_intrinsic(instr);
-      if (intr->intrinsic != nir_intrinsic_load_per_vertex_input)
-         return false;
-      return nir_intrinsic_io_semantics(intr).num_slots != 1;
+      return intr->intrinsic == nir_intrinsic_load_per_vertex_input;
    }
 
    nir_def *lower(nir_instr *instr) override
    {
       auto intr = nir_instr_as_intrinsic(instr);
-      auto vbase = nir_r600_indirect_vertex_at_index(b, 32, intr->src[0].ssa);
+
+      bool const_vertex_index = nir_src_as_const_value(intr->src[0]) != NULL;
+
+      if (!const_vertex_index && !m_base_array) {
+         auto nvtx = b->shader->info.gs.vertices_in;
+         m_base_array = nir_decl_reg(b, 1, 32, nvtx);
+         for (int i = 0; i < nvtx; ++i) {
+            auto idx = nir_imm_int(b, i);
+            auto base = nir_r600_indirect_vertex_at_index(b, 32, idx);
+            nir_store_reg_indirect(b, base, m_base_array, idx);
+         }
+      }
+
+      /* If the input indices were stored in the array for indirect access
+       * always use this to reduce the live range of the registers that are
+       * initialized with these values before the shader is run
+       */
+      auto vbase = (const_vertex_index && !m_base_array)
+                      ? nir_r600_indirect_vertex_at_index(b, 32, intr->src[0].ssa)
+                      : nir_load_reg_indirect(b, 1, 32, m_base_array, intr->src[0].ssa);
 
       auto new_addr =
          nir_iadd(b, vbase, nir_ishl(b, intr->src[1].ssa, nir_imm_int(b, 2)));
       auto io_semantics = nir_intrinsic_io_semantics(intr);
@@ -304,6 +321,7 @@ class LowerGSArrayInput : public NirLowerInstruction {
          .range = nir_intrinsic_range(intr),
          .io_semantics = io_semantics);
    }
+   nir_def *m_base_array{nullptr};
 };
 
 } // namespace r600
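When the vertex index is not a compile-time constant, the pass first
spills the base address of every input vertex into a NIR register
array and then indexes that array, keeping the live ranges of the
preloaded values short. A rough standalone analogue in plain C++,
assuming three input vertices and a hypothetical vertex_base_at()
standing in for nir_r600_indirect_vertex_at_index:

   #include <array>
   #include <cstdint>

   uint32_t vertex_base_at(uint32_t vertex); /* hypothetical stand-in */

   uint32_t load_vertex_base(uint32_t dynamic_index)
   {
      /* Populate the array once with every vertex's base address... */
      std::array<uint32_t, 3> base_array;
      for (uint32_t i = 0; i < 3; ++i)
         base_array[i] = vertex_base_at(i);

      /* ...so each indirect access becomes a plain indexed read. */
      return base_array[dynamic_index];
   }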
@@ -671,169 +689,11 @@ r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
    }
 }
 
-struct indirect_per_vertex {
-   nir_deref_instr *array_indirect_deref;
-   uint32_t mask;
-   nir_instr *saved_for_removal[R600_GS_VERTEX_INDIRECT_TOTAL][4];
-   unsigned obsolete_deref_count;
-   nir_instr *obsolete_deref[32];
-};
-
-static bool
-r600_nir_gs_load_deref_io_to_indirect_per_vertex_input(nir_builder *b,
-                                                       nir_intrinsic_instr *intrin,
-                                                       void *cb_data)
-{
-   struct indirect_per_vertex *indirect_per_vertex =
-      (struct indirect_per_vertex *)cb_data;
-   unsigned j;
-
-   if (intrin->intrinsic != nir_intrinsic_load_deref)
-      return false;
-
-   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
-   if (!nir_deref_mode_is_one_of(deref, nir_var_shader_in))
-      return false;
-
-   nir_variable *var = nir_deref_instr_get_variable(deref);
-   const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
-   if (!is_arrayed || var->data.location != VARYING_SLOT_POS)
-      return false;
-
-   nir_def *array_index = deref->arr.index.ssa;
-   if (!array_index)
-      return false;
-
-   assert(intrin->def.num_components == 4);
-
-   nir_deref_instr *original_array = nir_instr_as_deref(
-      nir_instr_as_deref(intrin->src[0].ssa->parent_instr)->parent.ssa->parent_instr);
-   if (!original_array || original_array->deref_type != nir_deref_type_var ||
-       !glsl_type_is_array(original_array->type))
-      return false;
-
-   auto element_type = glsl_without_array(original_array->type);
-   if (element_type != &glsl_type_builtin_vec4)
-      return false;
-
-   const unsigned array_length = glsl_get_length(original_array->type);
-   assert(array_length <= R600_GS_VERTEX_INDIRECT_TOTAL);
-
-   for (j = 0; j < indirect_per_vertex->obsolete_deref_count &&
-               j < ARRAY_SIZE(indirect_per_vertex->obsolete_deref);
-        j++)
-      if (intrin->src[0].ssa->parent_instr == indirect_per_vertex->obsolete_deref[j])
-         break;
-   if (j == indirect_per_vertex->obsolete_deref_count &&
-       j != ARRAY_SIZE(indirect_per_vertex->obsolete_deref)) {
-      indirect_per_vertex->obsolete_deref[j] = intrin->src[0].ssa->parent_instr;
-      indirect_per_vertex->obsolete_deref_count++;
-   }
-
-   /* The next block generates a global array which is required
-    * for the indirect access. This array is located at the
-    * beginning. All the possible elements are generated. At the
-    * end, the elements which are not referenced are removed. */
-   if (!indirect_per_vertex->array_indirect_deref) {
-      static const char array_indirect_name[] = "r600_indirect_vertex_at_index";
-
-      b->cursor = nir_before_block(nir_start_block(b->impl));
-      nir_variable *array_indirect_var = nir_local_variable_create(
-         b->impl,
-         glsl_array_type(glsl_int_type(), R600_GS_VERTEX_INDIRECT_TOTAL, 0),
-         array_indirect_name);
-      indirect_per_vertex->array_indirect_deref =
-         nir_build_deref_var(b, array_indirect_var);
-
-      for (unsigned k = 0; k < R600_GS_VERTEX_INDIRECT_TOTAL; k++) {
-         nir_def *build_count = nir_imm_int(b, k);
-         nir_deref_instr *build_array =
-            nir_build_deref_array(b,
-                                  indirect_per_vertex->array_indirect_deref,
-                                  build_count);
-         nir_def *build_store =
-            nir_r600_indirect_vertex_at_index(b, intrin->def.bit_size, build_count);
-         nir_store_deref(b, build_array, build_store, 1);
-
-         indirect_per_vertex->saved_for_removal[k][0] = build_count->parent_instr;
-         indirect_per_vertex->saved_for_removal[k][1] = &build_array->instr;
-         indirect_per_vertex->saved_for_removal[k][2] = build_store->parent_instr;
-         indirect_per_vertex->saved_for_removal[k][3] =
-            nir_instr_next(build_store->parent_instr); // nir_store_deref
-      }
-   }
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   for (unsigned k = 0; k < array_length; k++)
-      indirect_per_vertex->mask |= (1 << k);
-
-   nir_def *zero = nir_imm_int(b, 0);
-   nir_def *array_indirect_def = nir_load_deref(
-      b,
-      nir_build_deref_array(b, indirect_per_vertex->array_indirect_deref, array_index));
-   nir_def *load =
-      nir_load_r600_indirect_per_vertex_input(b,
-                                              intrin->def.num_components,
-                                              intrin->def.bit_size,
-                                              array_indirect_def,
-                                              zero);
-   nir_intrinsic_set_base(nir_instr_as_intrinsic(load->parent_instr),
-                          var->data.driver_location);
-
-   nir_def_rewrite_uses(&intrin->def, load);
-   nir_instr_remove(&intrin->instr);
-
-   return true;
-}
-
-static bool
-r600_gs_load_deref_io_to_indirect_per_vertex_input(nir_shader *shader)
-{
-   struct indirect_per_vertex indirect_per_vertex = {nullptr};
-   bool ret =
-      nir_shader_intrinsics_pass(shader,
-                                 r600_nir_gs_load_deref_io_to_indirect_per_vertex_input,
-                                 nir_metadata_control_flow,
-                                 &indirect_per_vertex);
-
-   if (indirect_per_vertex.array_indirect_deref) {
-      for (unsigned k = 0; k < R600_GS_VERTEX_INDIRECT_TOTAL; k++)
-         if ((indirect_per_vertex.mask & (1 << k)) == 0) {
-            nir_instr_remove(indirect_per_vertex.saved_for_removal[k][3]);
-            nir_instr_remove(indirect_per_vertex.saved_for_removal[k][2]);
-            nir_instr_remove(indirect_per_vertex.saved_for_removal[k][1]);
-            nir_instr_remove(indirect_per_vertex.saved_for_removal[k][0]);
-         }
-      for (unsigned k = 0; k < indirect_per_vertex.obsolete_deref_count; k++)
-         nir_instr_remove(indirect_per_vertex.obsolete_deref[k]);
-   }
-   return ret;
-}
-
 void
 r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level)
 {
    const int nir_lower_flrp_mask = 16 | 32 | 64;
 
-   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
-      NIR_PASS(_, nir, r600_gs_load_deref_io_to_indirect_per_vertex_input);
-      NIR_PASS(_,
-               nir,
-               nir_lower_indirect_derefs,
-               nir_var_shader_in,
-               R600_GS_VERTEX_INDIRECT_TOTAL);
-   }
-
    NIR_PASS(_, nir, nir_lower_flrp, nir_lower_flrp_mask, false);
 
    nir_lower_idiv_options idiv_options = {0};
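With the deref-based pass gone, r600_finalize_nir_common keeps no
geometry-shader special case; indirect per-vertex input loads are now
handled entirely by LowerGSArrayInput after IO lowering. The call site
is outside this diff; assuming NirLowerInstruction exposes the usual
run() entry point, it would look roughly like:

   /* Hypothetical call site, after nir_lower_io has run: */
   if (shader->info.stage == MESA_SHADER_GEOMETRY) {
      r600::LowerGSArrayInput lower_gs_inputs;
      lower_gs_inputs.run(shader);
   }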